# yoloserv/modules/paravision/recognition/tensorrt/engine.py
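"""TensorRT inference wrapper for the Paravision recognition pipeline.

Loads per-task TensorRT engines (face detection, landmarks, quality,
recognition, attributes, and optionally mask detection) and exposes batched
prediction helpers that run over pre-allocated CUDA buffers.

A minimal usage sketch; the models directory and settings contents are
illustrative, not the real configuration schema:

    engine = Engine(models_dir, settings)
    bboxes, confs, img_idxs = engine.predict_bounding_boxes(images)
    embeddings = engine.predict_embeddings(aligned_faces)
"""
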
import os
import importlib
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit # noqa
from .. import _utils as utils
from ..exceptions import (
    ModelLoadingException,
)
from . import utils as trt_utils
from .builder import load_engine

QUALITIES_QUALITIES_NAME = "qualities"
QUALITIES_ACCEPTABILITIES_NAME = "acceptabilities"
LANDMARKS_LANDMARKS_NAME = "landmarks"
ATTRIBUTES_AGES_NAME = "age_probs"
ATTRIBUTES_GENDERS_NAME = "gender_probs"

UNIT_LOWER_LIMIT = 0
UNIT_UPPER_LIMIT = 1

ERR_ENGINE_UNINITIALIZED = "The engine is not initialized."
ERR_MASK_MODEL_NOT_LOADED = "Mask model not loaded."

FD_NAME = "detection"
LM_NAME = "landmarks"
QL_NAME = "quality"
FR_NAME = "recognition"
AT_NAME = "attributes"
MD_NAME = "mask"
ENGINE_EXT = ".engine"


class Engine:
    def __init__(self, models_dir, settings):
        engine_dirpath = models_dir
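        # Prefer the pre-built TensorRT engine directory shipped with the
        # optional paravision_models package when it manages this models_dir.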
        try:
            paravision_models = importlib.import_module("paravision_models")
            if paravision_models.location() == models_dir:
                engine_dirpath = paravision_models.TRT_ENGINE_PATH
        except (ModuleNotFoundError, AttributeError):
            pass
        self.stream = cuda.Stream()
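        # Resolve the configured model variant for each task from the settings.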
        (
            fd_model_type,
            lm_model_type,
            ql_model_type,
            fr_model_type,
            at_model_type,
            md_model_type,
        ) = utils.get_model_types(settings)
        self.fd_input_shape = utils.read_fd_input_shape(models_dir, fd_model_type)
        fd_engine_path = os.path.join(
            engine_dirpath, FD_NAME, fd_model_type, FD_NAME + ENGINE_EXT
        )
        self.fd_engine = load_engine(
            FD_NAME,
            fd_engine_path,
            models_dir,
            fd_model_type,
            settings,
            self.fd_input_shape,
        )
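        # load_engine can return a falsy value; only create an execution
        # context and I/O buffers for engines that actually loaded. The same
        # pattern repeats for each model below.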
        if self.fd_engine:
            self.fd_context = self.fd_engine.create_execution_context()
            (
                self.fd_inputs,
                self.fd_outputs,
                self.fd_data,
                self.fd_bindings,
            ) = trt_utils.allocate_buffers(self.fd_engine)
        self.lm_input_shape = utils.read_lm_input_shape(models_dir)
        lm_engine_path = os.path.join(
            engine_dirpath, LM_NAME, lm_model_type, LM_NAME + ENGINE_EXT
        )
        self.lm_engine = load_engine(
            LM_NAME,
            lm_engine_path,
            models_dir,
            lm_model_type,
            settings,
            self.lm_input_shape,
        )
        if self.lm_engine:
            self.lm_context = self.lm_engine.create_execution_context()
            (
                self.lm_inputs,
                self.lm_outputs,
                self.lm_data,
                self.lm_bindings,
            ) = trt_utils.allocate_buffers(self.lm_engine)
        # The quality model consumes the same input shape as the landmark model.
        self.ql_input_shape = utils.read_lm_input_shape(models_dir)
        ql_engine_path = os.path.join(
            engine_dirpath, QL_NAME, ql_model_type, QL_NAME + ENGINE_EXT
        )
        self.ql_engine = load_engine(
            QL_NAME,
            ql_engine_path,
            models_dir,
            ql_model_type,
            settings,
            self.ql_input_shape,
        )
        if self.ql_engine:
            self.ql_context = self.ql_engine.create_execution_context()
            (
                self.ql_inputs,
                self.ql_outputs,
                self.ql_data,
                self.ql_bindings,
            ) = trt_utils.allocate_buffers(self.ql_engine)
        self.fr_input_shape = utils.read_fr_input_shape(models_dir)
        fr_engine_path = os.path.join(
            engine_dirpath, FR_NAME, fr_model_type, FR_NAME + ENGINE_EXT
        )
        self.fr_engine = load_engine(
            FR_NAME,
            fr_engine_path,
            models_dir,
            fr_model_type,
            settings,
            self.fr_input_shape,
        )
        if self.fr_engine:
            self.fr_context = self.fr_engine.create_execution_context()
            (
                self.fr_inputs,
                self.fr_outputs,
                self.fr_data,
                self.fr_bindings,
            ) = trt_utils.allocate_buffers(self.fr_engine)
        self.fr_output_shape = utils.read_fr_output_shape(models_dir)
        self.at_input_shape = utils.read_at_input_shape(models_dir)
        at_engine_path = os.path.join(
            engine_dirpath, AT_NAME, at_model_type, AT_NAME + ENGINE_EXT
        )
        self.at_engine = load_engine(
            AT_NAME,
            at_engine_path,
            models_dir,
            at_model_type,
            settings,
            self.at_input_shape,
        )
        if self.at_engine:
            self.at_context = self.at_engine.create_execution_context()
            (
                self.at_inputs,
                self.at_outputs,
                self.at_data,
                self.at_bindings,
            ) = trt_utils.allocate_buffers(self.at_engine)
        # The mask input image is prepared separately, as its shape can deviate
        # from the landmark input shape.
        if "mask" in settings:
            md_model_path = settings["mask"]["models_dir"]
            md_engine_path = os.path.join(
                md_model_path, md_model_type, MD_NAME + ENGINE_EXT
            )
            self.md_input_shape = utils.read_md_input_shape(models_dir)
            self.md_engine = load_engine(
                MD_NAME,
                md_engine_path,
                md_model_path,
                md_model_type,
                settings,
                self.md_input_shape,
            )
            if self.md_engine:
                self.md_context = self.md_engine.create_execution_context()
                (
                    self.md_inputs,
                    self.md_outputs,
                    self.md_data,
                    self.md_bindings,
                ) = trt_utils.allocate_buffers(self.md_engine)
            self.mask_enabled = True
        else:
            self.mask_enabled = False

    def predict_bounding_boxes(self, np_imgs):
        """
        Args:
            np_imgs: (list) list of images loaded in numpy, of format (1, H, W, C)
        Returns:
            bboxes: (list) list containing arrays of bboxes for each image
                in order [x1, y1, x2, y2], scaled between 0 and 1
            confs: (list) list containing arrays of confidence scores
                of the faces for each image
            img_idxs: (list) index of the source image for each detected face
        """
        if not self.fd_engine:
            raise ModelLoadingException(ERR_ENGINE_UNINITIALIZED)
        max_batch_size = self.fd_engine.max_batch_size
        bboxes, confidences, img_idxs = [], [], []
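        # Run inference in chunks no larger than the engine's max batch size.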
        for i in range(0, len(np_imgs), max_batch_size):
            batch = np_imgs[i : min(len(np_imgs), i + max_batch_size)]
            (
                bboxes_batch,
                confidences_batch,
                img_idxs_batch,
            ) = self._batch_predict_bounding_boxes(batch)
            bboxes.extend(bboxes_batch)
            confidences.extend(confidences_batch)
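            # Per-batch image indexes are relative to the chunk; offset them
            # into the full input list.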
            img_idxs.extend(img_idxs_batch + i)
        bboxes = np.asarray(bboxes).reshape(-1, 4)
        confidences = np.asarray(confidences).reshape(-1)
        return bboxes, confidences, img_idxs

    def _batch_predict_bounding_boxes(self, np_imgs):
        np_imgs = np.transpose(np.asarray(np_imgs), [0, 3, 1, 2]).astype(np.float32)
        batch_size = len(np_imgs)
        results = trt_utils.do_inference(
            self.fd_context,
            bindings=self.fd_bindings,
            inputs=self.fd_inputs,
            input_data=np_imgs,
            outputs=self.fd_outputs,
            output_data=self.fd_data,
            stream=self.stream,
            batch_size=batch_size,
        )
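        # Detection outputs: [num_detections, boxes, scores, image indexes];
        # slice each to the number of valid detections.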
        num_detections = int(results[0])
        bboxes = results[1].reshape(-1, 4)[:num_detections]
        scores = results[2][:num_detections].tolist()
        indexes = results[3][:num_detections].astype(np.int32)
        return bboxes, scores, indexes

    def predict_landmarks(self, np_imgs):
        """
        Args:
            np_imgs: (list) imgs loaded in numpy of format (1, H, W, C)
        Returns:
            lmks: (numpy array) landmarks in the shape of (N, 10),
                i.e. five (x, y) points per face
        """
        if not self.lm_engine:
            raise ModelLoadingException(ERR_ENGINE_UNINITIALIZED)
        max_batch_size = self.lm_engine.max_batch_size
        lmks = []
        for i in range(0, len(np_imgs), max_batch_size):
            batch = np_imgs[i : min(len(np_imgs), i + max_batch_size)]
            lmks_batch = self._batch_predict_landmarks(batch)
            lmks.extend(lmks_batch)
        return np.asarray(lmks)

    def _batch_predict_landmarks(self, np_imgs):
        np_imgs = np.transpose(np_imgs, [0, 3, 1, 2]).astype(np.float32)
        batch_size = len(np_imgs)
        results = trt_utils.do_inference(
            self.lm_context,
            bindings=self.lm_bindings,
            inputs=self.lm_inputs,
            input_data=np_imgs,
            outputs=self.lm_outputs,
            output_data=self.lm_data,
            stream=self.stream,
            batch_size=batch_size,
        )
        # Because the buffers are pre-allocated to accommodate the max batch
        # size, the trailing elements of the results stay 0 unless we are
        # finding landmarks for max_batch_size faces, so we explicitly grab
        # only the elements we want.
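        # engine[name] resolves a binding name to its TensorRT binding index;
        # the - 1 skips the input binding to index into the outputs list
        # (this assumes a single image input at binding 0).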
        landmarks = results[self.lm_engine[LANDMARKS_LANDMARKS_NAME] - 1].reshape(
            -1, 10
        )[:batch_size]
        return landmarks

    def predict_embeddings(self, np_imgs):
        """
        Args:
            np_imgs: (list) list of images loaded in numpy of format (1, H, W, C)
        Returns:
            embs: (numpy array) array of embedding arrays
        """
        if not self.fr_engine:
            raise ModelLoadingException(ERR_ENGINE_UNINITIALIZED)
        max_batch_size = self.fr_engine.max_batch_size
        batch_size = len(np_imgs)
        embeddings = []
        for i in range(0, batch_size, max_batch_size):
            batch = np_imgs[i : min(batch_size, i + max_batch_size)]
            embs = self._batch_predict_embeddings(batch)
            embeddings.extend(embs)
        return np.asarray(embeddings).reshape(batch_size, -1)

    def _batch_predict_embeddings(self, np_imgs):
        np_imgs = np.transpose(np_imgs, [0, 3, 1, 2]).astype(np.float32)
        batch_size = len(np_imgs)
        results = trt_utils.do_inference(
            self.fr_context,
            bindings=self.fr_bindings,
            inputs=self.fr_inputs,
            input_data=np_imgs,
            outputs=self.fr_outputs,
            output_data=self.fr_data,
            stream=self.stream,
            batch_size=batch_size,
        )
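        # Only the first output binding, the embedding vectors, is consumed.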
        return results[0]

    def predict_attributes(self, np_imgs):
        if not self.at_engine:
            raise ModelLoadingException(ERR_ENGINE_UNINITIALIZED)
        max_batch_size = self.at_engine.max_batch_size
        batch_size = len(np_imgs)
        all_ages, all_genders = [], []
        for i in range(0, batch_size, max_batch_size):
            batch = np_imgs[i : min(batch_size, i + max_batch_size)]
            ages, genders = self._batch_predict_attributes(batch)
            all_ages.extend(ages)
            all_genders.extend(genders)
        return all_ages, all_genders

    def _batch_predict_attributes(self, np_imgs):
        """
        Args:
            np_imgs: (numpy array) imgs loaded in numpy of format (1, H, W, C)
        Returns:
            age_probs: (numpy array) age probabilities in the shape of (N, 7)
            gender_probs: (numpy array) gender probabilities in the shape of (N, 2)
        """
        np_imgs = np.transpose(np_imgs, [0, 3, 1, 2]).astype(np.float32)
        batch_size = len(np_imgs)
        results = trt_utils.do_inference(
            self.at_context,
            bindings=self.at_bindings,
            inputs=self.at_inputs,
            input_data=np_imgs,
            outputs=self.at_outputs,
            output_data=self.at_data,
            batch_size=batch_size,
            stream=self.stream,
        )
        ages = results[self.at_engine[ATTRIBUTES_AGES_NAME] - 1].reshape(-1, 7)[
            :batch_size
        ]
        genders = results[self.at_engine[ATTRIBUTES_GENDERS_NAME] - 1].reshape(-1, 2)[
            :batch_size
        ]
        return [ages, genders]

    def get_qualities(self, np_imgs):
        """
        Args:
            np_imgs: (list) imgs loaded in numpy of format (1, H, W, C)
        Returns:
            qualities: (numpy array) quality values between 0 and 1
            acceptabilities: (numpy array) acceptability values between 0 and 1
        """
        if not self.ql_engine:
            raise ModelLoadingException(ERR_ENGINE_UNINITIALIZED)
        max_batch_size = self.ql_engine.max_batch_size
        qualities, acceptabilities = [], []
        for i in range(0, len(np_imgs), max_batch_size):
            batch = np_imgs[i : min(len(np_imgs), i + max_batch_size)]
            qualities_batch, acceptabilities_batch = self._batch_get_qualities(batch)
            qualities.extend(qualities_batch)
            acceptabilities.extend(acceptabilities_batch)
        return (
            np.clip(qualities, UNIT_LOWER_LIMIT, UNIT_UPPER_LIMIT),
            np.clip(acceptabilities, UNIT_LOWER_LIMIT, UNIT_UPPER_LIMIT),
        )

    def _batch_get_qualities(self, np_imgs):
        np_imgs = np.transpose(np_imgs, [0, 3, 1, 2]).astype(np.float32)
        batch_size = len(np_imgs)
        results = trt_utils.do_inference(
            self.ql_context,
            bindings=self.ql_bindings,
            inputs=self.ql_inputs,
            input_data=np_imgs,
            outputs=self.ql_outputs,
            output_data=self.ql_data,
            stream=self.stream,
            batch_size=batch_size,
        )
        qualities = results[self.ql_engine[QUALITIES_QUALITIES_NAME] - 1][:batch_size]
        acceptabilities = results[self.ql_engine[QUALITIES_ACCEPTABILITIES_NAME] - 1][
            :batch_size
        ]
        return qualities, acceptabilities

    def check_for_masks(self, np_imgs):
        if not self.md_engine:
            raise ModelLoadingException(ERR_MASK_MODEL_NOT_LOADED)
        max_batch_size = self.md_engine.max_batch_size
        batch_size = len(np_imgs)
        mask_probabilities = []
        for i in range(0, batch_size, max_batch_size):
            batch = np_imgs[i : min(batch_size, i + max_batch_size)]
            mask_probabilities.extend(self._batch_check_for_masks(batch))
        return np.asarray(mask_probabilities)

    def _batch_check_for_masks(self, np_imgs):
        """
        Args:
            np_imgs: (list) imgs loaded in numpy of format (1, H, W, C)
        Returns:
            mask_probs: (numpy array) mask probabilities in the shape of (N, 1, 1)
        """
        np_imgs = np.transpose(np_imgs, [0, 3, 1, 2]).astype(np.float32)
        results = trt_utils.do_inference(
            self.md_context,
            bindings=self.md_bindings,
            inputs=self.md_inputs,
            input_data=np_imgs,
            outputs=self.md_outputs,
            output_data=self.md_data,
            stream=self.stream,
            batch_size=len(np_imgs),
        )
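        # Only the first output binding, the per-image mask probabilities,
        # is consumed.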
        return results[0]