# yoloserv/modules/paravision/recognition/tensorrt/engine.py
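"""TensorRT inference wrapper for the Paravision recognition pipeline.

Loads per-task TensorRT engines (face detection, landmarks, quality,
recognition, attributes, and optionally mask detection) and exposes batched
prediction helpers that run over pre-allocated CUDA buffers.

A minimal usage sketch; the models directory and settings contents are
illustrative, not the real configuration schema:

    engine = Engine(models_dir, settings)
    bboxes, confs, img_idxs = engine.predict_bounding_boxes(images)
    embeddings = engine.predict_embeddings(aligned_faces)
"""
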
import os
import importlib
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit # noqa
from .. import _utils as utils
from ..exceptions import (
    ModelLoadingException,
)
from . import utils as trt_utils
from .builder import load_engine

QUALITIES_QUALITIES_NAME = "qualities"
QUALITIES_ACCEPTABILITIES_NAME = "acceptabilities"
LANDMARKS_LANDMARKS_NAME = "landmarks"
ATTRIBUTES_AGES_NAME = "age_probs"
ATTRIBUTES_GENDERS_NAME = "gender_probs"

UNIT_LOWER_LIMIT = 0
UNIT_UPPER_LIMIT = 1

ERR_ENGINE_UNINITIALIZED = "The engine is not initialized."
ERR_MASK_MODEL_NOT_LOADED = "Mask model not loaded."

FD_NAME = "detection"
LM_NAME = "landmarks"
QL_NAME = "quality"
FR_NAME = "recognition"
AT_NAME = "attributes"
MD_NAME = "mask"
ENGINE_EXT = ".engine"


class Engine:
    def __init__(self, models_dir, settings):
        engine_dirpath = models_dir
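        # Prefer the pre-built TensorRT engine directory shipped with the
        # optional paravision_models package when it manages this models_dir.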
        try:
            paravision_models = importlib.import_module("paravision_models")
            if paravision_models.location() == models_dir:
                engine_dirpath = paravision_models.TRT_ENGINE_PATH
        except (ModuleNotFoundError, AttributeError):
            pass
        self.stream = cuda.Stream()
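        # Resolve the configured model variant for each task from the settings.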
        (
            fd_model_type,
            lm_model_type,
            ql_model_type,
            fr_model_type,
            at_model_type,
            md_model_type,
        ) = utils.get_model_types(settings)
        self.fd_input_shape = utils.read_fd_input_shape(models_dir, fd_model_type)
        fd_engine_path = os.path.join(
            engine_dirpath, FD_NAME, fd_model_type, FD_NAME + ENGINE_EXT
        )
        self.fd_engine = load_engine(
            FD_NAME,
            fd_engine_path,
            models_dir,
            fd_model_type,
            settings,
            self.fd_input_shape,
        )
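        # load_engine can return a falsy value; only create an execution
        # context and I/O buffers for engines that actually loaded. The same
        # pattern repeats for each model below.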
        if self.fd_engine:
            self.fd_context = self.fd_engine.create_execution_context()
            (
                self.fd_inputs,
                self.fd_outputs,
                self.fd_data,
                self.fd_bindings,
            ) = trt_utils.allocate_buffers(self.fd_engine)
        self.lm_input_shape = utils.read_lm_input_shape(models_dir)
        lm_engine_path = os.path.join(
            engine_dirpath, LM_NAME, lm_model_type, LM_NAME + ENGINE_EXT
        )
        self.lm_engine = load_engine(
            LM_NAME,
            lm_engine_path,
            models_dir,
            lm_model_type,
            settings,
            self.lm_input_shape,
        )
        if self.lm_engine:
            self.lm_context = self.lm_engine.create_execution_context()
            (
                self.lm_inputs,
                self.lm_outputs,
                self.lm_data,
                self.lm_bindings,
            ) = trt_utils.allocate_buffers(self.lm_engine)
        # The quality model consumes the same input shape as the landmark model.
        self.ql_input_shape = utils.read_lm_input_shape(models_dir)
        ql_engine_path = os.path.join(
            engine_dirpath, QL_NAME, ql_model_type, QL_NAME + ENGINE_EXT
        )
        self.ql_engine = load_engine(
            QL_NAME,
            ql_engine_path,
            models_dir,
            ql_model_type,
            settings,
            self.ql_input_shape,
        )
        if self.ql_engine:
            self.ql_context = self.ql_engine.create_execution_context()
            (
                self.ql_inputs,
                self.ql_outputs,
                self.ql_data,
                self.ql_bindings,
            ) = trt_utils.allocate_buffers(self.ql_engine)
        self.fr_input_shape = utils.read_fr_input_shape(models_dir)
        fr_engine_path = os.path.join(
            engine_dirpath, FR_NAME, fr_model_type, FR_NAME + ENGINE_EXT
        )
        self.fr_engine = load_engine(
            FR_NAME,
            fr_engine_path,
            models_dir,
            fr_model_type,
            settings,
            self.fr_input_shape,
        )
        if self.fr_engine:
            self.fr_context = self.fr_engine.create_execution_context()
            (
                self.fr_inputs,
                self.fr_outputs,
                self.fr_data,
                self.fr_bindings,
            ) = trt_utils.allocate_buffers(self.fr_engine)
        self.fr_output_shape = utils.read_fr_output_shape(models_dir)
        self.at_input_shape = utils.read_at_input_shape(models_dir)
        at_engine_path = os.path.join(
            engine_dirpath, AT_NAME, at_model_type, AT_NAME + ENGINE_EXT
        )
        self.at_engine = load_engine(
            AT_NAME,
            at_engine_path,
            models_dir,
            at_model_type,
            settings,
            self.at_input_shape,
        )
        if self.at_engine:
            self.at_context = self.at_engine.create_execution_context()
            (
                self.at_inputs,
                self.at_outputs,
                self.at_data,
                self.at_bindings,
            ) = trt_utils.allocate_buffers(self.at_engine)
        # The mask input image is prepared separately, as its shape can deviate
        # from the landmark input shape.
        if "mask" in settings:
            md_model_path = settings["mask"]["models_dir"]
            md_engine_path = os.path.join(
                md_model_path, md_model_type, MD_NAME + ENGINE_EXT
            )
            self.md_input_shape = utils.read_md_input_shape(models_dir)
            self.md_engine = load_engine(
                MD_NAME,
                md_engine_path,
                md_model_path,
                md_model_type,
                settings,
                self.md_input_shape,
            )
            if self.md_engine:
                self.md_context = self.md_engine.create_execution_context()
                (
                    self.md_inputs,
                    self.md_outputs,
                    self.md_data,
                    self.md_bindings,
                ) = trt_utils.allocate_buffers(self.md_engine)
            self.mask_enabled = True
        else:
            self.mask_enabled = False

    def predict_bounding_boxes(self, np_imgs):
        """
        Args:
            np_imgs: (list) list of images loaded in numpy, of format (1, H, W, C)
        Returns:
            bboxes: (list) list containing arrays of bboxes for each image
                in order [x1, y1, x2, y2], scaled between 0 and 1
            confs: (list) list containing arrays of confidence scores
                of the faces for each image
            img_idxs: (list) index of the source image for each detected face
        """
        if not self.fd_engine:
            raise ModelLoadingException(ERR_ENGINE_UNINITIALIZED)
        max_batch_size = self.fd_engine.max_batch_size
        bboxes, confidences, img_idxs = [], [], []
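        # Run inference in chunks no larger than the engine's max batch size.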
        for i in range(0, len(np_imgs), max_batch_size):
            batch = np_imgs[i : min(len(np_imgs), i + max_batch_size)]
            (
                bboxes_batch,
                confidences_batch,
                img_idxs_batch,
            ) = self._batch_predict_bounding_boxes(batch)
            bboxes.extend(bboxes_batch)
            confidences.extend(confidences_batch)
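            # Per-batch image indexes are relative to the chunk; offset them
            # into the full input list.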
            img_idxs.extend(img_idxs_batch + i)
        bboxes = np.asarray(bboxes).reshape(-1, 4)
        confidences = np.asarray(confidences).reshape(-1)
        return bboxes, confidences, img_idxs

    def _batch_predict_bounding_boxes(self, np_imgs):
        np_imgs = np.transpose(np.asarray(np_imgs), [0, 3, 1, 2]).astype(np.float32)
        batch_size = len(np_imgs)
        results = trt_utils.do_inference(
            self.fd_context,
            bindings=self.fd_bindings,
            inputs=self.fd_inputs,
            input_data=np_imgs,
            outputs=self.fd_outputs,
            output_data=self.fd_data,
            stream=self.stream,
            batch_size=batch_size,
        )
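        # Detection outputs: [num_detections, boxes, scores, image indexes];
        # slice each to the number of valid detections.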
        num_detections = int(results[0])
        bboxes = results[1].reshape(-1, 4)[:num_detections]
        scores = results[2][:num_detections].tolist()
        indexes = results[3][:num_detections].astype(np.int32)
        return bboxes, scores, indexes

    def predict_landmarks(self, np_imgs):
        """
        Args:
            np_imgs: (list) imgs loaded in numpy of format (1, H, W, C)
        Returns:
            lmks: (numpy array) landmarks in the shape of (N, 10),
                i.e. five (x, y) points per face
        """
        if not self.lm_engine:
            raise ModelLoadingException(ERR_ENGINE_UNINITIALIZED)
        max_batch_size = self.lm_engine.max_batch_size
        lmks = []
        for i in range(0, len(np_imgs), max_batch_size):
            batch = np_imgs[i : min(len(np_imgs), i + max_batch_size)]
            lmks_batch = self._batch_predict_landmarks(batch)
            lmks.extend(lmks_batch)
        return np.asarray(lmks)

    def _batch_predict_landmarks(self, np_imgs):
        np_imgs = np.transpose(np_imgs, [0, 3, 1, 2]).astype(np.float32)
        batch_size = len(np_imgs)
        results = trt_utils.do_inference(
            self.lm_context,
            bindings=self.lm_bindings,
            inputs=self.lm_inputs,
            input_data=np_imgs,
            outputs=self.lm_outputs,
            output_data=self.lm_data,
            stream=self.stream,
            batch_size=batch_size,
        )
        # Because the buffers are pre-allocated to accommodate the max batch
        # size, the trailing elements of the results stay 0 unless we are
        # finding landmarks for max_batch_size faces, so we explicitly grab
        # only the elements we want.
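        # engine[name] resolves a binding name to its TensorRT binding index;
        # the - 1 skips the input binding to index into the outputs list
        # (this assumes a single image input at binding 0).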
        landmarks = results[self.lm_engine[LANDMARKS_LANDMARKS_NAME] - 1].reshape(
            -1, 10
        )[:batch_size]
        return landmarks

    def predict_embeddings(self, np_imgs):
        """
        Args:
            np_imgs: (list) list of images loaded in numpy of format (1, H, W, C)
        Returns:
            embs: (numpy array) array of embedding arrays
        """
        if not self.fr_engine:
            raise ModelLoadingException(ERR_ENGINE_UNINITIALIZED)
        max_batch_size = self.fr_engine.max_batch_size
        batch_size = len(np_imgs)
        embeddings = []
        for i in range(0, batch_size, max_batch_size):
            batch = np_imgs[i : min(batch_size, i + max_batch_size)]
            embs = self._batch_predict_embeddings(batch)
            embeddings.extend(embs)
        return np.asarray(embeddings).reshape(batch_size, -1)

    def _batch_predict_embeddings(self, np_imgs):
        np_imgs = np.transpose(np_imgs, [0, 3, 1, 2]).astype(np.float32)
        batch_size = len(np_imgs)
        results = trt_utils.do_inference(
            self.fr_context,
            bindings=self.fr_bindings,
            inputs=self.fr_inputs,
            input_data=np_imgs,
            outputs=self.fr_outputs,
            output_data=self.fr_data,
            stream=self.stream,
            batch_size=batch_size,
        )
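        # Only the first output binding, the embedding vectors, is consumed.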
        return results[0]

    def predict_attributes(self, np_imgs):
        if not self.at_engine:
            raise ModelLoadingException(ERR_ENGINE_UNINITIALIZED)
        max_batch_size = self.at_engine.max_batch_size
        batch_size = len(np_imgs)
        all_ages, all_genders = [], []
        for i in range(0, batch_size, max_batch_size):
            batch = np_imgs[i : min(batch_size, i + max_batch_size)]
            ages, genders = self._batch_predict_attributes(batch)
            all_ages.extend(ages)
            all_genders.extend(genders)
        return all_ages, all_genders

    def _batch_predict_attributes(self, np_imgs):
        """
        Args:
            np_imgs: (numpy array) imgs loaded in numpy of format (1, H, W, C)
        Returns:
            age_probs: (numpy array) age probabilities in the shape of (N, 7)
            gender_probs: (numpy array) gender probabilities in the shape of (N, 2)
        """
        np_imgs = np.transpose(np_imgs, [0, 3, 1, 2]).astype(np.float32)
        batch_size = len(np_imgs)
        results = trt_utils.do_inference(
            self.at_context,
            bindings=self.at_bindings,
            inputs=self.at_inputs,
            input_data=np_imgs,
            outputs=self.at_outputs,
            output_data=self.at_data,
            batch_size=batch_size,
            stream=self.stream,
        )
        ages = results[self.at_engine[ATTRIBUTES_AGES_NAME] - 1].reshape(-1, 7)[
            :batch_size
        ]
        genders = results[self.at_engine[ATTRIBUTES_GENDERS_NAME] - 1].reshape(-1, 2)[
            :batch_size
        ]
        return [ages, genders]

    def get_qualities(self, np_imgs):
        """
        Args:
            np_imgs: (list) imgs loaded in numpy of format (1, H, W, C)
        Returns:
            qualities: (numpy array) quality values between 0 and 1
            acceptabilities: (numpy array) acceptability values between 0 and 1
        """
        if not self.ql_engine:
            raise ModelLoadingException(ERR_ENGINE_UNINITIALIZED)
        max_batch_size = self.ql_engine.max_batch_size
        qualities, acceptabilities = [], []
        for i in range(0, len(np_imgs), max_batch_size):
            batch = np_imgs[i : min(len(np_imgs), i + max_batch_size)]
            qualities_batch, acceptabilities_batch = self._batch_get_qualities(batch)
            qualities.extend(qualities_batch)
            acceptabilities.extend(acceptabilities_batch)
        return (
            np.clip(qualities, UNIT_LOWER_LIMIT, UNIT_UPPER_LIMIT),
            np.clip(acceptabilities, UNIT_LOWER_LIMIT, UNIT_UPPER_LIMIT),
        )

    def _batch_get_qualities(self, np_imgs):
        np_imgs = np.transpose(np_imgs, [0, 3, 1, 2]).astype(np.float32)
        batch_size = len(np_imgs)
        results = trt_utils.do_inference(
            self.ql_context,
            bindings=self.ql_bindings,
            inputs=self.ql_inputs,
            input_data=np_imgs,
            outputs=self.ql_outputs,
            output_data=self.ql_data,
            stream=self.stream,
            batch_size=batch_size,
        )
        qualities = results[self.ql_engine[QUALITIES_QUALITIES_NAME] - 1][:batch_size]
        acceptabilities = results[self.ql_engine[QUALITIES_ACCEPTABILITIES_NAME] - 1][
            :batch_size
        ]
        return qualities, acceptabilities

    def check_for_masks(self, np_imgs):
        if not self.md_engine:
            raise ModelLoadingException(ERR_MASK_MODEL_NOT_LOADED)
        max_batch_size = self.md_engine.max_batch_size
        batch_size = len(np_imgs)
        mask_probabilities = []
        for i in range(0, batch_size, max_batch_size):
            batch = np_imgs[i : min(batch_size, i + max_batch_size)]
            mask_probabilities.extend(self._batch_check_for_masks(batch))
        return np.asarray(mask_probabilities)

    def _batch_check_for_masks(self, np_imgs):
        """
        Args:
            np_imgs: (list) imgs loaded in numpy of format (1, H, W, C)
        Returns:
            mask_probs: (numpy array) mask probabilities in the shape of (N, 1, 1)
        """
        np_imgs = np.transpose(np_imgs, [0, 3, 1, 2]).astype(np.float32)
        results = trt_utils.do_inference(
            self.md_context,
            bindings=self.md_bindings,
            inputs=self.md_inputs,
            input_data=np_imgs,
            outputs=self.md_outputs,
            output_data=self.md_data,
            stream=self.stream,
            batch_size=len(np_imgs),
        )
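        # Only the first output binding, the per-image mask probabilities,
        # is consumed.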
        return results[0]