# yoloserv/modules/paravision/recognition/openvino/engine.py

import multiprocessing
import numpy as np
import os

from openvino.inference_engine import IECore

from .. import _utils as utils

# Bounds of the unit interval; Engine.get_qualities clips its outputs to it.
UNIT_LOWER_LIMIT = 0
UNIT_UPPER_LIMIT = 1

# Per-model names. Engine uses each one as the file stem of the model's
# .xml/.bin pair; all except MD_NAME are also used as the model's
# sub-directory under `models_dir` (the mask model directory comes from
# settings instead).
FD_NAME = "detection"
LM_NAME = "landmarks"
QL_NAME = "quality"
FR_NAME = "recognition"
AT_NAME = "attributes"
MD_NAME = "mask"

# File extensions of the two files passed to IECore.read_network:
# BIN_EXT for the `weights=` argument, XML_EXT for the `model=` argument.
BIN_EXT = ".bin"
XML_EXT = ".xml"


class Engine:
    """OpenVINO inference engine running the face-analysis networks on CPU.

    On construction the detection, landmarks, quality, recognition and
    attributes networks are read from ``models_dir`` and loaded on the "CPU"
    device. The mask network is loaded only when ``settings`` contains a
    ``"mask"`` entry.

    Every prediction method takes a batch of images as a 4-D array-like whose
    axis 3 is moved to axis 1 before inference (i.e. (N, H, W, C) ->
    (N, C, H, W)), and then runs the network once per image. An empty batch
    yields empty results without running any inference.
    """

    def __init__(self, models_dir, settings):
        """
        Args:
            models_dir: (str) root directory holding one sub-directory per
                    model (detection, landmarks, quality, recognition,
                    attributes)
            settings: (dict) engine settings; passed to
                    utils.get_model_types and, when it has a "mask" key,
                    settings["mask"]["models_dir"] is the mask model directory
        """
        ie_core = IECore()

        # Use every core by default; PV_OPENVINO_THREADS_LIMIT may lower the
        # count (never below 1, never above the core count).
        num_threads = multiprocessing.cpu_count()
        try:
            num_threads = min(
                num_threads, max(int(os.getenv("PV_OPENVINO_THREADS_LIMIT")), 1)
            )
        except (TypeError, ValueError):
            # Variable unset (int(None) -> TypeError) or not an integer
            # (ValueError): deliberately keep the default.
            pass
        ie_core.set_config({"CPU_THREADS_NUM": str(num_threads)}, "CPU")

        (
            fd_model_type,
            lm_model_type,
            ql_model_type,
            fr_model_type,
            at_model_type,
            md_model_type,
        ) = utils.get_model_types(settings)

        # Face detection.
        fd_net = self._read_network(
            ie_core, os.path.join(models_dir, FD_NAME, fd_model_type), FD_NAME
        )
        self.fd_input_name = next(iter(fd_net.input_info))
        self.fd_input_shape = utils.read_fd_input_shape(models_dir, fd_model_type)
        self.fd_bboxes_name = "bboxes"
        self.fd_scores_name = "scores"
        self.fd_select_idxs_name = "selected_indices"
        self.fd_net = ie_core.load_network(network=fd_net, device_name="CPU")

        # Landmarks.
        lm_net = self._read_network(
            ie_core, os.path.join(models_dir, LM_NAME, lm_model_type), LM_NAME
        )
        self.lm_input_name = next(iter(lm_net.input_info))
        self.lm_input_shape = utils.read_lm_input_shape(models_dir)
        self.lm_landmarks_name = "landmarks"
        self.lm_net = ie_core.load_network(network=lm_net, device_name="CPU")

        # Quality.
        ql_net = self._read_network(
            ie_core, os.path.join(models_dir, QL_NAME, ql_model_type), QL_NAME
        )
        self.ql_input_name = next(iter(ql_net.input_info))
        # NOTE(review): the quality input shape is read with the *landmarks*
        # reader; kept as-is -- confirm this is intentional.
        self.ql_input_shape = utils.read_lm_input_shape(models_dir)
        self.ql_qualities_name = "qualities"
        self.ql_acceptabilities_name = "acceptabilities"
        self.ql_net = ie_core.load_network(network=ql_net, device_name="CPU")

        # Recognition (embeddings).
        fr_net = self._read_network(
            ie_core, os.path.join(models_dir, FR_NAME, fr_model_type), FR_NAME
        )
        self.fr_input_name = next(iter(fr_net.input_info))
        self.fr_input_shape = utils.read_fr_input_shape(models_dir)
        self.fr_output_name = next(iter(fr_net.outputs))
        self.fr_output_shape = utils.read_fr_output_shape(models_dir)
        self.fr_net = ie_core.load_network(network=fr_net, device_name="CPU")

        # Attributes.
        at_net = self._read_network(
            ie_core, os.path.join(models_dir, AT_NAME, at_model_type), AT_NAME
        )
        self.at_input_name = next(iter(at_net.input_info))
        self.at_input_shape = utils.read_at_input_shape(models_dir)
        self.at_net = ie_core.load_network(network=at_net, device_name="CPU")

        # Mask detection is optional; the md_* attributes exist only when
        # mask_enabled is True.
        self.mask_enabled = "mask" in settings
        if self.mask_enabled:
            md_model_path = settings["mask"]["models_dir"]
            md_net = self._read_network(
                ie_core, os.path.join(md_model_path, md_model_type), MD_NAME
            )
            self.md_input_name = next(iter(md_net.input_info))
            self.md_input_shape = md_net.input_info[
                self.md_input_name
            ].input_data.shape[2:]
            self.md_net = ie_core.load_network(network=md_net, device_name="CPU")

    @staticmethod
    def _read_network(ie_core, model_dir, name):
        """Read the network stored as <model_dir>/<name>.xml and <name>.bin."""
        stem = os.path.join(model_dir, name)
        return ie_core.read_network(model=stem + XML_EXT, weights=stem + BIN_EXT)

    @staticmethod
    def _to_nchw(np_imgs):
        """Move axis 3 of a 4-D image batch to axis 1; () for an empty batch."""
        # np.transpose rejects an empty list (its rank is not 4), so empty
        # batches are short-circuited to an empty iterable.
        if len(np_imgs) == 0:
            return ()
        return np.transpose(np_imgs, (0, 3, 1, 2))

    def predict_bounding_boxes(self, np_imgs):
        """
        Args:
            np_imgs: (list or numpy array) batch of images of format
                    (N, H, W, C)

        Returns:
            bboxes: (list) selected bboxes of all images, concatenated in
                    image order
            confs: (list) confidence score (float) of each returned bbox
            img_idxs: (list) for each returned bbox, the index of the image
                    in the batch it was detected in
        """
        all_bboxes, all_scores, img_idxs = [], [], []

        for img, np_img in enumerate(self._to_nchw(np_imgs)):
            ie_out = self.fd_net.infer(inputs={self.fd_input_name: np_img})

            bboxes = ie_out[self.fd_bboxes_name]
            scores = ie_out[self.fd_scores_name]
            select_idxs = ie_out[self.fd_select_idxs_name]

            # Rows from the first -1 onwards are padding; keep what precedes.
            padding = np.flatnonzero(select_idxs[:, 0] == -1)
            if padding.size:
                select_idxs = select_idxs[: padding[0]]

            # Each kept row is (batch index, class index, box index).
            for batch_idx, class_idx, idx in select_idxs:
                all_bboxes.append(bboxes[batch_idx][idx])
                all_scores.append(scores[batch_idx][class_idx][idx].item())

            img_idxs.extend([img] * len(select_idxs))

        return all_bboxes, all_scores, img_idxs

    def predict_landmarks(self, np_imgs):
        """
        Args:
            np_imgs: (list or numpy array) batch of images of format
                    (N, H, W, C)
        Returns:
            lmks: (numpy array) squeezed "landmarks" output of every image,
                    stacked along the first axis
        """
        landmarks = []

        for np_img in self._to_nchw(np_imgs):
            ie_out = self.lm_net.infer(inputs={self.lm_input_name: np_img})
            landmarks.append(np.squeeze(ie_out[self.lm_landmarks_name]))

        return np.asarray(landmarks)

    def get_qualities(self, np_imgs):
        """
        Args:
            np_imgs: (list or numpy array) batch of images of format
                    (N, H, W, C)
        Returns:
            qualities: (numpy array) quality values clipped to [0, 1]
            acceptabilities: (numpy array) acceptability values clipped
                    to [0, 1]
        """
        qualities, acceptabilities = [], []

        for np_img in self._to_nchw(np_imgs):
            ie_out = self.ql_net.infer(inputs={self.ql_input_name: np_img})
            qualities.append(np.squeeze(ie_out[self.ql_qualities_name]))
            acceptabilities.append(np.squeeze(ie_out[self.ql_acceptabilities_name]))

        return (
            np.clip(qualities, UNIT_LOWER_LIMIT, UNIT_UPPER_LIMIT),
            np.clip(acceptabilities, UNIT_LOWER_LIMIT, UNIT_UPPER_LIMIT),
        )

    def predict_embeddings(self, np_imgs):
        """
        Args:
            np_imgs: (list or numpy array) batch of images of format
                    (N, H, W, C)

        Returns:
            embs: (numpy array) array of embedding arrays, one per image
        """
        embeddings = []

        for np_img in self._to_nchw(np_imgs):
            ie_out = self.fr_net.infer(inputs={self.fr_input_name: np_img})
            embeddings.append(np.squeeze(ie_out[self.fr_output_name]))

        return np.asarray(embeddings)

    def predict_attributes(self, np_imgs):
        """
        Args:
            np_imgs: (list or numpy array) batch of images of format
                    (N, H, W, C)

        Returns:
            ages: (list) first entry of the "age_probs" output, per image
            genders: (list) first entry of the "gender_probs" output,
                    per image
        """
        ages, genders = [], []

        for np_img in self._to_nchw(np_imgs):
            ie_out = self.at_net.infer(inputs={self.at_input_name: np_img})
            ages.append(ie_out["age_probs"][0])
            genders.append(ie_out["gender_probs"][0])

        return ages, genders

    def check_for_masks(self, np_imgs):
        """
        Only usable when the engine was built with a "mask" entry in its
        settings (mask_enabled is True); otherwise the mask network
        attributes do not exist.

        Args:
            np_imgs: (list or numpy array) batch of images of format
                    (N, H, W, C)

        Returns:
            mask_probabilities: (list) element [0][0] of the network's first
                    output, per image
        """
        mask_probabilities = []

        for np_img in self._to_nchw(np_imgs):
            ie_out = self.md_net.infer(inputs={self.md_input_name: np_img})
            first_output = next(iter(ie_out.values()))
            mask_probabilities.append(first_output[0][0])

        return mask_probabilities