yoloserv/modules/paravision/recognition/openvino/engine.py

246 lines
8.8 KiB
Python

import multiprocessing
import numpy as np
import os
from openvino.inference_engine import IECore
from .. import _utils as utils
# Bounds passed to np.clip for the quality and acceptability outputs.
UNIT_LOWER_LIMIT = 0
UNIT_UPPER_LIMIT = 1
# Model names. For detection, landmarks, quality, recognition and attributes
# the name is used both as a sub-directory of the models directory and as the
# file stem of that model's network files.
FD_NAME = "detection"
LM_NAME = "landmarks"
QL_NAME = "quality"
FR_NAME = "recognition"
AT_NAME = "attributes"
# The mask model uses its name only as the file stem; its directory is taken
# from settings["mask"]["models_dir"].
MD_NAME = "mask"
# Extensions of the two files handed to IECore.read_network:
# BIN_EXT for the `weights` argument, XML_EXT for the `model` argument.
BIN_EXT = ".bin"
XML_EXT = ".xml"
class Engine:
    """OpenVINO (CPU) inference engine for the face-analysis networks.

    Construction reads the detection, landmarks, quality, recognition and
    attributes networks from ``models_dir`` and loads each on the "CPU"
    device.  The mask network is read and loaded only when ``settings``
    contains a ``"mask"`` entry; ``mask_enabled`` records which case applies.

    Every inference method takes a 4-D batch of images, moves axis 3 to
    position 1 (presumably NHWC -> NCHW; confirm against the caller) and runs
    the matching network one image at a time.  An empty batch produces empty
    results instead of raising.
    """

    def __init__(self, models_dir, settings):
        """Read every network from disk and load it on the CPU device.

        Args:
            models_dir: directory containing
                ``<name>/<model_type>/<name>.xml`` and ``.bin`` for each of
                the detection, landmarks, quality, recognition and attributes
                models.
            settings: mapping passed to ``utils.get_model_types``.  When it
                has a ``"mask"`` key, ``settings["mask"]["models_dir"]`` is
                the directory holding ``<model_type>/mask.xml`` and ``.bin``.

        The number of CPU threads defaults to ``multiprocessing.cpu_count()``
        and can be lowered (never below 1) with the integer environment
        variable ``PV_OPENVINO_THREADS_LIMIT``.
        """
        device = "CPU"
        ie_core = IECore()

        num_threads = multiprocessing.cpu_count()
        try:
            # int(None) raises TypeError (variable unset); a non-integer
            # string raises ValueError.  Either way keep the CPU count.
            thread_limit = int(os.getenv("PV_OPENVINO_THREADS_LIMIT"))
        except (TypeError, ValueError):
            pass
        else:
            num_threads = min(num_threads, max(thread_limit, 1))
        ie_core.set_config({"CPU_THREADS_NUM": str(num_threads)}, device)

        (
            fd_model_type,
            lm_model_type,
            ql_model_type,
            fr_model_type,
            at_model_type,
            md_model_type,
        ) = utils.get_model_types(settings)

        # Face detection.
        fd_net = self._read_network(
            ie_core, os.path.join(models_dir, FD_NAME, fd_model_type), FD_NAME
        )
        self.fd_input_name = next(iter(fd_net.input_info))
        self.fd_input_shape = utils.read_fd_input_shape(models_dir, fd_model_type)
        self.fd_bboxes_name = "bboxes"
        self.fd_scores_name = "scores"
        self.fd_select_idxs_name = "selected_indices"
        self.fd_net = ie_core.load_network(network=fd_net, device_name=device)

        # Landmarks.
        lm_net = self._read_network(
            ie_core, os.path.join(models_dir, LM_NAME, lm_model_type), LM_NAME
        )
        self.lm_input_name = next(iter(lm_net.input_info))
        self.lm_input_shape = utils.read_lm_input_shape(models_dir)
        self.lm_landmarks_name = "landmarks"
        self.lm_net = ie_core.load_network(network=lm_net, device_name=device)

        # Quality.
        ql_net = self._read_network(
            ie_core, os.path.join(models_dir, QL_NAME, ql_model_type), QL_NAME
        )
        self.ql_input_name = next(iter(ql_net.input_info))
        # NOTE(review): the quality input shape is read with the *landmarks*
        # helper; presumably both models share an input shape -- confirm.
        self.ql_input_shape = utils.read_lm_input_shape(models_dir)
        self.ql_qualities_name = "qualities"
        self.ql_acceptabilities_name = "acceptabilities"
        self.ql_net = ie_core.load_network(network=ql_net, device_name=device)

        # Recognition (embeddings).
        fr_net = self._read_network(
            ie_core, os.path.join(models_dir, FR_NAME, fr_model_type), FR_NAME
        )
        self.fr_input_name = next(iter(fr_net.input_info))
        self.fr_input_shape = utils.read_fr_input_shape(models_dir)
        self.fr_output_name = next(iter(fr_net.outputs))
        self.fr_output_shape = utils.read_fr_output_shape(models_dir)
        self.fr_net = ie_core.load_network(network=fr_net, device_name=device)

        # Attributes (age / gender).
        at_net = self._read_network(
            ie_core, os.path.join(models_dir, AT_NAME, at_model_type), AT_NAME
        )
        self.at_input_name = next(iter(at_net.input_info))
        self.at_input_shape = utils.read_at_input_shape(models_dir)
        self.at_net = ie_core.load_network(network=at_net, device_name=device)

        # Mask detection is optional; the md_* attributes exist only when it
        # is configured.
        self.mask_enabled = "mask" in settings
        if self.mask_enabled:
            md_model_path = settings["mask"]["models_dir"]
            md_net = self._read_network(
                ie_core, os.path.join(md_model_path, md_model_type), MD_NAME
            )
            self.md_input_name = next(iter(md_net.input_info))
            # Keep only the dimensions after the first two of the input shape.
            self.md_input_shape = md_net.input_info[
                self.md_input_name
            ].input_data.shape[2:]
            self.md_net = ie_core.load_network(network=md_net, device_name=device)

    @staticmethod
    def _read_network(ie_core, model_dir, name):
        """Read ``<model_dir>/<name>.xml`` with weights ``<model_dir>/<name>.bin``."""
        return ie_core.read_network(
            model=os.path.join(model_dir, name + XML_EXT),
            weights=os.path.join(model_dir, name + BIN_EXT),
        )

    @staticmethod
    def _to_channels_first(np_imgs):
        """Return the batch with its axes reordered to (0, 3, 1, 2).

        An empty batch yields an empty array, so callers simply iterate zero
        times; ``np.transpose`` would otherwise raise ValueError on it.
        """
        # len() rather than truthiness: np_imgs may be a numpy array.
        if len(np_imgs) == 0:
            return np.empty((0,))
        return np.transpose(np_imgs, (0, 3, 1, 2))

    def predict_bounding_boxes(self, np_imgs):
        """Run face detection on every image of the batch.

        Args:
            np_imgs: 4-D batch of images (list or numpy array); axis 3 is
                moved to position 1 before inference.

        Returns:
            Tuple ``(bboxes, scores, img_idxs)`` of three flat lists with one
            entry per selected detection, across all images:
                bboxes: box array taken from the "bboxes" output (the
                    original documentation describes it as
                    [x1, y1, x2, y2] scaled to 0..1 -- not verifiable here).
                scores: confidence as a Python scalar.
                img_idxs: index within the batch of the image the detection
                    came from.
        """
        all_bboxes, all_scores, img_idxs = [], [], []
        for img_idx, np_img in enumerate(self._to_channels_first(np_imgs)):
            ie_out = self.fd_net.infer(inputs={self.fd_input_name: np_img})
            bboxes = ie_out[self.fd_bboxes_name]
            scores = ie_out[self.fd_scores_name]
            select_idxs = ie_out[self.fd_select_idxs_name]

            # Rows are valid only up to the first one whose first column is
            # -1; drop that row and everything after it.
            padding_rows = np.flatnonzero(select_idxs[:, 0] == -1)
            if padding_rows.size:
                select_idxs = select_idxs[: padding_rows[0]]

            # Each remaining row is (batch index, class index, box index).
            for batch_idx, class_idx, idx in select_idxs:
                all_bboxes.append(bboxes[batch_idx][idx])
                all_scores.append(scores[batch_idx][class_idx][idx].item())
            img_idxs.extend([img_idx] * len(select_idxs))

        return all_bboxes, all_scores, img_idxs

    def predict_landmarks(self, np_imgs):
        """Run the landmarks network on every image of the batch.

        Args:
            np_imgs: 4-D batch of images (list or numpy array); axis 3 is
                moved to position 1 before inference.

        Returns:
            numpy array stacking the squeezed "landmarks" output of each
            image (the original documentation gives the shape as (N, 5, 2)).
        """
        landmarks = [
            np.squeeze(
                self.lm_net.infer(inputs={self.lm_input_name: np_img})[
                    self.lm_landmarks_name
                ]
            )
            for np_img in self._to_channels_first(np_imgs)
        ]
        return np.asarray(landmarks)

    def get_qualities(self, np_imgs):
        """Run the quality network on every image of the batch.

        Args:
            np_imgs: 4-D batch of images (list or numpy array); axis 3 is
                moved to position 1 before inference.

        Returns:
            Tuple ``(qualities, acceptabilities)`` of numpy arrays, one entry
            per image, each clipped to
            [UNIT_LOWER_LIMIT, UNIT_UPPER_LIMIT].
        """
        qualities, acceptabilities = [], []
        for np_img in self._to_channels_first(np_imgs):
            ie_out = self.ql_net.infer(inputs={self.ql_input_name: np_img})
            qualities.append(np.squeeze(ie_out[self.ql_qualities_name]))
            acceptabilities.append(
                np.squeeze(ie_out[self.ql_acceptabilities_name])
            )
        return (
            np.clip(qualities, UNIT_LOWER_LIMIT, UNIT_UPPER_LIMIT),
            np.clip(acceptabilities, UNIT_LOWER_LIMIT, UNIT_UPPER_LIMIT),
        )

    def predict_embeddings(self, np_imgs):
        """Run the recognition network on every image of the batch.

        Args:
            np_imgs: 4-D batch of images (list or numpy array); axis 3 is
                moved to position 1 before inference.

        Returns:
            numpy array stacking the squeezed embedding of each image.
        """
        embeddings = [
            np.squeeze(
                self.fr_net.infer(inputs={self.fr_input_name: np_img})[
                    self.fr_output_name
                ]
            )
            for np_img in self._to_channels_first(np_imgs)
        ]
        return np.asarray(embeddings)

    def predict_attributes(self, np_imgs):
        """Run the attributes network on every image of the batch.

        Args:
            np_imgs: 4-D batch of images (list or numpy array); axis 3 is
                moved to position 1 before inference.

        Returns:
            Tuple ``(ages, genders)`` of Python lists holding, per image, the
            first element of the "age_probs" and "gender_probs" outputs.
        """
        ages, genders = [], []
        for np_img in self._to_channels_first(np_imgs):
            ie_out = self.at_net.infer(inputs={self.at_input_name: np_img})
            ages.append(ie_out["age_probs"][0])
            genders.append(ie_out["gender_probs"][0])
        return ages, genders

    def check_for_masks(self, np_imgs):
        """Run the mask network on every image of the batch.

        Args:
            np_imgs: 4-D batch of images (list or numpy array); axis 3 is
                moved to position 1 before inference.

        Returns:
            Python list holding, per image, element ``[0][0]`` of the
            network's first output (the mask probabilities).

        Raises:
            AttributeError: for a non-empty batch when the engine was built
                without a "mask" entry in its settings (no mask network).
        """
        mask_probabilities = []
        for np_img in self._to_channels_first(np_imgs):
            ie_out = self.md_net.infer(inputs={self.md_input_name: np_img})
            first_output = next(iter(ie_out.values()))
            mask_probabilities.append(first_output[0][0])
        return mask_probabilities