import os
import base64
from typing import Union, Tuple
from pathlib import Path

# 3rd party dependencies
from PIL import Image
import requests
import numpy as np
import cv2
import tensorflow as tf
from deprecated import deprecated

# package dependencies
from deepface.detectors import FaceDetector
from deepface.commons.logger import Logger

logger = Logger(module="commons.functions")

# pylint: disable=no-else-raise

# --------------------------------------------------
# configurations of dependencies

tf_version = tf.__version__
tf_major_version = int(tf_version.split(".", maxsplit=1)[0])
tf_minor_version = int(tf_version.split(".")[1])

if tf_major_version == 1:
    from keras.preprocessing import image
elif tf_major_version == 2:
    from tensorflow.keras.preprocessing import image

# --------------------------------------------------


def initialize_folder() -> None:
    """Initialize the folder for storing weights and models.

    Raises:
        OSError: if the folder cannot be created.
    """
    home = get_deepface_home()
    deepface_home_path = home + "/.deepface"
    weights_path = deepface_home_path + "/weights"

    if not os.path.exists(deepface_home_path):
        os.makedirs(deepface_home_path, exist_ok=True)
        logger.info(f"Directory {home}/.deepface created")

    if not os.path.exists(weights_path):
        os.makedirs(weights_path, exist_ok=True)
        logger.info(f"Directory {home}/.deepface/weights created")


def get_deepface_home() -> str:
    """Get the home directory for storing weights and models.

    Returns:
        str: the home directory.
    """
    return str(os.getenv("DEEPFACE_HOME", default=str(Path.home())))


# --------------------------------------------------


def loadBase64Img(uri: str) -> np.ndarray:
    """Load an image from a base64 string.

    Args:
        uri: a base64 string.

    Returns:
        numpy array: the loaded image in BGR format.
    """
    encoded_data = uri.split(",")[1]
    # np.frombuffer replaces the deprecated np.fromstring
    nparr = np.frombuffer(base64.b64decode(encoded_data), np.uint8)
    img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
    return img_bgr


def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
    """
    Load an image from a path, url, base64 string or numpy array.

    Args:
        img: a path, url, base64 string or numpy array.

    Returns:
        image (numpy array): the loaded image in BGR format
        image name (str): the image name itself
    """
    # the image is already a numpy array
    if isinstance(img, np.ndarray):
        return img, "numpy array"

    if isinstance(img, Path):
        img = str(img)

    if not isinstance(img, str):
        raise ValueError(f"img must be numpy array or str but it is {type(img)}")

    # the image is a base64 string
    if img.startswith("data:image/"):
        return loadBase64Img(img), "base64 encoded string"

    # the image is a url
    if img.startswith("http"):
        # PIL has no "BGR" mode; load as RGB and reverse the channel order instead
        img_rgb = Image.open(requests.get(img, stream=True, timeout=60).raw).convert("RGB")
        return (
            np.array(img_rgb)[:, :, ::-1],
            # return url as image name
            img,
        )

    # the image must be a file on the file system then
    if not os.path.isfile(img):
        raise ValueError(f"Confirm that {img} exists")

    # the image name must contain only english (ascii) characters
    if not img.isascii():
        raise ValueError(f"Input image must not have non-english characters - {img}")

    img_obj_bgr = cv2.imread(img)
    return img_obj_bgr, img
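# A minimal usage sketch for load_image (illustrative only; the sample path and
# url below are hypothetical, not shipped with this module):
#
#     img, name = load_image("tests/dataset/img1.jpg")            # file path -> (BGR array, path)
#     img, name = load_image("https://example.com/face.jpg")      # url -> (BGR array, url)
#     img, name = load_image(np.zeros((224, 224, 3), np.uint8))   # array -> (array, "numpy array")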
# --------------------------------------------------


def extract_faces(
    img: Union[str, np.ndarray],
    target_size: tuple = (224, 224),
    detector_backend: str = "opencv",
    grayscale: bool = False,
    enforce_detection: bool = True,
    align: bool = True,
) -> list:
    """Extract faces from an image.

    Args:
        img: a path, url, base64 string or numpy array.
        target_size (tuple, optional): the target size of the extracted faces.
            Defaults to (224, 224).
        detector_backend (str, optional): the face detector backend. Defaults to "opencv".
        grayscale (bool, optional): whether to convert the extracted faces to grayscale.
            Defaults to False.
        enforce_detection (bool, optional): whether to enforce face detection.
            Defaults to True.
        align (bool, optional): whether to align the extracted faces. Defaults to True.

    Raises:
        ValueError: if no face could be detected and enforce_detection is True.

    Returns:
        list: a list of extracted faces.
    """

    # this list stores tuples of the face image itself (numpy), its region and confidence
    extracted_faces = []

    # img might be a path, base64 string or numpy array. Convert it to numpy whatever it is.
    img, img_name = load_image(img)
    img_region = [0, 0, img.shape[1], img.shape[0]]

    if detector_backend == "skip":
        face_objs = [(img, img_region, 0)]
    else:
        face_detector = FaceDetector.build_model(detector_backend)
        face_objs = FaceDetector.detect_faces(face_detector, detector_backend, img, align)

    # in case no face was found
    if len(face_objs) == 0 and enforce_detection is True:
        if img_name is not None:
            raise ValueError(
                f"Face could not be detected in {img_name}. "
                "Please confirm that the picture is a face photo "
                "or consider setting the enforce_detection param to False."
            )
        else:
            raise ValueError(
                "Face could not be detected. Please confirm that the picture is a face photo "
                "or consider setting the enforce_detection param to False."
            )

    if len(face_objs) == 0 and enforce_detection is False:
        face_objs = [(img, img_region, 0)]

    for current_img, current_region, confidence in face_objs:
        if current_img.shape[0] > 0 and current_img.shape[1] > 0:
            if grayscale is True:
                current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)

            # shrink by the limiting dimension, then pad up to target_size
            factor_0 = target_size[0] / current_img.shape[0]
            factor_1 = target_size[1] / current_img.shape[1]
            factor = min(factor_0, factor_1)

            dsize = (
                int(current_img.shape[1] * factor),
                int(current_img.shape[0] * factor),
            )
            current_img = cv2.resize(current_img, dsize)

            diff_0 = target_size[0] - current_img.shape[0]
            diff_1 = target_size[1] - current_img.shape[1]

            if grayscale is False:
                # put the base image in the middle of the padded image
                current_img = np.pad(
                    current_img,
                    (
                        (diff_0 // 2, diff_0 - diff_0 // 2),
                        (diff_1 // 2, diff_1 - diff_1 // 2),
                        (0, 0),
                    ),
                    "constant",
                )
            else:
                current_img = np.pad(
                    current_img,
                    (
                        (diff_0 // 2, diff_0 - diff_0 // 2),
                        (diff_1 // 2, diff_1 - diff_1 // 2),
                    ),
                    "constant",
                )

            # double check: if the padded image still does not match the target size,
            # force a resize (cv2.resize expects dsize as (width, height))
            if current_img.shape[0:2] != target_size:
                current_img = cv2.resize(current_img, (target_size[1], target_size[0]))

            # convert to a float tensor, add a batch dimension and scale pixels to [0, 1]
            img_pixels = image.img_to_array(current_img)
            img_pixels = np.expand_dims(img_pixels, axis=0)
            img_pixels /= 255  # normalize input in [0, 1]

            # int cast avoids the exception - object of type 'float32' is not JSON serializable
            region_obj = {
                "x": int(current_region[0]),
                "y": int(current_region[1]),
                "w": int(current_region[2]),
                "h": int(current_region[3]),
            }

            extracted_face = [img_pixels, region_obj, confidence]
            extracted_faces.append(extracted_face)

    if len(extracted_faces) == 0 and enforce_detection is True:
        raise ValueError(
            f"Detected face shape is {img.shape}. "
            "Consider setting the enforce_detection arg to False."
        )

    return extracted_faces
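# An illustrative call to extract_faces (a sketch; "img1.jpg" is a hypothetical
# sample path). Each returned item is [pixels, region, confidence], where pixels
# has shape (1, target_size[0], target_size[1], 3) scaled to [0, 1]:
#
#     face_objs = extract_faces("img1.jpg", target_size=(224, 224),
#                               detector_backend="opencv", enforce_detection=False)
#     for img_pixels, region, confidence in face_objs:
#         print(img_pixels.shape, region, confidence)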
def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
    """Normalize input image.

    Args:
        img (numpy array): the input image.
        normalization (str, optional): the normalization technique. Defaults to "base",
            for no normalization.

    Returns:
        numpy array: the normalized image.
    """

    # issue 131 declares that some normalization techniques improve the accuracy
    if normalization == "base":
        return img

    # @trevorgribble and @davedgd contributed this feature
    # restore input to the [0, 255] scale because it was normalized to [0, 1]
    # in extract_faces
    img *= 255

    if normalization == "raw":
        pass  # return just the restored pixels

    elif normalization == "Facenet":
        mean, std = img.mean(), img.std()
        img = (img - mean) / std

    elif normalization == "Facenet2018":
        # simply / 127.5 - 1 (similar to the facenet 2018 model preprocessing step
        # as @iamrishab posted)
        img /= 127.5
        img -= 1

    elif normalization == "VGGFace":
        # mean subtraction based on VGGFace1 training data
        img[..., 0] -= 93.5940
        img[..., 1] -= 104.7624
        img[..., 2] -= 129.1863

    elif normalization == "VGGFace2":
        # mean subtraction based on VGGFace2 training data
        img[..., 0] -= 91.4953
        img[..., 1] -= 103.8827
        img[..., 2] -= 131.0912

    elif normalization == "ArcFace":
        # reference study: the faces are cropped and resized to 112x112, and each pixel
        # (ranging over [0, 255]) in RGB images is normalized by subtracting 127.5
        # and then dividing by 128
        img -= 127.5
        img /= 128

    else:
        raise ValueError(f"unimplemented normalization type - {normalization}")

    return img


def find_target_size(model_name: str) -> tuple:
    """Find the target size of the model.

    Args:
        model_name (str): the model name.

    Returns:
        tuple: the target size.
    """

    target_sizes = {
        "VGG-Face": (224, 224),
        "Facenet": (160, 160),
        "Facenet512": (160, 160),
        "OpenFace": (96, 96),
        "DeepFace": (152, 152),
        "DeepID": (47, 55),
        "Dlib": (150, 150),
        "ArcFace": (112, 112),
        "SFace": (112, 112),
    }

    target_size = target_sizes.get(model_name)

    if target_size is None:
        raise ValueError(f"unimplemented model name - {model_name}")

    return target_size


# ---------------------------------------------------
# deprecated functions


@deprecated(version="0.0.78", reason="Use extract_faces instead of preprocess_face")
def preprocess_face(
    img: Union[str, np.ndarray],
    target_size=(224, 224),
    detector_backend="opencv",
    grayscale=False,
    enforce_detection=True,
    align=True,
) -> Union[np.ndarray, None]:
    """
    Preprocess only one face.

    Args:
        img (str or numpy): the input image.
        target_size (tuple, optional): the target size. Defaults to (224, 224).
        detector_backend (str, optional): the detector backend. Defaults to "opencv".
        grayscale (bool, optional): whether to convert to grayscale. Defaults to False.
        enforce_detection (bool, optional): whether to enforce face detection.
            Defaults to True.
        align (bool, optional): whether to align the face. Defaults to True.

    Returns:
        loaded image (numpy): the preprocessed face.

    Raises:
        ValueError: if the face is not detected and enforce_detection is True.

    Deprecated:
        0.0.78: Use extract_faces instead of preprocess_face.
    """
    logger.warn("Function preprocess_face is deprecated. Use extract_faces instead.")
    result = None

    img_objs = extract_faces(
        img=img,
        target_size=target_size,
        detector_backend=detector_backend,
        grayscale=grayscale,
        enforce_detection=enforce_detection,
        align=align,
    )

    if len(img_objs) > 0:
        result, _, _ = img_objs[0]
        # discard the expanded batch dimension
        if len(result.shape) == 4:
            result = result[0]

    return result
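# ---------------------------------------------------
# A minimal smoke test tying the helpers together (a sketch; it assumes a local
# sample image exists at the hypothetical path "tests/dataset/img1.jpg"):

if __name__ == "__main__":
    sample_path = "tests/dataset/img1.jpg"  # hypothetical sample image, not shipped here
    size = find_target_size("VGG-Face")  # -> (224, 224)
    faces = extract_faces(sample_path, target_size=size, enforce_detection=False)
    for pixels, region, conf in faces:
        # normalize a copy, since normalize_input mutates its input in place
        normalized = normalize_input(pixels.copy(), normalization="Facenet")
        logger.info(f"face at {region}, confidence {conf}, tensor shape {normalized.shape}")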