# common dependencies
import os
import warnings
import logging
from typing import Any, Dict, List, Tuple, Union

# 3rd party dependencies
import numpy as np
import pandas as pd
import tensorflow as tf
from deprecated import deprecated

# package dependencies
from deepface.commons import functions
from deepface.commons.logger import Logger
from deepface.modules import (
    modeling,
    representation,
    verification,
    recognition,
    demography,
    detection,
    realtime,
)

# pylint: disable=no-else-raise, simplifiable-if-expression

logger = Logger(module="DeepFace")

# -----------------------------------
# configurations for dependencies

warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
tf_version = int(tf.__version__.split(".", maxsplit=1)[0])
if tf_version == 2:
    tf.get_logger().setLevel(logging.ERROR)
    from tensorflow.keras.models import Model
else:
    from keras.models import Model
# -----------------------------------

def build_model(model_name: str) -> Union[Model, Any]:
    """
    This function builds a deepface model.
    Parameters:
        model_name (string): face recognition or facial attribute model.
            VGG-Face, Facenet, OpenFace, DeepFace, DeepID for face recognition.
            Age, Gender, Emotion, Race for facial attributes.

    Returns:
        built deepface model ( (tf.)keras.models.Model )
    """
    return modeling.build_model(model_name=model_name)
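
# build_model() usage sketch (illustrative, left as a comment so nothing runs at
# import time; "VGG-Face" is one of the documented model names):
#
#   model = build_model("VGG-Face")
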
def verify(
    img1_path: Union[str, np.ndarray],
    img2_path: Union[str, np.ndarray],
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    distance_metric: str = "cosine",
    enforce_detection: bool = True,
    align: bool = True,
    normalization: str = "base",
) -> Dict[str, Any]:
    """
    This function verifies whether an image pair shows the same person or different
    persons. In the background, the verification function represents facial images
    as vectors and then calculates the similarity between those vectors. Vectors of
    images of the same person should have more similarity (or less distance) than
    vectors of different persons.

    Parameters:
        img1_path, img2_path: exact image path as string. Numpy arrays (BGR) or
            base64 encoded images are also welcome. If one of the pair has more
            than one face, then the face pair with maximum similarity is compared.

        model_name (str): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace,
            DeepID, Dlib, ArcFace or SFace

        distance_metric (string): cosine, euclidean, euclidean_l2

        enforce_detection (boolean): If no face is detected in an image, this
            function raises an exception by default. Set this to False to suppress
            the exception. This might be convenient for low resolution images.

        detector_backend (string): set face detector backend to opencv, retinaface,
            mtcnn, ssd, dlib, mediapipe or yolov8.

        align (boolean): alignment according to the eye positions.

        normalization (string): normalize the input image before feeding it to the
            model.

    Returns:
        Verify function returns a dictionary.

        {
            "verified": True,
            "distance": 0.2563,
            "max_threshold_to_verify": 0.40,
            "model": "VGG-Face",
            "similarity_metric": "cosine",
            "facial_areas": {
                "img1": {"x": 345, "y": 211, "w": 769, "h": 769},
                "img2": {"x": 318, "y": 534, "w": 779, "h": 779}
            },
            "time": 2
        }
    """

    return verification.verify(
        img1_path=img1_path,
        img2_path=img2_path,
        model_name=model_name,
        detector_backend=detector_backend,
        distance_metric=distance_metric,
        enforce_detection=enforce_detection,
        align=align,
        normalization=normalization,
    )
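
# verify() usage sketch (illustrative, left as a comment so nothing runs at import
# time; "img1.jpg" and "img2.jpg" are hypothetical local files):
#
#   result = verify(img1_path="img1.jpg", img2_path="img2.jpg")
#   if result["verified"]:
#       print(f"same person (distance={result['distance']:.4f})")
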
def analyze(
    img_path: Union[str, np.ndarray],
    actions: Union[tuple, list] = ("emotion", "age", "gender", "race"),
    enforce_detection: bool = True,
    detector_backend: str = "opencv",
    align: bool = True,
    silent: bool = False,
) -> List[Dict[str, Any]]:
    """
    This function analyzes facial attributes including age, gender, emotion and
    race. In the background, the analysis function builds convolutional neural
    network models to classify age, gender, emotion and race of the input image.

    Parameters:
        img_path: exact image path, numpy array (BGR) or base64 encoded image could
            be passed. If the source image has more than one face, then the result
            will have one entry per face appearing in the image.

        actions (tuple): The default is ("emotion", "age", "gender", "race"). You
            can drop some of those attributes.

        enforce_detection (bool): The function raises an exception if no face is
            detected by default. Set this to False to suppress the exception. This
            might be convenient for low resolution images.

        detector_backend (string): set face detector backend to opencv, retinaface,
            mtcnn, ssd, dlib, mediapipe or yolov8.

        align (boolean): alignment according to the eye positions.

        silent (boolean): disable (some) log messages.

    Returns:
        The function returns a list of dictionaries, one for each face appearing in
        the image.

        [
            {
                "region": {"x": 230, "y": 120, "w": 36, "h": 45},
                "age": 28.66,
                "face_confidence": 0.9993908405303955,
                "dominant_gender": "Woman",
                "gender": {
                    "Woman": 99.99407529830933,
                    "Man": 0.005928758764639497,
                },
                "dominant_emotion": "neutral",
                "emotion": {
                    "sad": 37.65260875225067,
                    "angry": 0.15512987738475204,
                    "surprise": 0.0022171278033056296,
                    "fear": 1.2489334680140018,
                    "happy": 4.609785228967667,
                    "disgust": 9.698561953541684e-07,
                    "neutral": 56.33133053779602,
                },
                "dominant_race": "white",
                "race": {
                    "indian": 0.5480832420289516,
                    "asian": 0.7830780930817127,
                    "latino hispanic": 2.0677512511610985,
                    "black": 0.06337375962175429,
                    "middle eastern": 3.088453598320484,
                    "white": 93.44925880432129,
                },
            }
        ]
    """
    return demography.analyze(
        img_path=img_path,
        actions=actions,
        enforce_detection=enforce_detection,
        detector_backend=detector_backend,
        align=align,
        silent=silent,
    )
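
# analyze() usage sketch (illustrative comment only; "img.jpg" is a hypothetical
# local file):
#
#   objs = analyze(img_path="img.jpg", actions=("age", "gender"))
#   for obj in objs:
#       print(obj["age"], obj["dominant_gender"], obj["region"])
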
def find(
    img_path: Union[str, np.ndarray],
    db_path: str,
    model_name: str = "VGG-Face",
    distance_metric: str = "cosine",
    enforce_detection: bool = True,
    detector_backend: str = "opencv",
    align: bool = True,
    normalization: str = "base",
    silent: bool = False,
) -> List[pd.DataFrame]:
    """
    This function applies verification several times and finds the identities in a
    database.

    Parameters:
        img_path: exact image path, numpy array (BGR) or base64 encoded image.
            The source image can have many faces. Then, the result will have one
            entry per face in the source image.

        db_path (string): You should store some image files in a folder and pass
            the exact folder path to this argument. A database image can also have
            many faces. Then, all detected faces on the db side will be considered
            in the decision.

        model_name (string): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace,
            DeepID, Dlib, ArcFace, SFace or Ensemble

        distance_metric (string): cosine, euclidean, euclidean_l2

        enforce_detection (bool): The function raises an exception if a face could
            not be detected. Set this to False to suppress the exception. This
            might be convenient for low resolution images.

        detector_backend (string): set face detector backend to opencv, retinaface,
            mtcnn, ssd, dlib, mediapipe or yolov8.

        align (boolean): alignment according to the eye positions.

        normalization (string): normalize the input image before feeding it to the
            model.

        silent (boolean): disable some logging and progress bars.

    Returns:
        This function returns a list of pandas data frames. Each item of the list
        corresponds to an identity in the img_path.
    """
    return recognition.find(
        img_path=img_path,
        db_path=db_path,
        model_name=model_name,
        distance_metric=distance_metric,
        enforce_detection=enforce_detection,
        detector_backend=detector_backend,
        align=align,
        normalization=normalization,
        silent=silent,
    )
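
# find() usage sketch (illustrative comment only; "img.jpg" and the "my_db" folder
# are hypothetical local paths):
#
#   dfs = find(img_path="img.jpg", db_path="my_db")
#   for df in dfs:  # one data frame per face detected in img.jpg
#       print(df.head())
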
def represent(
    img_path: Union[str, np.ndarray],
    model_name: str = "VGG-Face",
    enforce_detection: bool = True,
    detector_backend: str = "opencv",
    align: bool = True,
    normalization: str = "base",
) -> List[Dict[str, Any]]:
    """
    This function represents facial images as vectors. It uses convolutional neural
    network models to generate the vector embeddings.

    Parameters:
        img_path (string): exact image path. Alternatively, numpy arrays (BGR) or
            base64 encoded images could be passed. The source image can have many
            faces. Then, the result will have one entry per face appearing in the
            source image.

        model_name (string): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace,
            DeepID, Dlib, ArcFace, SFace

        enforce_detection (boolean): If no face is detected in an image, this
            function raises an exception by default. Set this to False to suppress
            the exception. This might be convenient for low resolution images.

        detector_backend (string): set face detector backend to opencv, retinaface,
            mtcnn, ssd, dlib, mediapipe or yolov8. A special value `skip` could be
            used to skip face detection and only encode the given image.

        align (boolean): alignment according to the eye positions.

        normalization (string): normalize the input image before feeding it to the
            model.

    Returns:
        Represent function returns a list of objects; each object has the following
        fields:
        {
            // Multidimensional vector.
            // The number of dimensions changes based on the reference model.
            // E.g. FaceNet returns a 128-dimensional vector;
            // VGG-Face returns a 2622-dimensional vector.
            "embedding": np.array,

            // Facial area found by face detection, in dict format.
            // (x, y) is the top-left corner point, and (w, h) are the width and
            // height. If `detector_backend` == `skip`, it is the full image area
            // and meaningless.
            "facial_area": dict{"x": int, "y": int, "w": int, "h": int},

            // Face detection confidence.
            // If `detector_backend` == `skip`, this will be 0 and meaningless.
            "face_confidence": float
        }
    """
    return representation.represent(
        img_path=img_path,
        model_name=model_name,
        enforce_detection=enforce_detection,
        detector_backend=detector_backend,
        align=align,
        normalization=normalization,
    )
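
# represent() usage sketch (illustrative comment only; "img.jpg" is a hypothetical
# local file):
#
#   embedding_objs = represent(img_path="img.jpg", model_name="Facenet")
#   vector = embedding_objs[0]["embedding"]  # 128-dimensional for Facenet
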
def stream(
    db_path: str = "",
    model_name: str = "VGG-Face",
    detector_backend: str = "opencv",
    distance_metric: str = "cosine",
    enable_face_analysis: bool = True,
    source: Any = 0,
    time_threshold: int = 5,
    frame_threshold: int = 5,
) -> None:
    """
    This function applies real time face recognition and facial attribute analysis.

    Parameters:
        db_path (string): facial database path. You should store some .jpg files in
            this folder.

        model_name (string): VGG-Face, Facenet, Facenet512, OpenFace, DeepFace,
            DeepID, Dlib, ArcFace, SFace

        detector_backend (string): opencv, retinaface, mtcnn, ssd, dlib, mediapipe
            or yolov8.

        distance_metric (string): cosine, euclidean, euclidean_l2

        enable_face_analysis (boolean): Set this to False to run face recognition
            only.

        source: Set this to 0 to access the web cam. Otherwise, pass the exact
            video path.

        time_threshold (int): how many seconds the analyzed image will be displayed.

        frame_threshold (int): how many frames are required to focus on a face.
    """

    time_threshold = max(time_threshold, 1)
    frame_threshold = max(frame_threshold, 1)

    realtime.analysis(
        db_path=db_path,
        model_name=model_name,
        detector_backend=detector_backend,
        distance_metric=distance_metric,
        enable_face_analysis=enable_face_analysis,
        source=source,
        time_threshold=time_threshold,
        frame_threshold=frame_threshold,
    )
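
# stream() usage sketch (illustrative comment only; "my_db" is a hypothetical
# folder of .jpg files; source=0 selects the default web cam):
#
#   stream(db_path="my_db", source=0)
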
def extract_faces(
    img_path: Union[str, np.ndarray],
    target_size: Tuple[int, int] = (224, 224),
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
    grayscale: bool = False,
) -> List[Dict[str, Any]]:
    """
    This function applies the pre-processing stages of a face recognition pipeline,
    including detection and alignment.

    Parameters:
        img_path: exact image path, numpy array (BGR) or base64 encoded image.
            The source image can have many faces. Then, the result will have one
            entry per face appearing in that source image.

        target_size (tuple): final shape of the facial image. Black pixels will be
            added to resize the image.

        detector_backend (string): face detection backends are retinaface, mtcnn,
            opencv, ssd or dlib.

        enforce_detection (boolean): the function raises an exception if a face
            cannot be detected in the given image. Set this to False if you do not
            want to get an exception and want to run the function anyway.

        align (boolean): alignment according to the eye positions.

        grayscale (boolean): extract faces in rgb or gray scale.

    Returns:
        A list of dictionaries. Each dictionary contains the facial image itself,
        the extracted area from the original image and a confidence score.
    """

    return detection.extract_faces(
        img_path=img_path,
        target_size=target_size,
        detector_backend=detector_backend,
        enforce_detection=enforce_detection,
        align=align,
        grayscale=grayscale,
    )
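
# extract_faces() usage sketch (illustrative comment only; "img.jpg" is a
# hypothetical local file):
#
#   face_objs = extract_faces(img_path="img.jpg", detector_backend="retinaface")
#   first_face = face_objs[0]["face"]  # numpy array resized to target_size
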
# ---------------------------
# deprecated functions


@deprecated(version="0.0.78", reason="Use DeepFace.extract_faces instead of DeepFace.detectFace")
def detectFace(
    img_path: Union[str, np.ndarray],
    target_size: tuple = (224, 224),
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
) -> Union[np.ndarray, None]:
    """
    Deprecated function. Use extract_faces for the same functionality.

    This function applies the pre-processing stages of a face recognition pipeline,
    including detection and alignment.

    Parameters:
        img_path: exact image path, numpy array (BGR) or base64 encoded image.
            The source image can have many faces; only the first detected face is
            returned.

        target_size (tuple): final shape of the facial image. Black pixels will be
            added to resize the image.

        detector_backend (string): face detection backends are retinaface, mtcnn,
            opencv, ssd or dlib.

        enforce_detection (boolean): the function raises an exception if a face
            cannot be detected in the given image. Set this to False if you do not
            want to get an exception and want to run the function anyway.

        align (boolean): alignment according to the eye positions.

    Returns:
        detected and aligned face as a numpy array
    """
    logger.warn("Function detectFace is deprecated. Use extract_faces instead.")
    face_objs = extract_faces(
        img_path=img_path,
        target_size=target_size,
        detector_backend=detector_backend,
        enforce_detection=enforce_detection,
        align=align,
        grayscale=False,
    )

    extracted_face = None
    if len(face_objs) > 0:
        extracted_face = face_objs[0]["face"]
    return extracted_face

# ---------------------------
# main

functions.initialize_folder()


def cli() -> None:
    """
    Command line interface, exposing this module's functions via the fire package.
    """
    import fire

    fire.Fire()