comfyui client

CITATION.md
## Cite DeepFace Papers

Please cite deepface in your publications if it helps your research. Here are its BibTeX entries:

### Facial Recognition

If you use deepface in your research for facial recognition purposes, please cite these publications:

```BibTeX
@article{serengil2024lightface,
  title     = {A Benchmark of Facial Recognition Pipelines and Co-Usability Performances of Modules},
  author    = {Serengil, Sefik Ilkin and Ozpinar, Alper},
  journal   = {Bilisim Teknolojileri Dergisi},
  volume    = {17},
  number    = {2},
  pages     = {95-107},
  year      = {2024},
  doi       = {10.17671/gazibtd.1399077},
  url       = {https://dergipark.org.tr/en/pub/gazibtd/issue/84331/1399077},
  publisher = {Gazi University}
}
```

```BibTeX
@inproceedings{serengil2020lightface,
  title        = {LightFace: A Hybrid Deep Face Recognition Framework},
  author       = {Serengil, Sefik Ilkin and Ozpinar, Alper},
  booktitle    = {2020 Innovations in Intelligent Systems and Applications Conference (ASYU)},
  pages        = {23-27},
  year         = {2020},
  doi          = {10.1109/ASYU50717.2020.9259802},
  url          = {https://ieeexplore.ieee.org/document/9259802},
  organization = {IEEE}
}
```

### Facial Attribute Analysis

If you use deepface in your research for facial attribute analysis purposes such as age, gender, emotion or ethnicity prediction, please cite this publication:

```BibTeX
@inproceedings{serengil2021lightface,
  title        = {HyperExtended LightFace: A Facial Attribute Analysis Framework},
  author       = {Serengil, Sefik Ilkin and Ozpinar, Alper},
  booktitle    = {2021 International Conference on Engineering and Emerging Technologies (ICEET)},
  pages        = {1-4},
  year         = {2021},
  doi          = {10.1109/ICEET53442.2021.9659697},
  url          = {https://ieeexplore.ieee.org/document/9659697/},
  organization = {IEEE}
}
```

### Additional Papers

We have additionally released these papers within the DeepFace project for a multitude of purposes.

```BibTeX
@misc{serengil2023db,
  title         = {An evaluation of sql and nosql databases for facial recognition pipelines},
  author        = {Serengil, Sefik Ilkin and Ozpinar, Alper},
  year          = {2023},
  archivePrefix = {Cambridge Open Engage},
  doi           = {10.33774/coe-2023-18rcn},
  url           = {https://www.cambridge.org/engage/coe/article-details/63f3e5541d2d184063d4f569}
}
```

### Repositories

Also, if you use deepface in your GitHub projects, please add `deepface` to your `requirements.txt`. Your project will then be listed in its [dependency graph](https://github.com/serengil/deepface/network/dependents).
Dockerfile
# base image
FROM python:3.8.12
LABEL org.opencontainers.image.source https://github.com/serengil/deepface

# -----------------------------------
# create required folders
RUN mkdir /app
RUN mkdir /app/deepface

# -----------------------------------
# switch to application directory
WORKDIR /app

# -----------------------------------
# update image os
RUN apt-get update
RUN apt-get install ffmpeg libsm6 libxext6 -y

# -----------------------------------
# copy required files from repo into image
COPY ./deepface /app/deepface
# even though we will use local requirements, this one is required to install deepface from source code
COPY ./requirements.txt /app/requirements.txt
COPY ./requirements_local /app/requirements_local.txt
COPY ./package_info.json /app/
COPY ./setup.py /app/
COPY ./README.md /app/

# -----------------------------------
# if you plan to use a GPU, you should install the 'tensorflow-gpu' package
# RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org tensorflow-gpu

# if you plan to use face anti-spoofing, then activate this line
# RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org torch==2.1.2
# -----------------------------------
# install deepface from pypi release (might be out-of-date)
# RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org deepface
# -----------------------------------
# install dependencies - deepface with these dependency versions is working
RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org -r /app/requirements_local.txt
# install deepface from source code (always up-to-date)
RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org -e .

# -----------------------------------
# some packages are optional in deepface. activate if your task depends on one.
# RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org cmake==3.24.1.1
# RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org dlib==19.20.0
# RUN pip install --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org lightgbm==2.3.1

# -----------------------------------
# environment variables
ENV PYTHONUNBUFFERED=1

# -----------------------------------
# run the app (re-configure port if necessary)
WORKDIR /app/deepface/api/src
EXPOSE 5000
CMD ["gunicorn", "--workers=1", "--timeout=3600", "--bind=0.0.0.0:5000", "app:create_app()"]
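After building and starting the image (e.g. `docker build -t deepface .` then `docker run -p 5000:5000 deepface`), the gunicorn service listens on port 5000. A minimal smoke test from Python is sketched below; the `/verify` route and its JSON field names are assumptions based on the API sources under `deepface/api/src`, so confirm them there before relying on this:

```python
# Hedged sketch: call the containerized DeepFace API. The route and field names
# ("/verify", "img1_path", "img2_path") are assumptions -- check deepface/api/src.
import base64
import requests

def as_base64(path: str) -> str:
    with open(path, "rb") as f:
        return "data:image/jpeg;base64," + base64.b64encode(f.read()).decode()

payload = {
    "img1_path": as_base64("person_a.jpg"),   # placeholder image paths
    "img2_path": as_base64("person_b.jpg"),
    "model_name": "Facenet512",
}
response = requests.post("http://localhost:5000/verify", json=payload, timeout=120)
response.raise_for_status()
print(response.json())  # expected to mirror DeepFace.verify output: verified, distance, threshold, ...
```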
ComfyUI workflow JSON (new file)
{
  "3": {
    "inputs": {
      "seed": 479096427492872,
      "steps": 20,
      "cfg": 8,
      "sampler_name": "dpmpp_2m",
      "scheduler": "normal",
      "denoise": 0.9,
      "model": ["19", 0],
      "positive": ["6", 0],
      "negative": ["7", 0],
      "latent_image": ["12", 0]
    },
    "class_type": "KSampler",
    "_meta": { "title": "KSampler" }
  },
  "6": {
    "inputs": { "text": "1 girl", "clip": ["19", 1] },
    "class_type": "CLIPTextEncode",
    "_meta": { "title": "CLIP Text Encode" }
  },
  "7": {
    "inputs": { "text": "", "clip": ["19", 1] },
    "class_type": "CLIPTextEncode",
    "_meta": { "title": "CLIP Text Encode" }
  },
  "8": {
    "inputs": { "samples": ["3", 0], "vae": ["14", 2] },
    "class_type": "VAEDecode",
    "_meta": { "title": "VAE Decode" }
  },
  "10": {
    "inputs": { "image": "WechatIMG422.jpg", "upload": "image" },
    "class_type": "LoadImage",
    "_meta": { "title": "Load Image" }
  },
  "12": {
    "inputs": { "pixels": ["10", 0], "vae": ["14", 2] },
    "class_type": "VAEEncode",
    "_meta": { "title": "VAE Encode" }
  },
  "14": {
    "inputs": { "ckpt_name": "majicMIX realistic 麦橘写实_v7.safetensors" },
    "class_type": "CheckpointLoaderSimple",
    "_meta": { "title": "Load Checkpoint (Simple)" }
  },
  "19": {
    "inputs": {
      "lora_name": "instantid_diffusion_pytorch_model.safetensors",
      "strength_model": 1,
      "strength_clip": 1,
      "model": ["14", 0],
      "clip": ["14", 1]
    },
    "class_type": "LoraLoader",
    "_meta": { "title": "Load LoRA" }
  },
  "26": {
    "inputs": { "images": ["8", 0] },
    "class_type": "PreviewImage",
    "_meta": { "title": "Preview Image" }
  }
}
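In this API-format workflow each `["19", 0]` pair is a link meaning "output slot 0 of node 19": the KSampler (node 3) takes its model and CLIP conditioning from the LoRA loader (node 19) and its latent from the VAE-encoded upload (node 12), and the decoded result goes to a PreviewImage node (26). A minimal sketch of patching the prompt text and seed before submission, assuming this JSON is saved as the `FaceImageArtView.json` that the client further below loads:

```python
# Sketch: load the workflow above, override the positive prompt (node 6) and the
# KSampler seed (node 3), mirroring how the client rewrites the LoadImage node ("10").
import json
import random

with open("FaceImageArtView.json", "r", encoding="utf-8") as f:
    workflow = json.load(f)

workflow["6"]["inputs"]["text"] = "1 girl, watercolor portrait"  # positive prompt
workflow["3"]["inputs"]["seed"] = random.randint(0, 2**48)       # fresh seed per run

print(json.dumps(workflow["3"]["inputs"], ensure_ascii=False, indent=2))
```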
LICENSE
MIT License

Copyright (c) 2019 Sefik Ilkin Serengil

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Makefile
test:
	cd tests && python -m pytest . -s --disable-warnings

lint:
	python -m pylint deepface/ --fail-under=10

coverage:
	pip install pytest-cov && cd tests && python -m pytest --cov=deepface
LFW experiments notebook (ipynb)
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8133a99d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Perform Experiments with DeepFace on LFW dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "5aab0cbe",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# built-in dependencies\n",
|
||||
"import os\n",
|
||||
"\n",
|
||||
"# 3rd party dependencies\n",
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"from sklearn.datasets import fetch_lfw_pairs\n",
|
||||
"from deepface import DeepFace"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "64c9ed9a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This experiment is done with pip package of deepface with 0.0.90 version\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(f\"This experiment is done with pip package of deepface with {DeepFace.__version__} version\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "feaec973",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configuration Sets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "453104b4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# all configuration alternatives for 4 dimensions of arguments\n",
|
||||
"alignment = [True, False]\n",
|
||||
"models = [\"Facenet512\", \"Facenet\", \"VGG-Face\", \"ArcFace\", \"Dlib\", \"GhostFaceNet\", \"SFace\", \"OpenFace\", \"DeepFace\", \"DeepID\"]\n",
|
||||
"detectors = [\"retinaface\", \"mtcnn\", \"fastmtcnn\", \"dlib\", \"yolov8\", \"yunet\", \"centerface\", \"mediapipe\", \"ssd\", \"opencv\", \"skip\"]\n",
|
||||
"metrics = [\"euclidean\", \"euclidean_l2\", \"cosine\"]\n",
|
||||
"expand_percentage = 0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c9aeb57a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create Required Folders if necessary"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "671d8a00",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"target_paths = [\"lfwe\", \"dataset\", \"outputs\", \"outputs/test\", \"results\"]\n",
|
||||
"for target_path in target_paths:\n",
|
||||
" if not os.path.exists(target_path):\n",
|
||||
" os.mkdir(target_path)\n",
|
||||
" print(f\"{target_path} is just created\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "fc31f03a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Load LFW Dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "721a7d70",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"pairs_touch = \"outputs/test_lfwe.txt\"\n",
|
||||
"instances = 1000 #pairs.shape[0]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "010184d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"target_path = \"dataset/test_lfw.npy\"\n",
|
||||
"labels_path = \"dataset/test_labels.npy\"\n",
|
||||
"\n",
|
||||
"if os.path.exists(target_path) != True:\n",
|
||||
" fetch_lfw_pairs = fetch_lfw_pairs(subset = 'test', color = True\n",
|
||||
" , resize = 2\n",
|
||||
" , funneled = False\n",
|
||||
" , slice_=None\n",
|
||||
" )\n",
|
||||
" pairs = fetch_lfw_pairs.pairs\n",
|
||||
" labels = fetch_lfw_pairs.target\n",
|
||||
" target_names = fetch_lfw_pairs.target_names\n",
|
||||
" np.save(target_path, pairs)\n",
|
||||
" np.save(labels_path, labels)\n",
|
||||
"else:\n",
|
||||
" if not os.path.exists(pairs_touch):\n",
|
||||
" # loading pairs takes some time. but if we extract these pairs as image, no need to load it anymore\n",
|
||||
" pairs = np.load(target_path)\n",
|
||||
" labels = np.load(labels_path) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "005f582e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Save LFW image pairs into file system"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "5bc23313",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"100%|██████████| 1000/1000 [00:00<00:00, 190546.25it/s]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for i in tqdm(range(0, instances)):\n",
|
||||
" img1_target = f\"lfwe/test/{i}_1.jpg\"\n",
|
||||
" img2_target = f\"lfwe/test/{i}_2.jpg\"\n",
|
||||
" \n",
|
||||
" if not os.path.exists(img1_target):\n",
|
||||
" img1 = pairs[i][0]\n",
|
||||
" # plt.imsave(img1_target, img1/255) #works for my mac\n",
|
||||
" plt.imsave(img1_target, img1) #works for my debian\n",
|
||||
" \n",
|
||||
" if not os.path.exists(img2_target):\n",
|
||||
" img2 = pairs[i][1]\n",
|
||||
" # plt.imsave(img2_target, img2/255) #works for my mac\n",
|
||||
" plt.imsave(img2_target, img2) #works for my debian\n",
|
||||
" \n",
|
||||
"if not os.path.exists(pairs_touch):\n",
|
||||
" open(pairs_touch,'a').close()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6f8fa8fa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Perform Experiments\n",
|
||||
"\n",
|
||||
"This block will save the experiments results in outputs folder"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "e7fba936",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for model_name in models:\n",
|
||||
" for detector_backend in detectors:\n",
|
||||
" for distance_metric in metrics:\n",
|
||||
" for align in alignment:\n",
|
||||
" \n",
|
||||
" if detector_backend == \"skip\" and align is True:\n",
|
||||
" # Alignment is not possible for a skipped detector configuration\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
" alignment_text = \"aligned\" if align is True else \"unaligned\"\n",
|
||||
" task = f\"{model_name}_{detector_backend}_{distance_metric}_{alignment_text}\"\n",
|
||||
" output_file = f\"outputs/test/{task}.csv\"\n",
|
||||
" if os.path.exists(output_file):\n",
|
||||
" #print(f\"{output_file} is available already\")\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
" distances = []\n",
|
||||
" for i in tqdm(range(0, instances), desc = task):\n",
|
||||
" img1_target = f\"lfwe/test/{i}_1.jpg\"\n",
|
||||
" img2_target = f\"lfwe/test/{i}_2.jpg\"\n",
|
||||
" result = DeepFace.verify(\n",
|
||||
" img1_path=img1_target,\n",
|
||||
" img2_path=img2_target,\n",
|
||||
" model_name=model_name,\n",
|
||||
" detector_backend=detector_backend,\n",
|
||||
" distance_metric=distance_metric,\n",
|
||||
" align=align,\n",
|
||||
" enforce_detection=False,\n",
|
||||
" expand_percentage=expand_percentage,\n",
|
||||
" )\n",
|
||||
" distance = result[\"distance\"]\n",
|
||||
" distances.append(distance)\n",
|
||||
" # -----------------------------------\n",
|
||||
" df = pd.DataFrame(list(labels), columns = [\"actuals\"])\n",
|
||||
" df[\"distances\"] = distances\n",
|
||||
" df.to_csv(output_file, index=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a0b8dafa",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Calculate Results\n",
|
||||
"\n",
|
||||
"Experiments were responsible for calculating distances. We will calculate the best accuracy scores in this block."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "67376e76",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data = [[0 for _ in range(len(models))] for _ in range(len(detectors))]\n",
|
||||
"base_df = pd.DataFrame(data, columns=models, index=detectors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "f2cc536b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"results/pivot_euclidean_with_alignment_True.csv saved\n",
|
||||
"results/pivot_euclidean_l2_with_alignment_True.csv saved\n",
|
||||
"results/pivot_cosine_with_alignment_True.csv saved\n",
|
||||
"results/pivot_euclidean_with_alignment_False.csv saved\n",
|
||||
"results/pivot_euclidean_l2_with_alignment_False.csv saved\n",
|
||||
"results/pivot_cosine_with_alignment_False.csv saved\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"for is_aligned in alignment:\n",
|
||||
" for distance_metric in metrics:\n",
|
||||
"\n",
|
||||
" current_df = base_df.copy()\n",
|
||||
" \n",
|
||||
" target_file = f\"results/pivot_{distance_metric}_with_alignment_{is_aligned}.csv\"\n",
|
||||
" if os.path.exists(target_file):\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
" for model_name in models:\n",
|
||||
" for detector_backend in detectors:\n",
|
||||
"\n",
|
||||
" align = \"aligned\" if is_aligned is True else \"unaligned\"\n",
|
||||
"\n",
|
||||
" if detector_backend == \"skip\" and is_aligned is True:\n",
|
||||
" # Alignment is not possible for a skipped detector configuration\n",
|
||||
" align = \"unaligned\"\n",
|
||||
"\n",
|
||||
" source_file = f\"outputs/test/{model_name}_{detector_backend}_{distance_metric}_{align}.csv\"\n",
|
||||
" df = pd.read_csv(source_file)\n",
|
||||
" \n",
|
||||
" positive_mean = df[(df[\"actuals\"] == True) | (df[\"actuals\"] == 1)][\"distances\"].mean()\n",
|
||||
" negative_mean = df[(df[\"actuals\"] == False) | (df[\"actuals\"] == 0)][\"distances\"].mean()\n",
|
||||
"\n",
|
||||
" distances = sorted(df[\"distances\"].values.tolist())\n",
|
||||
"\n",
|
||||
" items = []\n",
|
||||
" for i, distance in enumerate(distances):\n",
|
||||
" if distance >= positive_mean and distance <= negative_mean:\n",
|
||||
" sandbox_df = df.copy()\n",
|
||||
" sandbox_df[\"predictions\"] = False\n",
|
||||
" idx = sandbox_df[sandbox_df[\"distances\"] < distance].index\n",
|
||||
" sandbox_df.loc[idx, \"predictions\"] = True\n",
|
||||
"\n",
|
||||
" actuals = sandbox_df.actuals.values.tolist()\n",
|
||||
" predictions = sandbox_df.predictions.values.tolist()\n",
|
||||
" accuracy = 100*accuracy_score(actuals, predictions)\n",
|
||||
" items.append((distance, accuracy))\n",
|
||||
"\n",
|
||||
" pivot_df = pd.DataFrame(items, columns = [\"distance\", \"accuracy\"])\n",
|
||||
" pivot_df = pivot_df.sort_values(by = [\"accuracy\"], ascending = False)\n",
|
||||
" threshold = pivot_df.iloc[0][\"distance\"]\n",
|
||||
" # print(f\"threshold for {model_name}/{detector_backend} is {threshold}\")\n",
|
||||
" accuracy = pivot_df.iloc[0][\"accuracy\"]\n",
|
||||
"\n",
|
||||
" # print(source_file, round(accuracy, 1))\n",
|
||||
" current_df.at[detector_backend, model_name] = round(accuracy, 1)\n",
|
||||
" \n",
|
||||
" current_df.to_csv(target_file)\n",
|
||||
" print(f\"{target_file} saved\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.16"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
|
|
# Benchmarks

[`🎥 Video Tutorial`](https://youtu.be/eKOZawGR3y0)

DeepFace offers various configurations that significantly impact accuracy, including the facial recognition model, face detector model, distance metric, and alignment mode. Our experiments conducted on the [LFW dataset](https://sefiks.com/2020/08/27/labeled-faces-in-the-wild-for-face-recognition/) using different combinations of these configurations yield the following results.

You can reproduce the results by executing the `Perform-Experiments.ipynb` and `Evaluate-Results.ipynb` notebooks, respectively.

## ROC Curves

ROC curves provide a valuable means of evaluating the performance of different models on a broader scale. The following illustration shows ROC curves for different facial recognition models alongside their optimal configurations yielding the highest accuracy scores.

<p align="center"><img src="https://raw.githubusercontent.com/serengil/deepface/master/icon/benchmarks.jpg" width="95%" height="95%"></p>

In summary, FaceNet-512d surpasses human-level accuracy, while FaceNet-128d reaches it; Dlib, VGG-Face, and ArcFace trail closely but slightly below; GhostFaceNet and SFace make notable contributions despite not leading; and OpenFace, DeepFace, and DeepID exhibit lower performance.

## Accuracy Scores

Please note that humans achieve a 97.5% accuracy score on the same dataset. Configurations that outperform this benchmark are highlighted in bold.
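Each cell in the matrices below is produced by running `DeepFace.verify` over the LFW pairs with one combination of model, detector, metric, and alignment (see `Perform-Experiments.ipynb`). For example, the strongest combination in these tables can be exercised directly; image paths here are placeholders:

```python
# One configuration from the sweep: Facenet512 + retinaface + euclidean_l2 + alignment.
from deepface import DeepFace

result = DeepFace.verify(
    img1_path="pair_0_1.jpg",
    img2_path="pair_0_2.jpg",
    model_name="Facenet512",
    detector_backend="retinaface",
    distance_metric="euclidean_l2",
    align=True,
    enforce_detection=False,  # LFW crops can be low resolution
)
print(result["verified"], result["distance"], result["threshold"])
```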
|
||||
|
||||
## Performance Matrix for euclidean while alignment is True
|
||||
|
||||
| | Facenet512 |Facenet |VGG-Face |ArcFace |Dlib |GhostFaceNet |SFace |OpenFace |DeepFace |DeepID |
|
||||
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||
| retinaface |95.9 |93.5 |95.8 |85.2 |88.9 |85.9 |80.2 |69.4 |67.0 |65.6 |
|
||||
| mtcnn |95.2 |93.8 |95.9 |83.7 |89.4 |83.0 |77.4 |70.2 |66.5 |63.3 |
|
||||
| fastmtcnn |96.0 |93.4 |95.8 |83.5 |91.1 |82.8 |77.7 |69.4 |66.7 |64.0 |
|
||||
| dlib |96.0 |90.8 |94.5 |88.6 |96.8 |65.7 |66.3 |75.8 |63.4 |60.4 |
|
||||
| yolov8 |94.4 |91.9 |95.0 |84.1 |89.2 |77.6 |73.4 |68.7 |69.0 |66.5 |
|
||||
| yunet |97.3 |96.1 |96.0 |84.9 |92.2 |84.0 |79.4 |70.9 |65.8 |65.2 |
|
||||
| centerface |**97.6** |95.8 |95.7 |83.6 |90.4 |82.8 |77.4 |68.9 |65.5 |62.8 |
|
||||
| mediapipe |95.1 |88.6 |92.9 |73.2 |93.1 |63.2 |72.5 |78.7 |61.8 |62.2 |
|
||||
| ssd |88.9 |85.6 |87.0 |75.8 |83.1 |79.1 |76.9 |66.8 |63.4 |62.5 |
|
||||
| opencv |88.2 |84.2 |87.3 |73.0 |84.4 |83.8 |81.1 |66.4 |65.5 |59.6 |
|
||||
| skip |92.0 |64.1 |90.6 |56.6 |69.0 |75.1 |81.4 |57.4 |60.8 |60.7 |
|
||||
|
||||
## Performance Matrix for euclidean while alignment is False
|
||||
|
||||
| | Facenet512 |Facenet |VGG-Face |ArcFace |Dlib |GhostFaceNet |SFace |OpenFace |DeepFace |DeepID |
|
||||
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||
| retinaface |96.1 |92.8 |95.7 |84.1 |88.3 |83.2 |78.6 |70.8 |67.4 |64.3 |
|
||||
| mtcnn |95.9 |92.5 |95.5 |81.8 |89.3 |83.2 |76.3 |70.9 |65.9 |63.2 |
|
||||
| fastmtcnn |96.3 |93.0 |96.0 |82.2 |90.0 |82.7 |76.8 |71.2 |66.5 |64.3 |
|
||||
| dlib |96.0 |89.0 |94.1 |82.6 |96.3 |65.6 |73.1 |75.9 |61.8 |61.9 |
|
||||
| yolov8 |94.8 |90.8 |95.2 |83.2 |88.4 |77.6 |71.6 |68.9 |68.2 |66.3 |
|
||||
| yunet |**97.9** |96.5 |96.3 |84.1 |91.4 |82.7 |78.2 |71.7 |65.5 |65.2 |
|
||||
| centerface |97.4 |95.4 |95.8 |83.2 |90.3 |82.0 |76.5 |69.9 |65.7 |62.9 |
|
||||
| mediapipe |94.9 |87.1 |93.1 |71.1 |91.9 |61.9 |73.2 |77.6 |61.7 |62.4 |
|
||||
| ssd |97.2 |94.9 |96.7 |83.9 |88.6 |84.9 |82.0 |69.9 |66.7 |64.0 |
|
||||
| opencv |94.1 |90.2 |95.8 |89.8 |91.2 |91.0 |86.9 |71.1 |68.4 |61.1 |
|
||||
| skip |92.0 |64.1 |90.6 |56.6 |69.0 |75.1 |81.4 |57.4 |60.8 |60.7 |
|
||||
|
||||
## Performance Matrix for euclidean_l2 while alignment is True
|
||||
|
||||
| | Facenet512 |Facenet |VGG-Face |ArcFace |Dlib |GhostFaceNet |SFace |OpenFace |DeepFace |DeepID |
|
||||
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||
| retinaface |**98.4** |96.4 |95.8 |96.6 |89.1 |90.5 |92.4 |69.4 |67.7 |64.4 |
|
||||
| mtcnn |**97.6** |96.8 |95.9 |96.0 |90.0 |89.8 |90.5 |70.2 |66.4 |64.0 |
|
||||
| fastmtcnn |**98.1** |97.2 |95.8 |96.4 |91.0 |89.5 |90.0 |69.4 |67.4 |64.1 |
|
||||
| dlib |97.0 |92.6 |94.5 |95.1 |96.4 |63.3 |69.8 |75.8 |66.5 |59.5 |
|
||||
| yolov8 |97.3 |95.7 |95.0 |95.5 |88.8 |88.9 |91.9 |68.7 |67.5 |66.0 |
|
||||
| yunet |**97.9** |97.4 |96.0 |96.7 |91.6 |89.1 |91.0 |70.9 |66.5 |63.6 |
|
||||
| centerface |**97.7** |96.8 |95.7 |96.5 |90.9 |87.5 |89.3 |68.9 |67.8 |64.0 |
|
||||
| mediapipe |96.1 |90.6 |92.9 |90.3 |92.6 |64.4 |75.4 |78.7 |64.7 |63.0 |
|
||||
| ssd |88.7 |87.5 |87.0 |86.2 |83.3 |82.2 |84.6 |66.8 |64.1 |62.6 |
|
||||
| opencv |87.6 |84.8 |87.3 |84.6 |84.0 |85.0 |83.6 |66.4 |63.8 |60.9 |
|
||||
| skip |91.4 |67.6 |90.6 |57.2 |69.3 |78.4 |83.4 |57.4 |62.6 |61.6 |
|
||||
|
||||
## Performance Matrix for euclidean_l2 while alignment is False
|
||||
|
||||
| | Facenet512 |Facenet |VGG-Face |ArcFace |Dlib |GhostFaceNet |SFace |OpenFace |DeepFace |DeepID |
|
||||
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||
| retinaface |**98.0** |95.9 |95.7 |95.7 |88.4 |89.5 |90.6 |70.8 |67.7 |64.6 |
|
||||
| mtcnn |**97.8** |96.2 |95.5 |95.9 |89.2 |88.0 |91.1 |70.9 |67.0 |64.0 |
|
||||
| fastmtcnn |**97.7** |96.6 |96.0 |95.9 |89.6 |87.8 |89.7 |71.2 |67.8 |64.2 |
|
||||
| dlib |96.5 |89.9 |94.1 |93.8 |95.6 |63.0 |75.0 |75.9 |62.6 |61.8 |
|
||||
| yolov8 |**97.7** |95.8 |95.2 |95.0 |88.1 |88.7 |89.8 |68.9 |68.9 |65.3 |
|
||||
| yunet |**98.3** |96.8 |96.3 |96.1 |91.7 |88.0 |90.5 |71.7 |67.6 |63.2 |
|
||||
| centerface |97.4 |96.3 |95.8 |95.8 |90.2 |86.8 |89.3 |69.9 |68.4 |63.1 |
|
||||
| mediapipe |96.3 |90.0 |93.1 |89.3 |91.8 |65.6 |74.6 |77.6 |64.9 |61.6 |
|
||||
| ssd |**97.9** |97.0 |96.7 |96.6 |89.4 |91.5 |93.0 |69.9 |68.7 |64.9 |
|
||||
| opencv |96.2 |92.9 |95.8 |93.2 |91.5 |93.3 |91.7 |71.1 |68.3 |61.6 |
|
||||
| skip |91.4 |67.6 |90.6 |57.2 |69.3 |78.4 |83.4 |57.4 |62.6 |61.6 |
|
||||
|
||||
## Performance Matrix for cosine while alignment is True
|
||||
|
||||
| | Facenet512 |Facenet |VGG-Face |ArcFace |Dlib |GhostFaceNet |SFace |OpenFace |DeepFace |DeepID |
|
||||
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||
| retinaface |**98.4** |96.4 |95.8 |96.6 |89.1 |90.5 |92.4 |69.4 |67.7 |64.4 |
|
||||
| mtcnn |**97.6** |96.8 |95.9 |96.0 |90.0 |89.8 |90.5 |70.2 |66.3 |63.0 |
|
||||
| fastmtcnn |**98.1** |97.2 |95.8 |96.4 |91.0 |89.5 |90.0 |69.4 |67.4 |63.6 |
|
||||
| dlib |97.0 |92.6 |94.5 |95.1 |96.4 |63.3 |69.8 |75.8 |66.5 |58.7 |
|
||||
| yolov8 |97.3 |95.7 |95.0 |95.5 |88.8 |88.9 |91.9 |68.7 |67.5 |65.9 |
|
||||
| yunet |**97.9** |97.4 |96.0 |96.7 |91.6 |89.1 |91.0 |70.9 |66.5 |63.5 |
|
||||
| centerface |**97.7** |96.8 |95.7 |96.5 |90.9 |87.5 |89.3 |68.9 |67.8 |63.6 |
|
||||
| mediapipe |96.1 |90.6 |92.9 |90.3 |92.6 |64.3 |75.4 |78.7 |64.8 |63.0 |
|
||||
| ssd |88.7 |87.5 |87.0 |86.2 |83.3 |82.2 |84.5 |66.8 |63.8 |62.6 |
|
||||
| opencv |87.6 |84.9 |87.2 |84.6 |84.0 |85.0 |83.6 |66.2 |63.7 |60.1 |
|
||||
| skip |91.4 |67.6 |90.6 |54.8 |69.3 |78.4 |83.4 |57.4 |62.6 |61.1 |
|
||||
|
||||
## Performance Matrix for cosine while alignment is False
|
||||
|
||||
| | Facenet512 |Facenet |VGG-Face |ArcFace |Dlib |GhostFaceNet |SFace |OpenFace |DeepFace |DeepID |
|
||||
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||
| retinaface |**98.0** |95.9 |95.7 |95.7 |88.4 |89.5 |90.6 |70.8 |67.7 |63.7 |
|
||||
| mtcnn |**97.8** |96.2 |95.5 |95.9 |89.2 |88.0 |91.1 |70.9 |67.0 |64.0 |
|
||||
| fastmtcnn |**97.7** |96.6 |96.0 |95.9 |89.6 |87.8 |89.7 |71.2 |67.8 |62.7 |
|
||||
| dlib |96.5 |89.9 |94.1 |93.8 |95.6 |63.0 |75.0 |75.9 |62.6 |61.7 |
|
||||
| yolov8 |**97.7** |95.8 |95.2 |95.0 |88.1 |88.7 |89.8 |68.9 |68.9 |65.3 |
|
||||
| yunet |**98.3** |96.8 |96.3 |96.1 |91.7 |88.0 |90.5 |71.7 |67.6 |63.2 |
|
||||
| centerface |97.4 |96.3 |95.8 |95.8 |90.2 |86.8 |89.3 |69.9 |68.4 |62.6 |
|
||||
| mediapipe |96.3 |90.0 |93.1 |89.3 |91.8 |64.8 |74.6 |77.6 |64.9 |61.6 |
|
||||
| ssd |**97.9** |97.0 |96.7 |96.6 |89.4 |91.5 |93.0 |69.9 |68.7 |63.8 |
|
||||
| opencv |96.2 |92.9 |95.8 |93.2 |91.5 |93.3 |91.7 |71.1 |68.1 |61.1 |
|
||||
| skip |91.4 |67.6 |90.6 |54.8 |69.3 |78.4 |83.4 |57.4 |62.6 |61.1 |
|
||||
|
||||
# Citation

Please cite deepface in your publications if it helps your research - see [`CITATIONS`](https://github.com/serengil/deepface/blob/master/CITATION.md) for more details. Here is its BibTeX entry:

```BibTeX
@article{serengil2024lightface,
  title     = {A Benchmark of Facial Recognition Pipelines and Co-Usability Performances of Modules},
  author    = {Serengil, Sefik Ilkin and Ozpinar, Alper},
  journal   = {Bilisim Teknolojileri Dergisi},
  volume    = {17},
  number    = {2},
  pages     = {95-107},
  year      = {2024},
  doi       = {10.17671/gazibtd.1399077},
  url       = {https://dergipark.org.tr/en/pub/gazibtd/issue/84331/1399077},
  publisher = {Gazi University}
}
```
ComfyUI client script (new file)
import requests
import uuid
import json
import os
import time
from PIL import Image
from io import BytesIO


class ComfyUIClient:
    def __init__(self, server_url="http://127.0.0.1:8188"):
        """Initialize the ComfyUI client."""
        self.server_url = server_url.rstrip("/")
        self.client_id = str(uuid.uuid4())
        self.output_images = []

    def upload_image(self, image_path):
        """Upload an image to the ComfyUI server."""
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file does not exist: {image_path}")

        # read the image file
        with open(image_path, "rb") as file:
            image_data = file.read()

        filename = os.path.basename(image_path)
        files = {
            "image": (filename, image_data, "image/png")
        }
        data = {
            "overwrite": "true"
        }

        url = f"{self.server_url}/upload/image"
        response = requests.post(url, files=files, data=data)

        if response.status_code != 200:
            raise Exception(f"Failed to upload image: {response.text}")

        print(f"Uploaded image successfully: {filename}")
        return filename

    def submit_prompt(self, workflow, input_image_path):
        """Submit an image-generation task."""
        # upload the image and patch the workflow
        image_filename = self.upload_image(input_image_path)

        # point the workflow's LoadImage node at the uploaded file
        if "10" in workflow:
            workflow["10"]["inputs"]["image"] = image_filename

        # build the request payload
        data = {
            "client_id": self.client_id,
            "prompt": workflow
        }

        # submit the prompt
        url = f"{self.server_url}/prompt"
        response = requests.post(url, json=data)

        if response.status_code != 200:
            raise Exception(f"Failed to submit task: {response.text}")

        result = response.json()
        prompt_id = result.get("prompt_id")
        print(f"Task submitted successfully, prompt id: {prompt_id}")
        return prompt_id

    def wait_for_completion(self, prompt_id, timeout=300, interval=2):
        """Wait for the task to complete by polling /history."""
        history_url = f"{self.server_url}/history/{prompt_id}"
        start_time = time.time()

        while time.time() - start_time < timeout:
            try:
                response = requests.get(history_url)
                if response.status_code == 200:
                    history = response.json().get(prompt_id)
                    if history and history.get("status", {}).get("completed", False):
                        outputs = history.get("outputs", {})
                        for node_id, output in outputs.items():
                            if "images" in output:
                                self.output_images = output["images"]
                                print(f"Task finished, generated {len(self.output_images)} image(s)")
                                return self.output_images
            except Exception as e:
                print(f"Polling error: {e}")

            time.sleep(interval)

        print("Task timed out or no images were generated")
        return None

    def get_image(self, filename, save_path=None):
        """Fetch a generated image and optionally save it."""
        print(f"Fetching image: {filename}")
        url = f"{self.server_url}/view"
        params = {
            "filename": filename,
            "type": "temp"  # change to "output" or "temp" as needed
        }
        response = requests.get(url, params=params)

        if response.status_code != 200:
            raise Exception(f"Failed to fetch image: {response.status_code}")

        image = Image.open(BytesIO(response.content))

        if save_path:
            os.makedirs(os.path.dirname(
                os.path.abspath(save_path)), exist_ok=True)
            image.save(save_path)
            print(f"Image saved to: {save_path}")

        return image


# usage example
if __name__ == "__main__":
    client = ComfyUIClient(server_url="https://image.ai.faceta.cn")

    input_image_path = "/Users/wandou/Downloads/aa.png"

    with open('FaceImageArtView.json', 'r', encoding='utf-8') as f:
        workflow = json.load(f)

    # submit the task
    prompt_id = client.submit_prompt(workflow, input_image_path)

    # wait for the task to complete
    output_images = client.wait_for_completion(prompt_id)

    # if any images were generated, fetch and save them
    if output_images:
        for i, img_file in enumerate(output_images):
            save_path = f"output_{i}.png"
            client.get_image(img_file['filename'], save_path)
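The client above discovers results by polling `/history`. ComfyUI can also push progress over a websocket tied to the same `client_id`; a sketch of that alternative is below, assuming the third-party `websocket-client` package and ComfyUI's usual `/ws?clientId=...` endpoint and `executing` message shape (verify both against your server before relying on them):

```python
# Hypothetical websocket-based completion wait, as an alternative to polling /history.
# Assumes "pip install websocket-client" and the standard ComfyUI websocket protocol.
import json
import websocket  # websocket-client package

def wait_via_websocket(server="127.0.0.1:8188", client_id="", prompt_id=""):
    ws = websocket.create_connection(f"ws://{server}/ws?clientId={client_id}")
    try:
        while True:
            message = ws.recv()
            if not isinstance(message, str):
                continue  # skip binary preview frames
            payload = json.loads(message)
            # ComfyUI typically sends {"type": "executing", "data": {"node": None, "prompt_id": ...}}
            # once the queued prompt has finished executing.
            if payload.get("type") == "executing":
                data = payload.get("data", {})
                if data.get("node") is None and data.get("prompt_id") == prompt_id:
                    return True
    finally:
        ws.close()
```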
DeepFace.py
# common dependencies
|
||||
import os
|
||||
import warnings
|
||||
import logging
|
||||
from typing import Any, Dict, List, Union, Optional
|
||||
|
||||
# this has to be set before importing tensorflow
|
||||
os.environ["TF_USE_LEGACY_KERAS"] = "1"
|
||||
|
||||
# pylint: disable=wrong-import-position
|
||||
|
||||
# 3rd party dependencies
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import tensorflow as tf
|
||||
|
||||
# package dependencies
|
||||
from deepface.commons import package_utils, folder_utils
|
||||
from deepface.commons.logger import Logger
|
||||
from deepface.modules import (
|
||||
modeling,
|
||||
representation,
|
||||
verification,
|
||||
recognition,
|
||||
demography,
|
||||
detection,
|
||||
streaming,
|
||||
preprocessing,
|
||||
)
|
||||
from deepface import __version__
|
||||
|
||||
logger = Logger()
|
||||
|
||||
# -----------------------------------
|
||||
# configurations for dependencies
|
||||
|
||||
# users should install tf_keras package if they are using tf 2.16 or later versions
|
||||
package_utils.validate_for_keras3()
|
||||
|
||||
warnings.filterwarnings("ignore")
|
||||
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
||||
tf_version = package_utils.get_tf_major_version()
|
||||
if tf_version == 2:
|
||||
tf.get_logger().setLevel(logging.ERROR)
|
||||
# -----------------------------------
|
||||
|
||||
# create required folders if necessary to store model weights
|
||||
folder_utils.initialize_folder()
|
||||
|
||||
|
||||
def build_model(model_name: str, task: str = "facial_recognition") -> Any:
|
||||
"""
|
||||
This function builds a pre-trained model
|
||||
Args:
|
||||
model_name (str): model identifier
|
||||
- VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib,
|
||||
ArcFace, SFace, GhostFaceNet for face recognition
|
||||
- Age, Gender, Emotion, Race for facial attributes
|
||||
- opencv, mtcnn, ssd, dlib, retinaface, mediapipe, yolov8, yunet,
|
||||
fastmtcnn or centerface for face detectors
|
||||
- Fasnet for spoofing
|
||||
task (str): facial_recognition, facial_attribute, face_detector, spoofing
|
||||
default is facial_recognition
|
||||
Returns:
|
||||
built_model
|
||||
"""
|
||||
return modeling.build_model(task=task, model_name=model_name)
|
||||
|
||||
|
||||
def verify(
|
||||
img1_path: Union[str, np.ndarray, List[float]],
|
||||
img2_path: Union[str, np.ndarray, List[float]],
|
||||
model_name: str = "VGG-Face",
|
||||
detector_backend: str = "opencv",
|
||||
distance_metric: str = "cosine",
|
||||
enforce_detection: bool = True,
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
normalization: str = "base",
|
||||
silent: bool = False,
|
||||
threshold: Optional[float] = None,
|
||||
anti_spoofing: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Verify if an image pair represents the same person or different persons.
|
||||
Args:
|
||||
img1_path (str or np.ndarray or List[float]): Path to the first image.
|
||||
Accepts exact image path as a string, numpy array (BGR), base64 encoded images
|
||||
or pre-calculated embeddings.
|
||||
|
||||
img2_path (str or np.ndarray or List[float]): Path to the second image.
|
||||
Accepts exact image path as a string, numpy array (BGR), base64 encoded images
|
||||
or pre-calculated embeddings.
|
||||
|
||||
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
|
||||
OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).
|
||||
|
||||
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
||||
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
|
||||
(default is opencv).
|
||||
|
||||
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
|
||||
'euclidean', 'euclidean_l2' (default is cosine).
|
||||
|
||||
enforce_detection (boolean): If no face is detected in an image, raise an exception.
|
||||
Set to False to avoid the exception for low-resolution images (default is True).
|
||||
|
||||
align (bool): Flag to enable face alignment (default is True).
|
||||
|
||||
expand_percentage (int): expand detected facial area with a percentage (default is 0).
|
||||
|
||||
normalization (string): Normalize the input image before feeding it to the model.
|
||||
Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)
|
||||
|
||||
silent (boolean): Suppress or allow some log messages for a quieter analysis process
|
||||
(default is False).
|
||||
|
||||
threshold (float): Specify a threshold to determine whether a pair represents the same
|
||||
person or different individuals. This threshold is used for comparing distances.
|
||||
If left unset, default pre-tuned threshold values will be applied based on the specified
|
||||
model name and distance metric (default is None).
|
||||
|
||||
anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
|
||||
|
||||
Returns:
|
||||
result (dict): A dictionary containing verification results with following keys.
|
||||
|
||||
- 'verified' (bool): Indicates whether the images represent the same person (True)
|
||||
or different persons (False).
|
||||
|
||||
- 'distance' (float): The distance measure between the face vectors.
|
||||
A lower distance indicates higher similarity.
|
||||
|
||||
- 'threshold' (float): The maximum threshold used for verification.
|
||||
If the distance is below this threshold, the images are considered a match.
|
||||
|
||||
- 'model' (str): The chosen face recognition model.
|
||||
|
||||
- 'distance_metric' (str): The chosen similarity metric for measuring distances.
|
||||
|
||||
- 'facial_areas' (dict): Rectangular regions of interest for faces in both images.
|
||||
- 'img1': {'x': int, 'y': int, 'w': int, 'h': int}
|
||||
Region of interest for the first image.
|
||||
- 'img2': {'x': int, 'y': int, 'w': int, 'h': int}
|
||||
Region of interest for the second image.
|
||||
|
||||
- 'time' (float): Time taken for the verification process in seconds.
|
||||
"""
|
||||
|
||||
return verification.verify(
|
||||
img1_path=img1_path,
|
||||
img2_path=img2_path,
|
||||
model_name=model_name,
|
||||
detector_backend=detector_backend,
|
||||
distance_metric=distance_metric,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
normalization=normalization,
|
||||
silent=silent,
|
||||
threshold=threshold,
|
||||
anti_spoofing=anti_spoofing,
|
||||
)
|
||||
|
||||
|
||||
def analyze(
|
||||
img_path: Union[str, np.ndarray],
|
||||
actions: Union[tuple, list] = ("emotion", "age", "gender", "race"),
|
||||
enforce_detection: bool = True,
|
||||
detector_backend: str = "opencv",
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
silent: bool = False,
|
||||
anti_spoofing: bool = False,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Analyze facial attributes such as age, gender, emotion, and race in the provided image.
|
||||
Args:
|
||||
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
|
||||
or a base64 encoded image. If the source image contains multiple faces, the result will
|
||||
include information for each detected face.
|
||||
|
||||
actions (tuple): Attributes to analyze. The default is ('age', 'gender', 'emotion', 'race').
|
||||
You can exclude some of these attributes from the analysis if needed.
|
||||
|
||||
enforce_detection (boolean): If no face is detected in an image, raise an exception.
|
||||
Set to False to avoid the exception for low-resolution images (default is True).
|
||||
|
||||
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
||||
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
|
||||
(default is opencv).
|
||||
|
||||
align (boolean): Perform alignment based on the eye positions (default is True).
|
||||
|
||||
expand_percentage (int): expand detected facial area with a percentage (default is 0).
|
||||
|
||||
silent (boolean): Suppress or allow some log messages for a quieter analysis process
|
||||
(default is False).
|
||||
|
||||
anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
|
||||
|
||||
Returns:
|
||||
results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary represents
|
||||
the analysis results for a detected face. Each dictionary in the list contains the
|
||||
following keys:
|
||||
|
||||
- 'region' (dict): Represents the rectangular region of the detected face in the image.
|
||||
- 'x': x-coordinate of the top-left corner of the face.
|
||||
- 'y': y-coordinate of the top-left corner of the face.
|
||||
- 'w': Width of the detected face region.
|
||||
- 'h': Height of the detected face region.
|
||||
|
||||
- 'age' (float): Estimated age of the detected face.
|
||||
|
||||
- 'face_confidence' (float): Confidence score for the detected face.
|
||||
Indicates the reliability of the face detection.
|
||||
|
||||
- 'dominant_gender' (str): The dominant gender in the detected face.
|
||||
Either "Man" or "Woman".
|
||||
|
||||
- 'gender' (dict): Confidence scores for each gender category.
|
||||
- 'Man': Confidence score for the male gender.
|
||||
- 'Woman': Confidence score for the female gender.
|
||||
|
||||
- 'dominant_emotion' (str): The dominant emotion in the detected face.
|
||||
Possible values include "sad," "angry," "surprise," "fear," "happy,"
|
||||
"disgust," and "neutral"
|
||||
|
||||
- 'emotion' (dict): Confidence scores for each emotion category.
|
||||
- 'sad': Confidence score for sadness.
|
||||
- 'angry': Confidence score for anger.
|
||||
- 'surprise': Confidence score for surprise.
|
||||
- 'fear': Confidence score for fear.
|
||||
- 'happy': Confidence score for happiness.
|
||||
- 'disgust': Confidence score for disgust.
|
||||
- 'neutral': Confidence score for neutrality.
|
||||
|
||||
- 'dominant_race' (str): The dominant race in the detected face.
|
||||
Possible values include "indian," "asian," "latino hispanic,"
|
||||
"black," "middle eastern," and "white."
|
||||
|
||||
- 'race' (dict): Confidence scores for each race category.
|
||||
- 'indian': Confidence score for Indian ethnicity.
|
||||
- 'asian': Confidence score for Asian ethnicity.
|
||||
- 'latino hispanic': Confidence score for Latino/Hispanic ethnicity.
|
||||
- 'black': Confidence score for Black ethnicity.
|
||||
- 'middle eastern': Confidence score for Middle Eastern ethnicity.
|
||||
- 'white': Confidence score for White ethnicity.
|
||||
"""
|
||||
return demography.analyze(
|
||||
img_path=img_path,
|
||||
actions=actions,
|
||||
enforce_detection=enforce_detection,
|
||||
detector_backend=detector_backend,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
silent=silent,
|
||||
anti_spoofing=anti_spoofing,
|
||||
)
|
||||
|
||||
|
||||
def find(
|
||||
img_path: Union[str, np.ndarray],
|
||||
db_path: str,
|
||||
model_name: str = "VGG-Face",
|
||||
distance_metric: str = "cosine",
|
||||
enforce_detection: bool = True,
|
||||
detector_backend: str = "opencv",
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
threshold: Optional[float] = None,
|
||||
normalization: str = "base",
|
||||
silent: bool = False,
|
||||
refresh_database: bool = True,
|
||||
anti_spoofing: bool = False,
|
||||
) -> List[pd.DataFrame]:
|
||||
"""
|
||||
Identify individuals in a database
|
||||
Args:
|
||||
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
|
||||
or a base64 encoded image. If the source image contains multiple faces, the result will
|
||||
include information for each detected face.
|
||||
|
||||
db_path (string): Path to the folder containing image files. All detected faces
|
||||
in the database will be considered in the decision-making process.
|
||||
|
||||
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
|
||||
OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).
|
||||
|
||||
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
|
||||
'euclidean', 'euclidean_l2' (default is cosine).
|
||||
|
||||
enforce_detection (boolean): If no face is detected in an image, raise an exception.
|
||||
Set to False to avoid the exception for low-resolution images (default is True).
|
||||
|
||||
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
||||
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
|
||||
(default is opencv).
|
||||
|
||||
align (boolean): Perform alignment based on the eye positions (default is True).
|
||||
|
||||
expand_percentage (int): expand detected facial area with a percentage (default is 0).
|
||||
|
||||
threshold (float): Specify a threshold to determine whether a pair represents the same
|
||||
person or different individuals. This threshold is used for comparing distances.
|
||||
If left unset, default pre-tuned threshold values will be applied based on the specified
|
||||
model name and distance metric (default is None).
|
||||
|
||||
normalization (string): Normalize the input image before feeding it to the model.
|
||||
Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base).
|
||||
|
||||
silent (boolean): Suppress or allow some log messages for a quieter analysis process
|
||||
(default is False).
|
||||
|
||||
refresh_database (boolean): Synchronizes the images representation (pkl) file with the
|
||||
directory/db files, if set to false, it will ignore any file changes inside the db_path
|
||||
(default is True).
|
||||
|
||||
anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
|
||||
|
||||
Returns:
|
||||
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
|
||||
to the identity information for an individual detected in the source image.
|
||||
The DataFrame columns include:
|
||||
|
||||
- 'identity': Identity label of the detected individual.
|
||||
|
||||
- 'target_x', 'target_y', 'target_w', 'target_h': Bounding box coordinates of the
|
||||
target face in the database.
|
||||
|
||||
- 'source_x', 'source_y', 'source_w', 'source_h': Bounding box coordinates of the
|
||||
detected face in the source image.
|
||||
|
||||
- 'threshold': threshold to determine a pair whether same person or different persons
|
||||
|
||||
- 'distance': Similarity score between the faces based on the
|
||||
specified model and distance metric
|
||||
"""
|
||||
return recognition.find(
|
||||
img_path=img_path,
|
||||
db_path=db_path,
|
||||
model_name=model_name,
|
||||
distance_metric=distance_metric,
|
||||
enforce_detection=enforce_detection,
|
||||
detector_backend=detector_backend,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
threshold=threshold,
|
||||
normalization=normalization,
|
||||
silent=silent,
|
||||
refresh_database=refresh_database,
|
||||
anti_spoofing=anti_spoofing,
|
||||
)
|
||||
|
||||
|
||||
def represent(
|
||||
img_path: Union[str, np.ndarray],
|
||||
model_name: str = "VGG-Face",
|
||||
enforce_detection: bool = True,
|
||||
detector_backend: str = "opencv",
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
normalization: str = "base",
|
||||
anti_spoofing: bool = False,
|
||||
max_faces: Optional[int] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Represent facial images as multi-dimensional vector embeddings.
|
||||
|
||||
Args:
|
||||
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
|
||||
or a base64 encoded image. If the source image contains multiple faces, the result will
|
||||
include information for each detected face.
|
||||
|
||||
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
|
||||
OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet
|
||||
(default is VGG-Face.).
|
||||
|
||||
enforce_detection (boolean): If no face is detected in an image, raise an exception.
|
||||
Default is True. Set to False to avoid the exception for low-resolution images
|
||||
(default is True).
|
||||
|
||||
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
||||
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
|
||||
(default is opencv).
|
||||
|
||||
align (boolean): Perform alignment based on the eye positions (default is True).
|
||||
|
||||
expand_percentage (int): expand detected facial area with a percentage (default is 0).
|
||||
|
||||
normalization (string): Normalize the input image before feeding it to the model.
|
||||
Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
|
||||
(default is base).
|
||||
|
||||
anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
|
||||
|
||||
max_faces (int): Set a limit on the number of faces to be processed (default is None).
|
||||
|
||||
Returns:
|
||||
results (List[Dict[str, Any]]): A list of dictionaries, each containing the
|
||||
following fields:
|
||||
|
||||
- embedding (List[float]): Multidimensional vector representing facial features.
|
||||
The number of dimensions varies based on the reference model
|
||||
(e.g., FaceNet returns 128 dimensions, VGG-Face returns 4096 dimensions).
|
||||
|
||||
- facial_area (dict): Detected facial area by face detection in dictionary format.
|
||||
Contains 'x' and 'y' as the left-corner point, and 'w' and 'h'
|
||||
as the width and height. If `detector_backend` is set to 'skip', it represents
|
||||
the full image area and is nonsensical.
|
||||
|
||||
- face_confidence (float): Confidence score of face detection. If `detector_backend` is set
|
||||
to 'skip', the confidence will be 0 and is nonsensical.
|
||||
"""
|
||||
return representation.represent(
|
||||
img_path=img_path,
|
||||
model_name=model_name,
|
||||
enforce_detection=enforce_detection,
|
||||
detector_backend=detector_backend,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
normalization=normalization,
|
||||
anti_spoofing=anti_spoofing,
|
||||
max_faces=max_faces,
|
||||
)
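# --- illustrative usage (not part of the original module) --------------------
# A minimal sketch of pairing represent() with a manual cosine distance, e.g.
# when embeddings are cached elsewhere and verify() is too heavyweight. The
# 0.68 threshold is only a placeholder; real thresholds depend on the
# model/metric pair (see verify() and the benchmarks document).
#
# from deepface import DeepFace
# import numpy as np
#
# e1 = DeepFace.represent(img_path="img1.jpg", model_name="VGG-Face")[0]["embedding"]
# e2 = DeepFace.represent(img_path="img2.jpg", model_name="VGG-Face")[0]["embedding"]
#
# a, b = np.array(e1), np.array(e2)
# cosine_distance = 1 - np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
# same_person = cosine_distance <= 0.68  # placeholder threshold
# ------------------------------------------------------------------------------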
|
||||
|
||||
|
||||
def stream(
|
||||
db_path: str = "",
|
||||
model_name: str = "VGG-Face",
|
||||
detector_backend: str = "opencv",
|
||||
distance_metric: str = "cosine",
|
||||
enable_face_analysis: bool = True,
|
||||
source: Any = 0,
|
||||
time_threshold: int = 5,
|
||||
frame_threshold: int = 5,
|
||||
anti_spoofing: bool = False,
|
||||
) -> None:
|
||||
"""
|
||||
Run real time face recognition and facial attribute analysis
|
||||
|
||||
Args:
|
||||
db_path (string): Path to the folder containing image files. All detected faces
|
||||
in the database will be considered in the decision-making process.
|
||||
|
||||
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
|
||||
OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).
|
||||
|
||||
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
||||
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
|
||||
(default is opencv).
|
||||
|
||||
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
|
||||
'euclidean', 'euclidean_l2' (default is cosine).
|
||||
|
||||
enable_face_analysis (bool): Flag to enable face analysis (default is True).
|
||||
|
||||
source (Any): The source for the video stream (default is 0, which represents the
|
||||
default camera).
|
||||
|
||||
time_threshold (int): The time threshold (in seconds) for face recognition (default is 5).
|
||||
|
||||
frame_threshold (int): The frame threshold for face recognition (default is 5).
|
||||
|
||||
anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
|
||||
time_threshold = max(time_threshold, 1)
|
||||
frame_threshold = max(frame_threshold, 1)
|
||||
|
||||
streaming.analysis(
|
||||
db_path=db_path,
|
||||
model_name=model_name,
|
||||
detector_backend=detector_backend,
|
||||
distance_metric=distance_metric,
|
||||
enable_face_analysis=enable_face_analysis,
|
||||
source=source,
|
||||
time_threshold=time_threshold,
|
||||
frame_threshold=frame_threshold,
|
||||
anti_spoofing=anti_spoofing,
|
||||
)
|
||||
|
||||
|
||||
def extract_faces(
|
||||
img_path: Union[str, np.ndarray],
|
||||
detector_backend: str = "opencv",
|
||||
enforce_detection: bool = True,
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
grayscale: bool = False,
|
||||
color_face: str = "rgb",
|
||||
normalize_face: bool = True,
|
||||
anti_spoofing: bool = False,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Extract faces from a given image
|
||||
|
||||
Args:
|
||||
img_path (str or np.ndarray): Path to the first image. Accepts exact image path
|
||||
as a string, numpy array (BGR), or base64 encoded images.
|
||||
|
||||
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
||||
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
|
||||
(default is opencv).
|
||||
|
||||
enforce_detection (boolean): If no face is detected in an image, raise an exception.
|
||||
Set to False to avoid the exception for low-resolution images (default is True).
|
||||
|
||||
align (bool): Flag to enable face alignment (default is True).
|
||||
|
||||
expand_percentage (int): expand detected facial area with a percentage (default is 0).
|
||||
|
||||
grayscale (boolean): (Deprecated) Flag to convert the output face image to grayscale
|
||||
(default is False).
|
||||
|
||||
color_face (string): Color to return face image output. Options: 'rgb', 'bgr' or 'gray'
|
||||
(default is 'rgb').
|
||||
|
||||
normalize_face (boolean): Flag to enable normalization (divide by 255) of the output
face image (default is True).
|
||||
|
||||
anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
|
||||
|
||||
Returns:
|
||||
results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:
|
||||
|
||||
- "face" (np.ndarray): The detected face as a NumPy array.
|
||||
|
||||
- "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
|
||||
- keys 'x', 'y', 'w', 'h' with int values
|
||||
- keys 'left_eye', 'right_eye' with a tuple of 2 ints as values. left and right eyes
|
||||
are eyes on the left and right respectively with respect to the person itself
|
||||
instead of observer.
|
||||
|
||||
- "confidence" (float): The confidence score associated with the detected face.
|
||||
|
||||
- "is_real" (boolean): antispoofing analyze result. this key is just available in the
|
||||
result only if anti_spoofing is set to True in input arguments.
|
||||
|
||||
- "antispoof_score" (float): score of antispoofing analyze result. this key is
|
||||
just available in the result only if anti_spoofing is set to True in input arguments.
|
||||
"""
|
||||
|
||||
return detection.extract_faces(
|
||||
img_path=img_path,
|
||||
detector_backend=detector_backend,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
grayscale=grayscale,
|
||||
color_face=color_face,
|
||||
normalize_face=normalize_face,
|
||||
anti_spoofing=anti_spoofing,
|
||||
)
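# Editor's sketch, not part of the original module: iterating over the output of the
# extract_faces function above. The path "img.jpg" is an illustrative placeholder and
# the helper name is hypothetical.
def _extract_faces_usage_example() -> None:
    face_objs = extract_faces(img_path="img.jpg", detector_backend="opencv", anti_spoofing=True)
    for face_obj in face_objs:
        region = face_obj["facial_area"]
        print(region["x"], region["y"], region["w"], region["h"], face_obj["confidence"])
        print(face_obj.get("is_real"))  # only present because anti_spoofing=True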
|
||||
|
||||
|
||||
def cli() -> None:
|
||||
"""
|
||||
Command line interface entry point; exposes the public DeepFace functions via the fire library
|
||||
"""
|
||||
import fire
|
||||
|
||||
fire.Fire()
|
||||
|
||||
|
||||
# deprecated function(s)
|
||||
|
||||
|
||||
def detectFace(
|
||||
img_path: Union[str, np.ndarray],
|
||||
target_size: tuple = (224, 224),
|
||||
detector_backend: str = "opencv",
|
||||
enforce_detection: bool = True,
|
||||
align: bool = True,
|
||||
) -> Union[np.ndarray, None]:
|
||||
"""
|
||||
Deprecated face detection function. Use extract_faces for the same functionality.
|
||||
|
||||
Args:
|
||||
img_path (str or np.ndarray): Path to the image. Accepts an exact image path
as a string, a numpy array (BGR), or a base64 encoded image.
|
||||
|
||||
target_size (tuple): final shape of facial image. black pixels will be
|
||||
added to resize the image (default is (224, 224)).
|
||||
|
||||
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
||||
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
|
||||
(default is opencv).
|
||||
|
||||
enforce_detection (boolean): If no face is detected in an image, raise an exception.
|
||||
Set to False to avoid the exception for low-resolution images (default is True).
|
||||
|
||||
align (bool): Flag to enable face alignment (default is True).
|
||||
|
||||
Returns:
|
||||
img (np.ndarray): detected (and aligned) facial area image as numpy array
|
||||
"""
|
||||
logger.warn("Function detectFace is deprecated. Use extract_faces instead.")
|
||||
face_objs = extract_faces(
|
||||
img_path=img_path,
|
||||
detector_backend=detector_backend,
|
||||
grayscale=False,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
)
|
||||
extracted_face = None
|
||||
if len(face_objs) > 0:
|
||||
extracted_face = face_objs[0]["face"]
|
||||
extracted_face = preprocessing.resize_image(img=extracted_face, target_size=target_size)
|
||||
return extracted_face
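# Editor's sketch, not part of the original module: the extract_faces equivalent of the
# deprecated detectFace call above. The path "img.jpg" is an illustrative placeholder
# and the helper name is hypothetical.
def _detectface_migration_example() -> Union[np.ndarray, None]:
    face_objs = extract_faces(img_path="img.jpg", detector_backend="opencv", align=True)
    return face_objs[0]["face"] if len(face_objs) > 0 else None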
|
||||
|
|
@ -1 +0,0 @@
|
|||
__version__ = "0.0.94"
|
||||
|
|
@ -1,133 +0,0 @@
|
|||
{
|
||||
"info": {
|
||||
"_postman_id": "4c0b144e-4294-4bdd-8072-bcb326b1fed2",
|
||||
"name": "deepface-api",
|
||||
"schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json"
|
||||
},
|
||||
"item": [
|
||||
{
|
||||
"name": "Represent",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"header": [],
|
||||
"body": {
|
||||
"mode": "raw",
|
||||
"raw": "{\n \"model_name\": \"Facenet\",\n \"img\": \"/Users/sefik/Desktop/deepface/tests/dataset/img1.jpg\"\n}",
|
||||
"options": {
|
||||
"raw": {
|
||||
"language": "json"
|
||||
}
|
||||
}
|
||||
},
|
||||
"url": {
|
||||
"raw": "http://127.0.0.1:5000/represent",
|
||||
"protocol": "http",
|
||||
"host": [
|
||||
"127",
|
||||
"0",
|
||||
"0",
|
||||
"1"
|
||||
],
|
||||
"port": "5000",
|
||||
"path": [
|
||||
"represent"
|
||||
]
|
||||
}
|
||||
},
|
||||
"response": []
|
||||
},
|
||||
{
|
||||
"name": "Face verification",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"header": [],
|
||||
"body": {
|
||||
"mode": "raw",
|
||||
"raw": " {\n \t\"img1_path\": \"/Users/sefik/Desktop/deepface/tests/dataset/img1.jpg\",\n \"img2_path\": \"/Users/sefik/Desktop/deepface/tests/dataset/img2.jpg\",\n \"model_name\": \"Facenet\",\n \"detector_backend\": \"mtcnn\",\n \"distance_metric\": \"euclidean\"\n }",
|
||||
"options": {
|
||||
"raw": {
|
||||
"language": "json"
|
||||
}
|
||||
}
|
||||
},
|
||||
"url": {
|
||||
"raw": "http://127.0.0.1:5000/verify",
|
||||
"protocol": "http",
|
||||
"host": [
|
||||
"127",
|
||||
"0",
|
||||
"0",
|
||||
"1"
|
||||
],
|
||||
"port": "5000",
|
||||
"path": [
|
||||
"verify"
|
||||
]
|
||||
}
|
||||
},
|
||||
"response": []
|
||||
},
|
||||
{
|
||||
"name": "Face analysis",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"header": [],
|
||||
"body": {
|
||||
"mode": "raw",
|
||||
"raw": "{\n \"img_path\": \"/Users/sefik/Desktop/deepface/tests/dataset/couple.jpg\",\n \"actions\": [\"age\", \"gender\", \"emotion\", \"race\"]\n}",
|
||||
"options": {
|
||||
"raw": {
|
||||
"language": "json"
|
||||
}
|
||||
}
|
||||
},
|
||||
"url": {
|
||||
"raw": "http://127.0.0.1:5000/analyze",
|
||||
"protocol": "http",
|
||||
"host": [
|
||||
"127",
|
||||
"0",
|
||||
"0",
|
||||
"1"
|
||||
],
|
||||
"port": "5000",
|
||||
"path": [
|
||||
"analyze"
|
||||
]
|
||||
}
|
||||
},
|
||||
"response": []
|
||||
},
|
||||
{
|
||||
"name": "Face extractor",
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"header": [],
|
||||
"body": {
|
||||
"mode": "raw",
|
||||
"raw": "{\n \"img_path\": \"/Users/sefik/Desktop/deepface/tests/dataset/couple.jpg\",\n \n}",
|
||||
"options": {
|
||||
"raw": {
|
||||
"language": "json"
|
||||
}
|
||||
}
|
||||
},
|
||||
"url": {
|
||||
"raw": "http://127.0.0.1:5005/extract_faces",
|
||||
"protocol": "http",
|
||||
"host": [
|
||||
"127",
|
||||
"0",
|
||||
"0",
|
||||
"1"
|
||||
],
|
||||
"port": "5005",
|
||||
"path": [
|
||||
"extract_faces"
|
||||
]
|
||||
}
|
||||
},
|
||||
"response": []
|
||||
}
|
||||
]
|
||||
}
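A hedged Python equivalent of the "Face verification" request in the collection above; it assumes the API from the sections below is running locally on port 5000, and it reuses the same placeholder image paths as the collection.

import requests

payload = {
    "img1_path": "/Users/sefik/Desktop/deepface/tests/dataset/img1.jpg",
    "img2_path": "/Users/sefik/Desktop/deepface/tests/dataset/img2.jpg",
    "model_name": "Facenet",
    "detector_backend": "mtcnn",
    "distance_metric": "euclidean",
}
response = requests.post("http://127.0.0.1:5000/verify", json=payload, timeout=60)
print(response.json())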
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
import argparse
|
||||
import app
|
||||
|
||||
if __name__ == "__main__":
|
||||
deepface_app = app.create_app()
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-p", "--port", type=int,
|
||||
default=5000, help="Port of serving api")
|
||||
args = parser.parse_args()
|
||||
deepface_app.run(host="0.0.0.0", port=args.port)
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
# 3rd party dependencies
|
||||
from flask import Flask
|
||||
from flask_cors import CORS
|
||||
|
||||
# project dependencies
|
||||
from deepface import DeepFace
|
||||
from deepface.commons.logger import Logger
|
||||
from deepface.api.src.modules.core.routes import blueprint
|
||||
|
||||
logger = Logger()
|
||||
|
||||
|
||||
def create_app():
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
app.register_blueprint(blueprint)
|
||||
logger.info(f"Welcome to DeepFace API v{DeepFace.__version__}!")
|
||||
return app
|
||||
|
|
@ -1,122 +0,0 @@
|
|||
from flask import Blueprint, request
|
||||
from deepface import DeepFace
|
||||
from deepface.api.src.modules.core import service
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
blueprint = Blueprint("routes", __name__)
|
||||
|
||||
|
||||
@blueprint.route("/")
|
||||
def home():
|
||||
return f"<h1>Welcome to DeepFace API v{DeepFace.__version__}!</h1>"
|
||||
|
||||
|
||||
@blueprint.route("/represent", methods=["POST"])
|
||||
def represent():
|
||||
input_args = request.get_json()
|
||||
|
||||
if input_args is None:
|
||||
return {"message": "empty input set passed"}
|
||||
|
||||
img_path = input_args.get("img") or input_args.get("img_path")
|
||||
if img_path is None:
|
||||
return {"message": "you must pass img_path input,hhhhh"}
|
||||
|
||||
obj = service.represent(
|
||||
img_path=img_path,
|
||||
model_name=input_args.get("model_name", "VGG-Face"),
|
||||
detector_backend=input_args.get("detector_backend", "opencv"),
|
||||
enforce_detection=input_args.get("enforce_detection", True),
|
||||
align=input_args.get("align", True),
|
||||
anti_spoofing=input_args.get("anti_spoofing", False),
|
||||
max_faces=input_args.get("max_faces"),
|
||||
)
|
||||
|
||||
logger.debug(obj)
|
||||
|
||||
return obj
|
||||
|
||||
|
||||
@blueprint.route("/verify", methods=["POST"])
|
||||
def verify():
|
||||
input_args = request.get_json()
|
||||
|
||||
if input_args is None:
|
||||
return {"message": "empty input set passed"}
|
||||
|
||||
img1_path = input_args.get("img1") or input_args.get("img1_path")
|
||||
img2_path = input_args.get("img2") or input_args.get("img2_path")
|
||||
|
||||
if img1_path is None:
|
||||
return {"message": "you must pass img1_path input"}
|
||||
|
||||
if img2_path is None:
|
||||
return {"message": "you must pass img2_path input"}
|
||||
|
||||
verification = service.verify(
|
||||
img1_path=img1_path,
|
||||
img2_path=img2_path,
|
||||
model_name=input_args.get("model_name", "VGG-Face"),
|
||||
detector_backend=input_args.get("detector_backend", "opencv"),
|
||||
distance_metric=input_args.get("distance_metric", "cosine"),
|
||||
align=input_args.get("align", True),
|
||||
enforce_detection=input_args.get("enforce_detection", True),
|
||||
anti_spoofing=input_args.get("anti_spoofing", False),
|
||||
)
|
||||
|
||||
logger.debug(verification)
|
||||
|
||||
return verification
|
||||
|
||||
|
||||
@blueprint.route("/analyze", methods=["POST"])
|
||||
def analyze():
|
||||
input_args = request.get_json()
|
||||
|
||||
if input_args is None:
|
||||
return {"message": "empty input set passed"}
|
||||
|
||||
img_path = input_args.get("img") or input_args.get("img_path")
|
||||
if img_path is None:
|
||||
return {"message": "you must pass img_path input"}
|
||||
|
||||
demographies = service.analyze(
|
||||
img_path=img_path,
|
||||
actions=input_args.get(
|
||||
"actions", ["age", "gender", "emotion", "race"]),
|
||||
detector_backend=input_args.get("detector_backend", "opencv"),
|
||||
enforce_detection=input_args.get("enforce_detection", True),
|
||||
align=input_args.get("align", True),
|
||||
anti_spoofing=input_args.get("anti_spoofing", False),
|
||||
)
|
||||
|
||||
logger.debug(demographies)
|
||||
|
||||
return demographies
|
||||
|
||||
|
||||
@blueprint.route("/extract", methods=["POST"])
|
||||
def extract():
|
||||
input_args = request.get_json()
|
||||
|
||||
if input_args is None:
|
||||
return {"message": "empty input set passed"}
|
||||
|
||||
img_path = input_args.get("img") or input_args.get("img_path")
|
||||
if img_path is None:
|
||||
return {"message": "you must pass img_path input"}
|
||||
logger.debug(f"extracting faces from {img_path}")
|
||||
|
||||
demographies = service.extract(
|
||||
img_path=img_path,
|
||||
detector_backend=input_args.get("detector_backend", "yolov8"),
|
||||
enforce_detection=input_args.get("enforce_detection", False),
|
||||
align=input_args.get("align", True),
|
||||
anti_spoofing=input_args.get("anti_spoofing", False),
|
||||
)
|
||||
|
||||
logger.debug(demographies)
|
||||
|
||||
return demographies
|
||||
|
|
@ -1,114 +0,0 @@
|
|||
# built-in dependencies
|
||||
import traceback
|
||||
from typing import Optional
|
||||
|
||||
# project dependencies
|
||||
from deepface import DeepFace
|
||||
|
||||
# pylint: disable=broad-except
|
||||
|
||||
|
||||
def represent(
|
||||
img_path: str,
|
||||
model_name: str,
|
||||
detector_backend: str,
|
||||
enforce_detection: bool,
|
||||
align: bool,
|
||||
anti_spoofing: bool,
|
||||
max_faces: Optional[int] = None,
|
||||
):
|
||||
try:
|
||||
result = {}
|
||||
embedding_objs = DeepFace.represent(
|
||||
img_path=img_path,
|
||||
model_name=model_name,
|
||||
detector_backend=detector_backend,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
anti_spoofing=anti_spoofing,
|
||||
max_faces=max_faces,
|
||||
)
|
||||
result["results"] = embedding_objs
|
||||
return result
|
||||
except Exception as err:
|
||||
tb_str = traceback.format_exc()
|
||||
return {"error": f"Exception while representing: {str(err)} - {tb_str}"}, 400
|
||||
|
||||
|
||||
def verify(
|
||||
img1_path: str,
|
||||
img2_path: str,
|
||||
model_name: str,
|
||||
detector_backend: str,
|
||||
distance_metric: str,
|
||||
enforce_detection: bool,
|
||||
align: bool,
|
||||
anti_spoofing: bool,
|
||||
):
|
||||
try:
|
||||
obj = DeepFace.verify(
|
||||
img1_path=img1_path,
|
||||
img2_path=img2_path,
|
||||
model_name=model_name,
|
||||
detector_backend=detector_backend,
|
||||
distance_metric=distance_metric,
|
||||
align=align,
|
||||
enforce_detection=enforce_detection,
|
||||
anti_spoofing=anti_spoofing,
|
||||
)
|
||||
return obj
|
||||
except Exception as err:
|
||||
tb_str = traceback.format_exc()
|
||||
return {"error": f"Exception while verifying: {str(err)} - {tb_str}"}, 400
|
||||
|
||||
|
||||
def analyze(
|
||||
img_path: str,
|
||||
actions: list,
|
||||
detector_backend: str,
|
||||
enforce_detection: bool,
|
||||
align: bool,
|
||||
anti_spoofing: bool,
|
||||
):
|
||||
try:
|
||||
result = {}
|
||||
demographies = DeepFace.analyze(
|
||||
img_path=img_path,
|
||||
actions=actions,
|
||||
detector_backend=detector_backend,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
silent=True,
|
||||
anti_spoofing=anti_spoofing,
|
||||
)
|
||||
result["results"] = demographies
|
||||
return result
|
||||
except Exception as err:
|
||||
tb_str = traceback.format_exc()
|
||||
return {"error": f"Exception while analyzing: {str(err)} - {tb_str}"}, 400
|
||||
|
||||
|
||||
def extract(
|
||||
img_path: str,
|
||||
detector_backend: str,
|
||||
enforce_detection: bool,
|
||||
align: bool,
|
||||
anti_spoofing: bool,
|
||||
):
|
||||
try:
|
||||
result = {}
|
||||
demographies = DeepFace.extract_faces(
|
||||
img_path=img_path,
|
||||
detector_backend=detector_backend,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
expand_percentage=0,
|
||||
grayscale=False,
|
||||
normalize_face=True,
|
||||
anti_spoofing=anti_spoofing
|
||||
)
|
||||
result["results"] = demographies
|
||||
return result
|
||||
except Exception as err:
|
||||
tb_str = traceback.format_exc()
|
||||
return {"error": f"Exception while detecting: {str(err)} - {tb_str}"}, 400
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
import os
|
||||
|
||||
SRC_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
ROOT_DIR = os.path.dirname(SRC_DIR)
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
import os
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
|
||||
def initialize_folder() -> None:
|
||||
"""
|
||||
Initialize the folder for storing model weights.
|
||||
|
||||
Raises:
|
||||
OSError: if the folder cannot be created.
|
||||
"""
|
||||
home = get_deepface_home()
|
||||
deepface_home_path = os.path.join(home, ".deepface")
|
||||
weights_path = os.path.join(deepface_home_path, "weights")
|
||||
|
||||
if not os.path.exists(deepface_home_path):
|
||||
os.makedirs(deepface_home_path, exist_ok=True)
|
||||
logger.info(f"Directory {deepface_home_path} has been created")
|
||||
|
||||
if not os.path.exists(weights_path):
|
||||
os.makedirs(weights_path, exist_ok=True)
|
||||
logger.info(f"Directory {weights_path} has been created")
|
||||
|
||||
|
||||
def get_deepface_home() -> str:
|
||||
"""
|
||||
Get the home directory for storing model weights
|
||||
|
||||
Returns:
|
||||
str: the home directory.
|
||||
"""
|
||||
return str(os.getenv("DEEPFACE_HOME", default=os.path.expanduser("~")))
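# Editor's sketch, not part of the original module: redirecting DeepFace's weight
# storage via the DEEPFACE_HOME variable read above. The path "/data/deepface" is an
# illustrative placeholder and the helper name is hypothetical.
def _custom_home_example() -> str:
    os.environ["DEEPFACE_HOME"] = "/data/deepface"
    initialize_folder()  # creates /data/deepface/.deepface/weights if missing
    return get_deepface_home()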
|
||||
|
|
@ -1,148 +0,0 @@
|
|||
# built-in dependencies
|
||||
import os
|
||||
import io
|
||||
from typing import List, Union, Tuple
|
||||
import hashlib
|
||||
import base64
|
||||
from pathlib import Path
|
||||
|
||||
# 3rd party dependencies
|
||||
import requests
|
||||
import numpy as np
|
||||
import cv2
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def list_images(path: str) -> List[str]:
|
||||
"""
|
||||
List images in a given path
|
||||
Args:
|
||||
path (str): directory location to scan for images
|
||||
Returns:
|
||||
images (list): list of exact image paths
|
||||
"""
|
||||
images = []
|
||||
for r, _, f in os.walk(path):
|
||||
for file in f:
|
||||
exact_path = os.path.join(r, file)
|
||||
|
||||
ext_lower = os.path.splitext(exact_path)[-1].lower()
|
||||
|
||||
if ext_lower not in {".jpg", ".jpeg", ".png"}:
|
||||
continue
|
||||
|
||||
with Image.open(exact_path) as img: # lazy
|
||||
if img.format.lower() in {"jpeg", "png"}:
|
||||
images.append(exact_path)
|
||||
return images
|
||||
|
||||
|
||||
def find_image_hash(file_path: str) -> str:
|
||||
"""
|
||||
Find the hash of a given image file based on its properties;
finding the hash of the image content itself is a costly operation
|
||||
Args:
|
||||
file_path (str): exact image path
|
||||
Returns:
|
||||
hash (str): digest with sha1 algorithm
|
||||
"""
|
||||
file_stats = os.stat(file_path)
|
||||
|
||||
# some properties
|
||||
file_size = file_stats.st_size
|
||||
creation_time = file_stats.st_ctime
|
||||
modification_time = file_stats.st_mtime
|
||||
|
||||
properties = f"{file_size}-{creation_time}-{modification_time}"
|
||||
|
||||
hasher = hashlib.sha1()
|
||||
hasher.update(properties.encode("utf-8"))
|
||||
return hasher.hexdigest()
|
||||
|
||||
|
||||
def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]:
|
||||
"""
|
||||
Load image from path, url, base64 or numpy array.
|
||||
Args:
|
||||
img: a path, url, base64 or numpy array.
|
||||
Returns:
|
||||
image (numpy array): the loaded image in BGR format
|
||||
image name (str): image name itself
|
||||
"""
|
||||
|
||||
# The image is already a numpy array
|
||||
if isinstance(img, np.ndarray):
|
||||
return img, "numpy array"
|
||||
|
||||
if isinstance(img, Path):
|
||||
img = str(img)
|
||||
|
||||
if not isinstance(img, str):
|
||||
raise ValueError(f"img must be numpy array or str but it is {type(img)}")
|
||||
|
||||
# The image is a base64 string
|
||||
if img.startswith("data:image/"):
|
||||
return load_image_from_base64(img), "base64 encoded string"
|
||||
|
||||
# The image is a url
|
||||
if img.lower().startswith(("http://", "https://")):
|
||||
return load_image_from_web(url=img), img
|
||||
|
||||
# The image is a path
|
||||
if not os.path.isfile(img):
|
||||
raise ValueError(f"Confirm that {img} exists")
|
||||
|
||||
# image must be a file on the system then
|
||||
|
||||
# image path must contain only ASCII characters
if not img.isascii():
    raise ValueError(f"Input image path must not contain non-ASCII characters - {img}")
|
||||
|
||||
img_obj_bgr = cv2.imread(img)
|
||||
# img_obj_rgb = cv2.cvtColor(img_obj_bgr, cv2.COLOR_BGR2RGB)
|
||||
return img_obj_bgr, img
|
||||
|
||||
|
||||
def load_image_from_base64(uri: str) -> np.ndarray:
|
||||
"""
|
||||
Load image from base64 string.
|
||||
Args:
|
||||
uri: a base64 string.
|
||||
Returns:
|
||||
numpy array: the loaded image.
|
||||
"""
|
||||
|
||||
encoded_data_parts = uri.split(",")
|
||||
|
||||
if len(encoded_data_parts) < 2:
|
||||
raise ValueError("format error in base64 encoded string")
|
||||
|
||||
encoded_data = encoded_data_parts[1]
|
||||
decoded_bytes = base64.b64decode(encoded_data)
|
||||
|
||||
# similar to find functionality, we are just considering these extensions
|
||||
# content type is safer option than file extension
|
||||
with Image.open(io.BytesIO(decoded_bytes)) as img:
|
||||
file_type = img.format.lower()
|
||||
if file_type not in {"jpeg", "png"}:
|
||||
raise ValueError(f"Input image can be jpg or png, but it is {file_type}")
|
||||
|
||||
nparr = np.frombuffer(decoded_bytes, np.uint8)
|
||||
img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
|
||||
# img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
|
||||
return img_bgr
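# Editor's sketch, not part of the original module: round-tripping a local file through
# the base64 loader above. The path "img.jpg" is an illustrative placeholder and the
# helper name is hypothetical.
def _base64_loading_example() -> np.ndarray:
    with open("img.jpg", "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return load_image_from_base64(f"data:image/jpeg;base64,{encoded}")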
|
||||
|
||||
|
||||
def load_image_from_web(url: str) -> np.ndarray:
|
||||
"""
|
||||
Loading an image from web
|
||||
Args:
|
||||
url: link for the image
|
||||
Returns:
|
||||
img (np.ndarray): equivalent to pre-loaded image from opencv (BGR format)
|
||||
"""
|
||||
response = requests.get(url, stream=True, timeout=60)
|
||||
response.raise_for_status()
|
||||
image_array = np.asarray(bytearray(response.raw.read()), dtype=np.uint8)
|
||||
img = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
|
||||
return img
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
import os
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
# pylint: disable=broad-except
|
||||
class Logger:
|
||||
"""
|
||||
A Logger class for logging messages with a specific log level.
|
||||
|
||||
The class follows the singleton design pattern, ensuring that only one
|
||||
instance of the Logger is created. The parameters of the first instance
|
||||
are preserved across all instances.
|
||||
"""
|
||||
|
||||
__instance = None
|
||||
|
||||
def __new__(cls):
|
||||
if cls.__instance is None:
|
||||
cls.__instance = super(Logger, cls).__new__(cls)
|
||||
return cls.__instance
|
||||
|
||||
def __init__(self):
|
||||
if not hasattr(self, "_singleton_initialized"):
|
||||
self._singleton_initialized = True # to prevent multiple initializations
|
||||
log_level = os.environ.get("DEEPFACE_LOG_LEVEL", str(logging.INFO))
|
||||
try:
|
||||
self.log_level = int(log_level)
|
||||
except Exception as err:
|
||||
self.dump_log(
|
||||
f"Exception while parsing $DEEPFACE_LOG_LEVEL."
|
||||
f"Expected int but it is {log_level} ({str(err)})."
|
||||
"Setting app log level to info."
|
||||
)
|
||||
self.log_level = logging.INFO
|
||||
|
||||
def info(self, message):
|
||||
if self.log_level <= logging.INFO:
|
||||
self.dump_log(f"{message}")
|
||||
|
||||
def debug(self, message):
|
||||
if self.log_level <= logging.DEBUG:
|
||||
self.dump_log(f"🕷️ {message}")
|
||||
|
||||
def warn(self, message):
|
||||
if self.log_level <= logging.WARNING:
|
||||
self.dump_log(f"⚠️ {message}")
|
||||
|
||||
def error(self, message):
|
||||
if self.log_level <= logging.ERROR:
|
||||
self.dump_log(f"🔴 {message}")
|
||||
|
||||
def critical(self, message):
|
||||
if self.log_level <= logging.CRITICAL:
|
||||
self.dump_log(f"💥 {message}")
|
||||
|
||||
def dump_log(self, message):
|
||||
print(f"{str(datetime.now())[2:-7]} - {message}")
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
# built-in dependencies
|
||||
import hashlib
|
||||
|
||||
# 3rd party dependencies
|
||||
import tensorflow as tf
|
||||
|
||||
# package dependencies
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
|
||||
def get_tf_major_version() -> int:
|
||||
"""
|
||||
Find tensorflow's major version
|
||||
Returns
|
||||
major_version (int)
|
||||
"""
|
||||
return int(tf.__version__.split(".", maxsplit=1)[0])
|
||||
|
||||
|
||||
def get_tf_minor_version() -> int:
|
||||
"""
|
||||
Find tensorflow's minor version
|
||||
Returns
|
||||
minor_version (int)
|
||||
"""
|
||||
return int(tf.__version__.split(".", maxsplit=-1)[1])
|
||||
|
||||
|
||||
def validate_for_keras3():
|
||||
tf_major = get_tf_major_version()
|
||||
tf_minor = get_tf_minor_version()
|
||||
|
||||
# tf_keras is a must dependency after tf 2.16
|
||||
if tf_major == 1 or (tf_major == 2 and tf_minor < 16):
|
||||
return
|
||||
|
||||
try:
|
||||
import tf_keras
|
||||
|
||||
logger.debug(f"tf_keras is already available - {tf_keras.__version__}")
|
||||
except ImportError as err:
|
||||
# you may consider installing that package here
|
||||
raise ValueError(
|
||||
f"You have tensorflow {tf.__version__} and this requires "
|
||||
"tf-keras package. Please run `pip install tf-keras` "
|
||||
"or downgrade your tensorflow."
|
||||
) from err
|
||||
|
||||
|
||||
def find_file_hash(file_path: str, hash_algorithm: str = "sha256") -> str:
|
||||
"""
|
||||
Find the hash of a given file with its content
|
||||
Args:
|
||||
file_path (str): exact path of a given file
|
||||
hash_algorithm (str): hash algorithm
|
||||
Returns:
|
||||
hash (str)
|
||||
"""
|
||||
hash_func = hashlib.new(hash_algorithm)
|
||||
with open(file_path, "rb") as f:
|
||||
while chunk := f.read(8192):
|
||||
hash_func.update(chunk)
|
||||
return hash_func.hexdigest()
|
||||
|
|
@ -1,97 +0,0 @@
|
|||
# built-in dependencies
|
||||
import os
|
||||
from typing import Optional
|
||||
import zipfile
|
||||
import bz2
|
||||
|
||||
# 3rd party dependencies
|
||||
import gdown
|
||||
|
||||
# project dependencies
|
||||
from deepface.commons import folder_utils, package_utils
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
tf_version = package_utils.get_tf_major_version()
|
||||
if tf_version == 1:
|
||||
from keras.models import Sequential
|
||||
else:
|
||||
from tensorflow.keras.models import Sequential
|
||||
|
||||
logger = Logger()
|
||||
|
||||
ALLOWED_COMPRESS_TYPES = ["zip", "bz2"]
|
||||
|
||||
|
||||
def download_weights_if_necessary(
|
||||
file_name: str, source_url: str, compress_type: Optional[str] = None
|
||||
) -> str:
|
||||
"""
|
||||
Download the weights of a pre-trained model from external source if not downloaded yet.
|
||||
Args:
|
||||
file_name (str): target file name with extension
|
||||
source_url (str): source url to download from
|
||||
compress_type (optional str): compress type e.g. zip or bz2
|
||||
Returns
|
||||
target_file (str): exact path for the target file
|
||||
"""
|
||||
home = folder_utils.get_deepface_home()
|
||||
|
||||
target_file = os.path.join(home, ".deepface/weights", file_name)
|
||||
|
||||
if os.path.isfile(target_file):
|
||||
logger.debug(f"{file_name} is already available at {target_file}")
|
||||
return target_file
|
||||
|
||||
if compress_type is not None and compress_type not in ALLOWED_COMPRESS_TYPES:
|
||||
raise ValueError(f"unimplemented compress type - {compress_type}")
|
||||
|
||||
try:
|
||||
logger.info(f"🔗 {file_name} will be downloaded from {source_url} to {target_file}...")
|
||||
|
||||
if compress_type is None:
|
||||
gdown.download(source_url, target_file, quiet=False)
|
||||
elif compress_type is not None and compress_type in ALLOWED_COMPRESS_TYPES:
|
||||
gdown.download(source_url, f"{target_file}.{compress_type}", quiet=False)
|
||||
|
||||
except Exception as err:
|
||||
raise ValueError(
|
||||
f"⛓️💥 An exception occurred while downloading {file_name} from {source_url}. "
|
||||
f"Consider downloading it manually to {target_file}."
|
||||
) from err
|
||||
|
||||
# uncompress downloaded file
|
||||
if compress_type == "zip":
|
||||
with zipfile.ZipFile(f"{target_file}.zip", "r") as zip_ref:
|
||||
zip_ref.extractall(os.path.join(home, ".deepface/weights"))
|
||||
logger.info(f"{target_file}.zip unzipped")
|
||||
elif compress_type == "bz2":
|
||||
bz2file = bz2.BZ2File(f"{target_file}.bz2")
|
||||
data = bz2file.read()
|
||||
with open(target_file, "wb") as f:
|
||||
f.write(data)
|
||||
logger.info(f"{target_file}.bz2 unzipped")
|
||||
|
||||
return target_file
|
||||
|
||||
|
||||
def load_model_weights(model: Sequential, weight_file: str) -> Sequential:
|
||||
"""
|
||||
Load pre-trained weights for a given model
|
||||
Args:
|
||||
model (keras.models.Sequential): pre-built model
|
||||
weight_file (str): exact path of pre-trained weights
|
||||
Returns:
|
||||
model (keras.models.Sequential): pre-built model with
|
||||
updated weights
|
||||
"""
|
||||
try:
|
||||
model.load_weights(weight_file)
|
||||
except Exception as err:
|
||||
raise ValueError(
|
||||
f"An exception occurred while loading the pre-trained weights from {weight_file}."
|
||||
"This might have happened due to an interruption during the download."
|
||||
"You may want to delete it and allow DeepFace to download it again during the next run."
|
||||
"If the issue persists, consider downloading the file directly from the source "
|
||||
"and copying it to the target folder."
|
||||
) from err
|
||||
return model
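# Editor's sketch, not part of the original module: the two-step download-then-load
# flow used by the model clients elsewhere in the package. The file name and URL are
# illustrative placeholders and the helper name is hypothetical.
def _weight_loading_example(model: Sequential) -> Sequential:
    weight_file = download_weights_if_necessary(
        file_name="example_weights.h5",
        source_url="https://example.com/example_weights.h5",
    )
    return load_model_weights(model=model, weight_file=weight_file)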
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
from typing import Union
|
||||
from abc import ABC, abstractmethod
|
||||
import numpy as np
|
||||
from deepface.commons import package_utils
|
||||
|
||||
tf_version = package_utils.get_tf_major_version()
|
||||
if tf_version == 1:
|
||||
from keras.models import Model
|
||||
else:
|
||||
from tensorflow.keras.models import Model
|
||||
|
||||
# Notice that all facial attribute analysis models must be inherited from this class
|
||||
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class Demography(ABC):
|
||||
model: Model
|
||||
model_name: str
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, img: np.ndarray) -> Union[np.ndarray, np.float64]:
|
||||
pass
|
||||
|
|
@ -1,69 +0,0 @@
|
|||
from typing import List, Tuple, Optional
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
import numpy as np
|
||||
|
||||
# Notice that all facial detector models must be inherited from this class
|
||||
|
||||
|
||||
# pylint: disable=unnecessary-pass, too-few-public-methods
|
||||
class Detector(ABC):
|
||||
@abstractmethod
|
||||
def detect_faces(self, img: np.ndarray) -> List["FacialAreaRegion"]:
|
||||
"""
|
||||
Interface to detect and align faces
|
||||
|
||||
Args:
|
||||
img (np.ndarray): pre-loaded image as numpy array
|
||||
|
||||
Returns:
|
||||
results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
|
||||
where each object contains:
|
||||
|
||||
- facial_area (FacialAreaRegion): The facial area region represented
|
||||
as x, y, w, h, left_eye and right_eye. left eye and right eye are
|
||||
eyes on the left and right respectively with respect to the person
|
||||
instead of observer.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class FacialAreaRegion:
|
||||
"""
|
||||
Initialize a Face object.
|
||||
|
||||
Args:
|
||||
x (int): The x-coordinate of the top-left corner of the bounding box.
|
||||
y (int): The y-coordinate of the top-left corner of the bounding box.
|
||||
w (int): The width of the bounding box.
|
||||
h (int): The height of the bounding box.
|
||||
left_eye (tuple): The coordinates (x, y) of the left eye with respect to
|
||||
the person instead of observer. Default is None.
|
||||
right_eye (tuple): The coordinates (x, y) of the right eye with respect to
|
||||
the person instead of observer. Default is None.
|
||||
confidence (float, optional): Confidence score associated with the face detection.
|
||||
Default is None.
|
||||
"""
|
||||
x: int
|
||||
y: int
|
||||
w: int
|
||||
h: int
|
||||
left_eye: Optional[Tuple[int, int]] = None
|
||||
right_eye: Optional[Tuple[int, int]] = None
|
||||
confidence: Optional[float] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class DetectedFace:
|
||||
"""
|
||||
Initialize detected face object.
|
||||
|
||||
Args:
|
||||
img (np.ndarray): detected face image as numpy array
|
||||
facial_area (FacialAreaRegion): detected face's metadata (e.g. bounding box)
|
||||
confidence (float): confidence score for face detection
|
||||
"""
|
||||
img: np.ndarray
|
||||
facial_area: FacialAreaRegion
|
||||
confidence: float
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
from abc import ABC
|
||||
from typing import Any, Union, List, Tuple
|
||||
import numpy as np
|
||||
from deepface.commons import package_utils
|
||||
|
||||
tf_version = package_utils.get_tf_major_version()
|
||||
if tf_version == 2:
|
||||
from tensorflow.keras.models import Model
|
||||
else:
|
||||
from keras.models import Model
|
||||
|
||||
# Notice that all facial recognition models must be inherited from this class
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class FacialRecognition(ABC):
|
||||
model: Union[Model, Any]
|
||||
model_name: str
|
||||
input_shape: Tuple[int, int]
|
||||
output_shape: int
|
||||
|
||||
def forward(self, img: np.ndarray) -> List[float]:
|
||||
if not isinstance(self.model, Model):
|
||||
raise ValueError(
|
||||
"You must overwrite forward method if it is not a keras model,"
|
||||
f"but {self.model_name} not overwritten!"
|
||||
)
|
||||
# model.predict causes memory issue when it is called in a for loop
|
||||
# embedding = model.predict(img, verbose=0)[0].tolist()
|
||||
return self.model(img, training=False).numpy()[0].tolist()
|
||||
|
|
@ -1,89 +0,0 @@
|
|||
# 3rd party dependencies
|
||||
import numpy as np
|
||||
|
||||
# project dependencies
|
||||
from deepface.models.facial_recognition import VGGFace
|
||||
from deepface.commons import package_utils, weight_utils
|
||||
from deepface.models.Demography import Demography
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
# ----------------------------------------
|
||||
# dependency configurations
|
||||
|
||||
tf_version = package_utils.get_tf_major_version()
|
||||
|
||||
if tf_version == 1:
|
||||
from keras.models import Model, Sequential
|
||||
from keras.layers import Convolution2D, Flatten, Activation
|
||||
else:
|
||||
from tensorflow.keras.models import Model, Sequential
|
||||
from tensorflow.keras.layers import Convolution2D, Flatten, Activation
|
||||
|
||||
# ----------------------------------------
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class ApparentAgeClient(Demography):
|
||||
"""
|
||||
Age model class
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.model = load_model()
|
||||
self.model_name = "Age"
|
||||
|
||||
def predict(self, img: np.ndarray) -> np.float64:
|
||||
# model.predict causes memory issue when it is called in a for loop
|
||||
# age_predictions = self.model.predict(img, verbose=0)[0, :]
|
||||
age_predictions = self.model(img, training=False).numpy()[0, :]
|
||||
return find_apparent_age(age_predictions)
|
||||
|
||||
|
||||
def load_model(
|
||||
url="https://github.com/serengil/deepface_models/releases/download/v1.0/age_model_weights.h5",
|
||||
) -> Model:
|
||||
"""
|
||||
Construct age model, download its weights and load
|
||||
Returns:
|
||||
model (Model)
|
||||
"""
|
||||
|
||||
model = VGGFace.base_model()
|
||||
|
||||
# --------------------------
|
||||
|
||||
classes = 101
|
||||
base_model_output = Sequential()
|
||||
base_model_output = Convolution2D(classes, (1, 1), name="predictions")(model.layers[-4].output)
|
||||
base_model_output = Flatten()(base_model_output)
|
||||
base_model_output = Activation("softmax")(base_model_output)
|
||||
|
||||
# --------------------------
|
||||
|
||||
age_model = Model(inputs=model.input, outputs=base_model_output)
|
||||
|
||||
# --------------------------
|
||||
|
||||
# load weights
|
||||
weight_file = weight_utils.download_weights_if_necessary(
|
||||
file_name="age_model_weights.h5", source_url=url
|
||||
)
|
||||
|
||||
age_model = weight_utils.load_model_weights(
|
||||
model=age_model, weight_file=weight_file
|
||||
)
|
||||
|
||||
return age_model
|
||||
|
||||
def find_apparent_age(age_predictions: np.ndarray) -> np.float64:
|
||||
"""
|
||||
Find the apparent age prediction from given age class probabilities
Args:
age_predictions (np.ndarray): model output of shape (101,) with probabilities for ages 0-100
Returns:
apparent_age (np.float64)
|
||||
"""
|
||||
output_indexes = np.arange(0, 101)
|
||||
apparent_age = np.sum(age_predictions * output_indexes)
|
||||
return apparent_age
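# Editor's sketch, not part of the original module: apparent age is the expectation
# over the 101 age classes, e.g. probability mass split evenly between ages 30 and 31
# yields 30.5. The helper name is hypothetical.
def _apparent_age_example() -> np.float64:
    probs = np.zeros(101)
    probs[30], probs[31] = 0.5, 0.5
    return find_apparent_age(probs)  # 0.5 * 30 + 0.5 * 31 = 30.5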
|
||||
|
|
@ -1,103 +0,0 @@
|
|||
# 3rd party dependencies
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
# project dependencies
|
||||
from deepface.commons import package_utils, weight_utils
|
||||
from deepface.models.Demography import Demography
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
# -------------------------------------------
|
||||
# pylint: disable=line-too-long
|
||||
# -------------------------------------------
|
||||
# dependency configuration
|
||||
tf_version = package_utils.get_tf_major_version()
|
||||
|
||||
if tf_version == 1:
|
||||
from keras.models import Sequential
|
||||
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout
|
||||
else:
|
||||
from tensorflow.keras.models import Sequential
|
||||
from tensorflow.keras.layers import (
|
||||
Conv2D,
|
||||
MaxPooling2D,
|
||||
AveragePooling2D,
|
||||
Flatten,
|
||||
Dense,
|
||||
Dropout,
|
||||
)
|
||||
# -------------------------------------------
|
||||
|
||||
# Labels for the emotions that can be detected by the model.
|
||||
labels = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class EmotionClient(Demography):
|
||||
"""
|
||||
Emotion model class
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.model = load_model()
|
||||
self.model_name = "Emotion"
|
||||
|
||||
def predict(self, img: np.ndarray) -> np.ndarray:
|
||||
img_gray = cv2.cvtColor(img[0], cv2.COLOR_BGR2GRAY)
|
||||
img_gray = cv2.resize(img_gray, (48, 48))
|
||||
img_gray = np.expand_dims(img_gray, axis=0)
|
||||
|
||||
# model.predict causes memory issue when it is called in a for loop
|
||||
# emotion_predictions = self.model.predict(img_gray, verbose=0)[0, :]
|
||||
emotion_predictions = self.model(img_gray, training=False).numpy()[0, :]
|
||||
|
||||
return emotion_predictions
|
||||
|
||||
|
||||
def load_model(
|
||||
url="https://github.com/serengil/deepface_models/releases/download/v1.0/facial_expression_model_weights.h5",
|
||||
) -> Sequential:
|
||||
"""
|
||||
Construct emotion model, download and load its weights
|
||||
"""
|
||||
|
||||
num_classes = 7
|
||||
|
||||
model = Sequential()
|
||||
|
||||
# 1st convolution layer
|
||||
model.add(Conv2D(64, (5, 5), activation="relu", input_shape=(48, 48, 1)))
|
||||
model.add(MaxPooling2D(pool_size=(5, 5), strides=(2, 2)))
|
||||
|
||||
# 2nd convolution layer
|
||||
model.add(Conv2D(64, (3, 3), activation="relu"))
|
||||
model.add(Conv2D(64, (3, 3), activation="relu"))
|
||||
model.add(AveragePooling2D(pool_size=(3, 3), strides=(2, 2)))
|
||||
|
||||
# 3rd convolution layer
|
||||
model.add(Conv2D(128, (3, 3), activation="relu"))
|
||||
model.add(Conv2D(128, (3, 3), activation="relu"))
|
||||
model.add(AveragePooling2D(pool_size=(3, 3), strides=(2, 2)))
|
||||
|
||||
model.add(Flatten())
|
||||
|
||||
# fully connected neural networks
|
||||
model.add(Dense(1024, activation="relu"))
|
||||
model.add(Dropout(0.2))
|
||||
model.add(Dense(1024, activation="relu"))
|
||||
model.add(Dropout(0.2))
|
||||
|
||||
model.add(Dense(num_classes, activation="softmax"))
|
||||
|
||||
# ----------------------------
|
||||
|
||||
weight_file = weight_utils.download_weights_if_necessary(
|
||||
file_name="facial_expression_model_weights.h5", source_url=url
|
||||
)
|
||||
|
||||
model = weight_utils.load_model_weights(
|
||||
model=model, weight_file=weight_file
|
||||
)
|
||||
|
||||
return model
|
||||
|
|
@ -1,79 +0,0 @@
|
|||
# 3rd party dependencies
|
||||
import numpy as np
|
||||
|
||||
# project dependencies
|
||||
from deepface.models.facial_recognition import VGGFace
|
||||
from deepface.commons import package_utils, weight_utils
|
||||
from deepface.models.Demography import Demography
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
# -------------------------------------
|
||||
# pylint: disable=line-too-long
|
||||
# -------------------------------------
|
||||
# dependency configurations
|
||||
|
||||
tf_version = package_utils.get_tf_major_version()
|
||||
if tf_version == 1:
|
||||
from keras.models import Model, Sequential
|
||||
from keras.layers import Convolution2D, Flatten, Activation
|
||||
else:
|
||||
from tensorflow.keras.models import Model, Sequential
|
||||
from tensorflow.keras.layers import Convolution2D, Flatten, Activation
|
||||
# -------------------------------------
|
||||
|
||||
# Labels for the genders that can be detected by the model.
|
||||
labels = ["Woman", "Man"]
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class GenderClient(Demography):
|
||||
"""
|
||||
Gender model class
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.model = load_model()
|
||||
self.model_name = "Gender"
|
||||
|
||||
def predict(self, img: np.ndarray) -> np.ndarray:
|
||||
# model.predict causes memory issue when it is called in a for loop
|
||||
# return self.model.predict(img, verbose=0)[0, :]
|
||||
return self.model(img, training=False).numpy()[0, :]
|
||||
|
||||
|
||||
def load_model(
|
||||
url="https://github.com/serengil/deepface_models/releases/download/v1.0/gender_model_weights.h5",
|
||||
) -> Model:
|
||||
"""
|
||||
Construct gender model, download its weights and load
|
||||
Returns:
|
||||
model (Model)
|
||||
"""
|
||||
|
||||
model = VGGFace.base_model()
|
||||
|
||||
# --------------------------
|
||||
|
||||
classes = 2
|
||||
base_model_output = Sequential()
|
||||
base_model_output = Convolution2D(classes, (1, 1), name="predictions")(model.layers[-4].output)
|
||||
base_model_output = Flatten()(base_model_output)
|
||||
base_model_output = Activation("softmax")(base_model_output)
|
||||
|
||||
# --------------------------
|
||||
|
||||
gender_model = Model(inputs=model.input, outputs=base_model_output)
|
||||
|
||||
# --------------------------
|
||||
|
||||
# load weights
|
||||
weight_file = weight_utils.download_weights_if_necessary(
|
||||
file_name="gender_model_weights.h5", source_url=url
|
||||
)
|
||||
|
||||
gender_model = weight_utils.load_model_weights(
|
||||
model=gender_model, weight_file=weight_file
|
||||
)
|
||||
|
||||
return gender_model
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
# 3rd party dependencies
|
||||
import numpy as np
|
||||
|
||||
# project dependencies
|
||||
from deepface.models.facial_recognition import VGGFace
|
||||
from deepface.commons import package_utils, weight_utils
|
||||
from deepface.models.Demography import Demography
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
# --------------------------
|
||||
# pylint: disable=line-too-long
|
||||
# --------------------------
|
||||
# dependency configurations
|
||||
tf_version = package_utils.get_tf_major_version()
|
||||
|
||||
if tf_version == 1:
|
||||
from keras.models import Model, Sequential
|
||||
from keras.layers import Convolution2D, Flatten, Activation
|
||||
else:
|
||||
from tensorflow.keras.models import Model, Sequential
|
||||
from tensorflow.keras.layers import Convolution2D, Flatten, Activation
|
||||
# --------------------------
|
||||
# Labels for the ethnic phenotypes that can be detected by the model.
|
||||
labels = ["asian", "indian", "black", "white", "middle eastern", "latino hispanic"]
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class RaceClient(Demography):
|
||||
"""
|
||||
Race model class
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.model = load_model()
|
||||
self.model_name = "Race"
|
||||
|
||||
def predict(self, img: np.ndarray) -> np.ndarray:
|
||||
# model.predict causes memory issue when it is called in a for loop
|
||||
# return self.model.predict(img, verbose=0)[0, :]
|
||||
return self.model(img, training=False).numpy()[0, :]
|
||||
|
||||
|
||||
def load_model(
|
||||
url="https://github.com/serengil/deepface_models/releases/download/v1.0/race_model_single_batch.h5",
|
||||
) -> Model:
|
||||
"""
|
||||
Construct race model, download its weights and load
|
||||
"""
|
||||
|
||||
model = VGGFace.base_model()
|
||||
|
||||
# --------------------------
|
||||
|
||||
classes = 6
|
||||
base_model_output = Sequential()
|
||||
base_model_output = Convolution2D(classes, (1, 1), name="predictions")(model.layers[-4].output)
|
||||
base_model_output = Flatten()(base_model_output)
|
||||
base_model_output = Activation("softmax")(base_model_output)
|
||||
|
||||
# --------------------------
|
||||
|
||||
race_model = Model(inputs=model.input, outputs=base_model_output)
|
||||
|
||||
# --------------------------
|
||||
|
||||
# load weights
|
||||
weight_file = weight_utils.download_weights_if_necessary(
|
||||
file_name="race_model_single_batch.h5", source_url=url
|
||||
)
|
||||
|
||||
race_model = weight_utils.load_model_weights(
|
||||
model=race_model, weight_file=weight_file
|
||||
)
|
||||
|
||||
return race_model
|
||||
|
|
@ -1,208 +0,0 @@
|
|||
# built-in dependencies
|
||||
import os
|
||||
from typing import List
|
||||
|
||||
# 3rd party dependencies
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
# project dependencies
|
||||
from deepface.commons import weight_utils
|
||||
from deepface.models.Detector import Detector, FacialAreaRegion
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
# pylint: disable=c-extension-no-member
|
||||
|
||||
WEIGHTS_URL = "https://github.com/Star-Clouds/CenterFace/raw/master/models/onnx/centerface.onnx"
|
||||
|
||||
|
||||
class CenterFaceClient(Detector):
|
||||
def __init__(self):
|
||||
# BUG: model must be flushed for each call
|
||||
# self.model = self.build_model()
|
||||
pass
|
||||
|
||||
def build_model(self):
|
||||
"""
|
||||
Download pre-trained weights of CenterFace model if necessary and load built model
|
||||
"""
|
||||
weights_path = weight_utils.download_weights_if_necessary(
|
||||
file_name="centerface.onnx", source_url=WEIGHTS_URL
|
||||
)
|
||||
|
||||
return CenterFace(weight_path=weights_path)
|
||||
|
||||
def detect_faces(self, img: np.ndarray) -> List["FacialAreaRegion"]:
|
||||
"""
|
||||
Detect and align face with CenterFace
|
||||
|
||||
Args:
|
||||
img (np.ndarray): pre-loaded image as numpy array
|
||||
|
||||
Returns:
|
||||
results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
|
||||
"""
|
||||
resp = []
|
||||
|
||||
threshold = float(os.getenv("CENTERFACE_THRESHOLD", "0.80"))
|
||||
|
||||
# BUG: model causes problematic results from 2nd call if it is not flushed
|
||||
# detections, landmarks = self.model.forward(
|
||||
# img, img.shape[0], img.shape[1], threshold=threshold
|
||||
# )
|
||||
detections, landmarks = self.build_model().forward(
|
||||
img, img.shape[0], img.shape[1], threshold=threshold
|
||||
)
|
||||
|
||||
for i, detection in enumerate(detections):
|
||||
boxes, confidence = detection[:4], detection[4]
|
||||
|
||||
x = boxes[0]
|
||||
y = boxes[1]
|
||||
w = boxes[2] - x
|
||||
h = boxes[3] - y
|
||||
|
||||
landmark = landmarks[i]
|
||||
|
||||
right_eye = (int(landmark[0]), int(landmark[1]))
|
||||
left_eye = (int(landmark[2]), int(landmark[3]))
|
||||
# nose = (int(landmark[4]), int(landmark [5]))
|
||||
# mouth_right = (int(landmark[6]), int(landmark [7]))
|
||||
# mouth_left = (int(landmark[8]), int(landmark [9]))
|
||||
|
||||
facial_area = FacialAreaRegion(
|
||||
x=int(x),
|
||||
y=int(y),
|
||||
w=int(w),
|
||||
h=int(h),
|
||||
left_eye=left_eye,
|
||||
right_eye=right_eye,
|
||||
confidence=min(max(0, float(confidence)), 1.0),
|
||||
)
|
||||
resp.append(facial_area)
|
||||
|
||||
return resp
|
||||
|
||||
|
||||
class CenterFace:
|
||||
"""
|
||||
This class is heavily inspired from
|
||||
github.com/Star-Clouds/CenterFace/blob/master/prj-python/centerface.py
|
||||
"""
|
||||
|
||||
def __init__(self, weight_path: str):
|
||||
self.net = cv2.dnn.readNetFromONNX(weight_path)
|
||||
self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0
|
||||
|
||||
def forward(self, img, height, width, threshold=0.5):
|
||||
self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = self.transform(height, width)
|
||||
return self.inference_opencv(img, threshold)
|
||||
|
||||
def inference_opencv(self, img, threshold):
|
||||
blob = cv2.dnn.blobFromImage(
|
||||
img,
|
||||
scalefactor=1.0,
|
||||
size=(self.img_w_new, self.img_h_new),
|
||||
mean=(0, 0, 0),
|
||||
swapRB=True,
|
||||
crop=False,
|
||||
)
|
||||
self.net.setInput(blob)
|
||||
heatmap, scale, offset, lms = self.net.forward(["537", "538", "539", "540"])
|
||||
return self.postprocess(heatmap, lms, offset, scale, threshold)
|
||||
|
||||
def transform(self, h, w):
|
||||
img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
|
||||
scale_h, scale_w = img_h_new / h, img_w_new / w
|
||||
return img_h_new, img_w_new, scale_h, scale_w
|
||||
|
||||
def postprocess(self, heatmap, lms, offset, scale, threshold):
|
||||
dets, lms = self.decode(
|
||||
heatmap, scale, offset, lms, (self.img_h_new, self.img_w_new), threshold=threshold
|
||||
)
|
||||
if len(dets) > 0:
|
||||
dets[:, 0:4:2], dets[:, 1:4:2] = (
|
||||
dets[:, 0:4:2] / self.scale_w,
|
||||
dets[:, 1:4:2] / self.scale_h,
|
||||
)
|
||||
lms[:, 0:10:2], lms[:, 1:10:2] = (
|
||||
lms[:, 0:10:2] / self.scale_w,
|
||||
lms[:, 1:10:2] / self.scale_h,
|
||||
)
|
||||
else:
|
||||
dets = np.empty(shape=[0, 5], dtype=np.float32)
|
||||
lms = np.empty(shape=[0, 10], dtype=np.float32)
|
||||
return dets, lms
|
||||
|
||||
def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):
|
||||
heatmap = np.squeeze(heatmap)
|
||||
scale0, scale1 = scale[0, 0, :, :], scale[0, 1, :, :]
|
||||
offset0, offset1 = offset[0, 0, :, :], offset[0, 1, :, :]
|
||||
c0, c1 = np.where(heatmap > threshold)
|
||||
boxes, lms = [], []
|
||||
if len(c0) > 0:
|
||||
# pylint:disable=consider-using-enumerate
|
||||
for i in range(len(c0)):
|
||||
s0, s1 = np.exp(scale0[c0[i], c1[i]]) * 4, np.exp(scale1[c0[i], c1[i]]) * 4
|
||||
o0, o1 = offset0[c0[i], c1[i]], offset1[c0[i], c1[i]]
|
||||
s = heatmap[c0[i], c1[i]]
|
||||
x1, y1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2), max(
|
||||
0, (c0[i] + o0 + 0.5) * 4 - s0 / 2
|
||||
)
|
||||
x1, y1 = min(x1, size[1]), min(y1, size[0])
|
||||
boxes.append([x1, y1, min(x1 + s1, size[1]), min(y1 + s0, size[0]), s])
|
||||
lm = []
|
||||
for j in range(5):
|
||||
lm.append(landmark[0, j * 2 + 1, c0[i], c1[i]] * s1 + x1)
|
||||
lm.append(landmark[0, j * 2, c0[i], c1[i]] * s0 + y1)
|
||||
lms.append(lm)
|
||||
boxes = np.asarray(boxes, dtype=np.float32)
|
||||
keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3)
|
||||
boxes = boxes[keep, :]
|
||||
lms = np.asarray(lms, dtype=np.float32)
|
||||
lms = lms[keep, :]
|
||||
return boxes, lms
|
||||
|
||||
def nms(self, boxes, scores, nms_thresh):
|
||||
x1 = boxes[:, 0]
|
||||
y1 = boxes[:, 1]
|
||||
x2 = boxes[:, 2]
|
||||
y2 = boxes[:, 3]
|
||||
areas = (x2 - x1 + 1) * (y2 - y1 + 1)
|
||||
order = np.argsort(scores)[::-1]
|
||||
num_detections = boxes.shape[0]
|
||||
suppressed = np.zeros((num_detections,), dtype=bool)
|
||||
|
||||
keep = []
|
||||
for _i in range(num_detections):
|
||||
i = order[_i]
|
||||
if suppressed[i]:
|
||||
continue
|
||||
keep.append(i)
|
||||
|
||||
ix1 = x1[i]
|
||||
iy1 = y1[i]
|
||||
ix2 = x2[i]
|
||||
iy2 = y2[i]
|
||||
iarea = areas[i]
|
||||
|
||||
for _j in range(_i + 1, num_detections):
|
||||
j = order[_j]
|
||||
if suppressed[j]:
|
||||
continue
|
||||
|
||||
xx1 = max(ix1, x1[j])
|
||||
yy1 = max(iy1, y1[j])
|
||||
xx2 = min(ix2, x2[j])
|
||||
yy2 = min(iy2, y2[j])
|
||||
w = max(0, xx2 - xx1 + 1)
|
||||
h = max(0, yy2 - yy1 + 1)
|
||||
|
||||
inter = w * h
|
||||
ovr = inter / (iarea + areas[j] - inter)
|
||||
if ovr >= nms_thresh:
|
||||
suppressed[j] = True
|
||||
|
||||
return keep
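# Editor's sketch, not part of the original module: the greedy IoU-based nms above
# keeps the higher-scoring of two heavily overlapping boxes and both of two disjoint
# ones. nms does not use self, so it is called unbound here; the helper name is
# hypothetical.
def _nms_example() -> list:
    boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]], dtype=np.float32)
    scores = np.array([0.9, 0.8, 0.7], dtype=np.float32)
    return CenterFace.nms(None, boxes, scores, nms_thresh=0.3)  # -> [0, 2]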
|
||||
|
|
@ -1,104 +0,0 @@
|
|||
# built-in dependencies
|
||||
from typing import List
|
||||
|
||||
# 3rd party dependencies
|
||||
import numpy as np
|
||||
|
||||
# project dependencies
|
||||
from deepface.commons import weight_utils
|
||||
from deepface.models.Detector import Detector, FacialAreaRegion
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
|
||||
class DlibClient(Detector):
|
||||
def __init__(self):
|
||||
self.model = self.build_model()
|
||||
|
||||
def build_model(self) -> dict:
|
||||
"""
|
||||
Build a dlib hog face detector model
|
||||
Returns:
|
||||
model (Any)
|
||||
"""
|
||||
# this is not a must dependency. do not import it in the global level.
|
||||
try:
|
||||
import dlib
|
||||
except ModuleNotFoundError as e:
|
||||
raise ImportError(
|
||||
"Dlib is an optional detector, ensure the library is installed. "
|
||||
"Please install using 'pip install dlib'"
|
||||
) from e
|
||||
|
||||
# check required file exists in the home/.deepface/weights folder
|
||||
weight_file = weight_utils.download_weights_if_necessary(
|
||||
file_name="shape_predictor_5_face_landmarks.dat",
|
||||
source_url="http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2",
|
||||
compress_type="bz2",
|
||||
)
|
||||
|
||||
face_detector = dlib.get_frontal_face_detector()
|
||||
sp = dlib.shape_predictor(weight_file)
|
||||
|
||||
detector = {}
|
||||
detector["face_detector"] = face_detector
|
||||
detector["sp"] = sp
|
||||
return detector
|
||||
|
||||
def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
|
||||
"""
|
||||
Detect and align face with dlib
|
||||
|
||||
Args:
|
||||
img (np.ndarray): pre-loaded image as numpy array
|
||||
|
||||
Returns:
|
||||
results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
|
||||
"""
|
||||
resp = []
|
||||
|
||||
face_detector = self.model["face_detector"]
|
||||
|
||||
# note that, by design, dlib's fhog face detector scores are >0 but not capped at 1
|
||||
detections, scores, _ = face_detector.run(img, 1)
|
||||
|
||||
if len(detections) > 0:
|
||||
|
||||
for idx, detection in enumerate(detections):
|
||||
left = detection.left()
|
||||
right = detection.right()
|
||||
top = detection.top()
|
||||
bottom = detection.bottom()
|
||||
|
||||
y = int(max(0, top))
|
||||
h = int(min(bottom, img.shape[0]) - y)
|
||||
x = int(max(0, left))
|
||||
w = int(min(right, img.shape[1]) - x)
|
||||
|
||||
shape = self.model["sp"](img, detection)
|
||||
|
||||
right_eye = (
|
||||
int((shape.part(2).x + shape.part(3).x) // 2),
|
||||
int((shape.part(2).y + shape.part(3).y) // 2),
|
||||
)
|
||||
left_eye = (
|
||||
int((shape.part(0).x + shape.part(1).x) // 2),
|
||||
int((shape.part(0).y + shape.part(1).y) // 2),
|
||||
)
|
||||
|
||||
# never saw confidence higher than +3.5 github.com/davisking/dlib/issues/761
|
||||
confidence = scores[idx]
|
||||
|
||||
facial_area = FacialAreaRegion(
|
||||
x=x,
|
||||
y=y,
|
||||
w=w,
|
||||
h=h,
|
||||
left_eye=left_eye,
|
||||
right_eye=right_eye,
|
||||
confidence=min(max(0, confidence), 1.0),
|
||||
)
|
||||
resp.append(facial_area)
|
||||
|
||||
return resp
|
||||
|
|
@ -1,95 +0,0 @@
|
|||
# built-in dependencies
|
||||
from typing import Any, Union, List
|
||||
|
||||
# 3rd party dependencies
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
# project dependencies
|
||||
from deepface.models.Detector import Detector, FacialAreaRegion
|
||||
|
||||
|
||||
class FastMtCnnClient(Detector):
|
||||
"""
|
||||
Fast MtCnn Detector from github.com/timesler/facenet-pytorch
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.model = self.build_model()
|
||||
|
||||
def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
|
||||
"""
|
||||
Detect and align face with mtcnn
|
||||
|
||||
Args:
|
||||
img (np.ndarray): pre-loaded image as numpy array
|
||||
|
||||
Returns:
|
||||
results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
|
||||
"""
|
||||
resp = []
|
||||
|
||||
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # mtcnn expects RGB but OpenCV reads BGR
|
||||
detections = self.model.detect(
|
||||
img_rgb, landmarks=True
|
||||
) # returns boundingbox, prob, landmark
|
||||
if (
|
||||
detections is not None
|
||||
and len(detections) > 0
|
||||
and not any(detection is None for detection in detections) # issue 1043
|
||||
):
|
||||
for regions, confidence, eyes in zip(*detections):
|
||||
x, y, w, h = xyxy_to_xywh(regions)
|
||||
right_eye = eyes[0]
|
||||
left_eye = eyes[1]
|
||||
|
||||
left_eye = tuple(int(i) for i in left_eye)
|
||||
right_eye = tuple(int(i) for i in right_eye)
|
||||
|
||||
facial_area = FacialAreaRegion(
|
||||
x=x,
|
||||
y=y,
|
||||
w=w,
|
||||
h=h,
|
||||
left_eye=left_eye,
|
||||
right_eye=right_eye,
|
||||
confidence=confidence,
|
||||
)
|
||||
resp.append(facial_area)
|
||||
|
||||
return resp
|
||||
|
||||
def build_model(self) -> Any:
|
||||
"""
|
||||
Build a fast mtcnn face detector model
|
||||
Returns:
|
||||
model (Any)
|
||||
"""
|
||||
# this is not a must dependency. do not import it in the global level.
|
||||
try:
|
||||
from facenet_pytorch import MTCNN as fast_mtcnn
|
||||
import torch
|
||||
except ModuleNotFoundError as e:
|
||||
raise ImportError(
|
||||
"FastMtcnn is an optional detector, ensure the library is installed. "
|
||||
"Please install using 'pip install facenet-pytorch'"
|
||||
) from e
|
||||
|
||||
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||
face_detector = fast_mtcnn(device=device)
|
||||
|
||||
return face_detector
|
||||
|
||||
|
||||
def xyxy_to_xywh(regions: Union[list, tuple]) -> tuple:
|
||||
"""
|
||||
Convert (x1, y1, x2, y2) format to (x, y, w, h) format.
|
||||
Args:
|
||||
regions (list or tuple): facial area coordinates as x, y, x+w, y+h
|
||||
Returns:
|
||||
regions (tuple): facial area coordinates as x, y, w, h
|
||||
"""
|
||||
x, y, x_plus_w, y_plus_h = regions[0], regions[1], regions[2], regions[3]
|
||||
w = x_plus_w - x
|
||||
h = y_plus_h - y
|
||||
return (x, y, w, h)
|
||||
|
|
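A quick sanity check for the coordinate conversion above (illustrative only, not part of the original module): the helper simply subtracts the top-left corner from the bottom-right one.

```python
# assuming xyxy_to_xywh from the module above is importable
box = (34, 50, 134, 170)  # x1, y1, x2, y2
assert xyxy_to_xywh(box) == (34, 50, 100, 120)  # w = 134 - 34, h = 170 - 50
```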
@@ -1,89 +0,0 @@
# built-in dependencies
from typing import Any, List

# 3rd party dependencies
import numpy as np

# project dependencies
from deepface.models.Detector import Detector, FacialAreaRegion


class MediaPipeClient(Detector):
    """
    MediaPipe from google.github.io/mediapipe/solutions/face_detection
    """

    def __init__(self):
        self.model = self.build_model()

    def build_model(self) -> Any:
        """
        Build a mediapipe face detector model
        Returns:
            model (Any)
        """
        # this is not a must dependency. do not import it in the global level.
        try:
            import mediapipe as mp
        except ModuleNotFoundError as e:
            raise ImportError(
                "MediaPipe is an optional detector, ensure the library is installed. "
                "Please install using 'pip install mediapipe'"
            ) from e

        mp_face_detection = mp.solutions.face_detection
        face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.7)
        return face_detection

    def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
        """
        Detect and align face with mediapipe

        Args:
            img (np.ndarray): pre-loaded image as numpy array

        Returns:
            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
        """
        resp = []

        img_width = img.shape[1]
        img_height = img.shape[0]

        results = self.model.process(img)

        # If no face has been detected, return an empty list
        if results.detections is None:
            return resp

        # Extract the bounding box, the landmarks and the confidence score
        for current_detection in results.detections:
            (confidence,) = current_detection.score

            bounding_box = current_detection.location_data.relative_bounding_box
            landmarks = current_detection.location_data.relative_keypoints

            x = int(bounding_box.xmin * img_width)
            w = int(bounding_box.width * img_width)
            y = int(bounding_box.ymin * img_height)
            h = int(bounding_box.height * img_height)

            right_eye = (int(landmarks[0].x * img_width), int(landmarks[0].y * img_height))
            left_eye = (int(landmarks[1].x * img_width), int(landmarks[1].y * img_height))
            # nose = (int(landmarks[2].x * img_width), int(landmarks[2].y * img_height))
            # mouth = (int(landmarks[3].x * img_width), int(landmarks[3].y * img_height))
            # right_ear = (int(landmarks[4].x * img_width), int(landmarks[4].y * img_height))
            # left_ear = (int(landmarks[5].x * img_width), int(landmarks[5].y * img_height))

            facial_area = FacialAreaRegion(
                x=x,
                y=y,
                w=w,
                h=h,
                left_eye=left_eye,
                right_eye=right_eye,
                confidence=float(confidence),
            )
            resp.append(facial_area)

        return resp
@@ -1,60 +0,0 @@
# built-in dependencies
from typing import List

# 3rd party dependencies
import numpy as np
from mtcnn import MTCNN

# project dependencies
from deepface.models.Detector import Detector, FacialAreaRegion


# pylint: disable=too-few-public-methods
class MtCnnClient(Detector):
    """
    Class to cover common face detection functionality for MtCnn backend
    """

    def __init__(self):
        self.model = MTCNN()

    def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
        """
        Detect and align face with mtcnn

        Args:
            img (np.ndarray): pre-loaded image as numpy array

        Returns:
            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
        """

        resp = []

        # mtcnn expects RGB but OpenCV read BGR
        # img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_rgb = img[:, :, ::-1]
        detections = self.model.detect_faces(img_rgb)

        if detections is not None and len(detections) > 0:

            for current_detection in detections:
                x, y, w, h = current_detection["box"]
                confidence = current_detection["confidence"]
                # mtcnn detector assigns left eye with respect to the observer
                # but we are setting it with respect to the person itself
                left_eye = current_detection["keypoints"]["right_eye"]
                right_eye = current_detection["keypoints"]["left_eye"]

                facial_area = FacialAreaRegion(
                    x=x,
                    y=y,
                    w=w,
                    h=h,
                    left_eye=left_eye,
                    right_eye=right_eye,
                    confidence=confidence,
                )

                resp.append(facial_area)

        return resp
@@ -1,176 +0,0 @@
# built-in dependencies
import os
from typing import Any, List

# 3rd party dependencies
import cv2
import numpy as np

# project dependencies
from deepface.models.Detector import Detector, FacialAreaRegion


class OpenCvClient(Detector):
    """
    Class to cover common face detection functionality for OpenCv backend
    """

    def __init__(self):
        self.model = self.build_model()

    def build_model(self):
        """
        Build opencv's face and eye detector models
        Returns:
            model (dict): including face_detector and eye_detector keys
        """
        detector = {}
        detector["face_detector"] = self.__build_cascade("haarcascade")
        detector["eye_detector"] = self.__build_cascade("haarcascade_eye")
        return detector

    def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
        """
        Detect and align face with opencv

        Args:
            img (np.ndarray): pre-loaded image as numpy array

        Returns:
            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
        """
        resp = []

        detected_face = None

        faces = []
        try:
            # faces = detector["face_detector"].detectMultiScale(img, 1.3, 5)

            # note that, by design, opencv's haarcascade scores are >0 but not capped at 1
            faces, _, scores = self.model["face_detector"].detectMultiScale3(
                img, 1.1, 10, outputRejectLevels=True
            )
        except:
            pass

        if len(faces) > 0:
            for (x, y, w, h), confidence in zip(faces, scores):
                detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
                left_eye, right_eye = self.find_eyes(img=detected_face)

                # eyes are found within the detected face rather than the whole image,
                # so the detected face's coordinates must be added back
                if left_eye is not None:
                    left_eye = (int(x + left_eye[0]), int(y + left_eye[1]))
                if right_eye is not None:
                    right_eye = (int(x + right_eye[0]), int(y + right_eye[1]))

                facial_area = FacialAreaRegion(
                    x=x,
                    y=y,
                    w=w,
                    h=h,
                    left_eye=left_eye,
                    right_eye=right_eye,
                    confidence=(100 - confidence) / 100,
                )
                resp.append(facial_area)

        return resp

    def find_eyes(self, img: np.ndarray) -> tuple:
        """
        Find the left and right eye coordinates of the given image
        Args:
            img (np.ndarray): given image
        Returns:
            left and right eye (tuple)
        """
        left_eye = None
        right_eye = None

        # if image has unexpectedly 0 dimension then skip alignment
        if img.shape[0] == 0 or img.shape[1] == 0:
            return left_eye, right_eye

        detected_face_gray = cv2.cvtColor(
            img, cv2.COLOR_BGR2GRAY
        )  # eye detector expects gray scale image

        eyes = self.model["eye_detector"].detectMultiScale(detected_face_gray, 1.1, 10)

        # ----------------------------------------------------------------

        # opencv eye detection module is not strong. it might find more than 2 eyes!
        # besides, it returns eyes with a different order in each call (issue 435)
        # this is an important issue because opencv is the default detector and ssd also uses this
        # find the largest 2 eyes. Thanks to @thelostpeace

        eyes = sorted(eyes, key=lambda v: abs(v[2] * v[3]), reverse=True)

        # ----------------------------------------------------------------
        if len(eyes) >= 2:
            # decide left and right eye

            eye_1 = eyes[0]
            eye_2 = eyes[1]

            if eye_1[0] < eye_2[0]:
                right_eye = eye_1
                left_eye = eye_2
            else:
                right_eye = eye_2
                left_eye = eye_1

            # -----------------------
            # find center of eyes
            left_eye = (
                int(left_eye[0] + (left_eye[2] / 2)),
                int(left_eye[1] + (left_eye[3] / 2)),
            )
            right_eye = (
                int(right_eye[0] + (right_eye[2] / 2)),
                int(right_eye[1] + (right_eye[3] / 2)),
            )
        return left_eye, right_eye

    def __build_cascade(self, model_name="haarcascade") -> Any:
        """
        Build opencv's face or eye detector model
        Returns:
            model (Any)
        """
        opencv_path = self.__get_opencv_path()
        if model_name == "haarcascade":
            face_detector_path = os.path.join(opencv_path, "haarcascade_frontalface_default.xml")
            if not os.path.isfile(face_detector_path):
                raise ValueError(
                    "Confirm that opencv is installed on your environment! Expected path ",
                    face_detector_path,
                    " violated.",
                )
            detector = cv2.CascadeClassifier(face_detector_path)

        elif model_name == "haarcascade_eye":
            eye_detector_path = os.path.join(opencv_path, "haarcascade_eye.xml")
            if not os.path.isfile(eye_detector_path):
                raise ValueError(
                    "Confirm that opencv is installed on your environment! Expected path ",
                    eye_detector_path,
                    " violated.",
                )
            detector = cv2.CascadeClassifier(eye_detector_path)

        else:
            raise ValueError(f"unimplemented model_name for build_cascade - {model_name}")

        return detector

    def __get_opencv_path(self) -> str:
        """
        Returns where opencv is installed
        Returns:
            installation_path (str)
        """
        return os.path.join(os.path.dirname(cv2.__file__), "data")
@@ -1,64 +0,0 @@
# built-in dependencies
from typing import List

# 3rd party dependencies
import numpy as np
from retinaface import RetinaFace as rf

# project dependencies
from deepface.models.Detector import Detector, FacialAreaRegion


# pylint: disable=too-few-public-methods
class RetinaFaceClient(Detector):
    def __init__(self):
        self.model = rf.build_model()

    def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
        """
        Detect and align face with retinaface

        Args:
            img (np.ndarray): pre-loaded image as numpy array

        Returns:
            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
        """
        resp = []

        obj = rf.detect_faces(img, model=self.model, threshold=0.9)

        if not isinstance(obj, dict):
            return resp

        for face_idx in obj.keys():
            identity = obj[face_idx]
            detection = identity["facial_area"]

            y = detection[1]
            h = detection[3] - y
            x = detection[0]
            w = detection[2] - x

            # retinaface sets left and right eyes with respect to the person
            left_eye = identity["landmarks"]["left_eye"]
            right_eye = identity["landmarks"]["right_eye"]

            # eyes are list of float, need to cast them tuple of int
            left_eye = tuple(int(i) for i in left_eye)
            right_eye = tuple(int(i) for i in right_eye)

            confidence = identity["score"]

            facial_area = FacialAreaRegion(
                x=x,
                y=y,
                w=w,
                h=h,
                left_eye=left_eye,
                right_eye=right_eye,
                confidence=confidence,
            )

            resp.append(facial_area)

        return resp
@@ -1,133 +0,0 @@
# built-in dependencies
from typing import List
from enum import IntEnum

# 3rd party dependencies
import cv2
import numpy as np

# project dependencies
from deepface.models.face_detection import OpenCv
from deepface.commons import weight_utils
from deepface.models.Detector import Detector, FacialAreaRegion
from deepface.commons.logger import Logger

logger = Logger()

# pylint: disable=line-too-long, c-extension-no-member


class SsdClient(Detector):
    def __init__(self):
        self.model = self.build_model()

    def build_model(self) -> dict:
        """
        Build a ssd detector model
        Returns:
            model (dict)
        """

        # model structure
        output_model = weight_utils.download_weights_if_necessary(
            file_name="deploy.prototxt",
            source_url="https://github.com/opencv/opencv/raw/3.4.0/samples/dnn/face_detector/deploy.prototxt",
        )

        # pre-trained weights
        output_weights = weight_utils.download_weights_if_necessary(
            file_name="res10_300x300_ssd_iter_140000.caffemodel",
            source_url="https://github.com/opencv/opencv_3rdparty/raw/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel",
        )

        try:
            face_detector = cv2.dnn.readNetFromCaffe(output_model, output_weights)
        except Exception as err:
            raise ValueError(
                "Exception while calling opencv.dnn module. "
                "This is an optional dependency. "
                "You can install it as pip install opencv-contrib-python."
            ) from err

        return {"face_detector": face_detector, "opencv_module": OpenCv.OpenCvClient()}

    def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
        """
        Detect and align face with ssd

        Args:
            img (np.ndarray): pre-loaded image as numpy array

        Returns:
            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
        """

        # Because cv2.dnn.blobFromImage expects CV_8U (8-bit unsigned integer) values
        if img.dtype != np.uint8:
            img = img.astype(np.uint8)

        opencv_module: OpenCv.OpenCvClient = self.model["opencv_module"]

        target_size = (300, 300)

        original_size = img.shape

        current_img = cv2.resize(img, target_size)

        aspect_ratio_x = original_size[1] / target_size[1]
        aspect_ratio_y = original_size[0] / target_size[0]

        imageBlob = cv2.dnn.blobFromImage(image=current_img)

        face_detector = self.model["face_detector"]
        face_detector.setInput(imageBlob)
        detections = face_detector.forward()

        class ssd_labels(IntEnum):
            img_id = 0
            is_face = 1
            confidence = 2
            left = 3
            top = 4
            right = 5
            bottom = 6

        faces = detections[0][0]
        faces = faces[
            (faces[:, ssd_labels.is_face] == 1) & (faces[:, ssd_labels.confidence] >= 0.90)
        ]
        margins = [ssd_labels.left, ssd_labels.top, ssd_labels.right, ssd_labels.bottom]
        faces[:, margins] = np.int32(faces[:, margins] * 300)
        faces[:, margins] = np.int32(
            faces[:, margins] * [aspect_ratio_x, aspect_ratio_y, aspect_ratio_x, aspect_ratio_y]
        )
        faces[:, [ssd_labels.right, ssd_labels.bottom]] -= faces[
            :, [ssd_labels.left, ssd_labels.top]
        ]

        resp = []
        for face in faces:
            confidence = float(face[ssd_labels.confidence])
            x, y, w, h = map(int, face[margins])
            detected_face = img[y : y + h, x : x + w]

            left_eye, right_eye = opencv_module.find_eyes(detected_face)

            # eyes are found within the detected face rather than the whole image,
            # so the detected face's coordinates must be added back
            if left_eye is not None:
                left_eye = x + int(left_eye[0]), y + int(left_eye[1])
            if right_eye is not None:
                right_eye = x + int(right_eye[0]), y + int(right_eye[1])

            facial_area = FacialAreaRegion(
                x=x,
                y=y,
                w=w,
                h=h,
                left_eye=left_eye,
                right_eye=right_eye,
                confidence=confidence,
            )
            resp.append(facial_area)
        return resp
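The two `np.int32` multiplications above amount to scaling the SSD outputs, which are normalized to [0, 1], back to the original resolution. A minimal numeric sketch (values are made up for illustration and are not from the module):

```python
import numpy as np

# hypothetical normalized margins (left, top, right, bottom) in [0, 1]
margins = np.array([0.10, 0.20, 0.50, 0.80])
aspect_ratio_x, aspect_ratio_y = 1280 / 300, 720 / 300  # original size over (300, 300)

scaled = np.int32(
    np.int32(margins * 300) * [aspect_ratio_x, aspect_ratio_y, aspect_ratio_x, aspect_ratio_y]
)
# equivalent (up to integer truncation) to multiplying by the original width/height directly
print(scaled)  # [128 144 640 576]
```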
@@ -1,94 +0,0 @@
# built-in dependencies
from typing import Any, List

# 3rd party dependencies
import numpy as np

# project dependencies
from deepface.models.Detector import Detector, FacialAreaRegion
from deepface.commons import weight_utils
from deepface.commons.logger import Logger

logger = Logger()

# Model's weights paths
PATH = ".deepface/weights/yolov8n-face.pt"

# Google Drive URL from repo (https://github.com/derronqi/yolov8-face) ~6MB
WEIGHT_URL = "https://drive.google.com/uc?id=1qcr9DbgsX3ryrz2uU8w4Xm3cOrRywXqb"


class YoloClient(Detector):
    def __init__(self):
        self.model = self.build_model()

    def build_model(self) -> Any:
        """
        Build a yolo detector model
        Returns:
            model (Any)
        """

        # Import the optional Ultralytics YOLO model
        try:
            from ultralytics import YOLO
        except ModuleNotFoundError as e:
            raise ImportError(
                "Yolo is an optional detector, ensure the library is installed. "
                "Please install using 'pip install ultralytics'"
            ) from e

        weight_file = weight_utils.download_weights_if_necessary(
            file_name="yolov8n-face.pt", source_url=WEIGHT_URL
        )

        # Return face_detector
        return YOLO(weight_file)

    def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
        """
        Detect and align face with yolo

        Args:
            img (np.ndarray): pre-loaded image as numpy array

        Returns:
            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
        """
        resp = []

        # Detect faces
        results = self.model.predict(img, verbose=False, show=False, conf=0.25)[0]

        # For each face, extract the bounding box, the landmarks and confidence
        for result in results:

            if result.boxes is None or result.keypoints is None:
                continue

            # Extract the bounding box and the confidence
            x, y, w, h = result.boxes.xywh.tolist()[0]
            confidence = result.boxes.conf.tolist()[0]

            # right_eye_conf = result.keypoints.conf[0][0]
            # left_eye_conf = result.keypoints.conf[0][1]
            right_eye = result.keypoints.xy[0][0].tolist()
            left_eye = result.keypoints.xy[0][1].tolist()

            # eyes are list of float, need to cast them tuple of int
            left_eye = tuple(int(i) for i in left_eye)
            right_eye = tuple(int(i) for i in right_eye)

            x, y, w, h = int(x - w / 2), int(y - h / 2), int(w), int(h)
            facial_area = FacialAreaRegion(
                x=x,
                y=y,
                w=w,
                h=h,
                left_eye=left_eye,
                right_eye=right_eye,
                confidence=confidence,
            )
            resp.append(facial_area)

        return resp
@@ -1,127 +0,0 @@
# built-in dependencies
import os
from typing import Any, List

# 3rd party dependencies
import cv2
import numpy as np

# project dependencies
from deepface.commons import weight_utils
from deepface.models.Detector import Detector, FacialAreaRegion
from deepface.commons.logger import Logger

logger = Logger()


class YuNetClient(Detector):
    def __init__(self):
        self.model = self.build_model()

    def build_model(self) -> Any:
        """
        Build a yunet detector model
        Returns:
            model (Any)
        """

        opencv_version = cv2.__version__.split(".")
        if not len(opencv_version) >= 2:
            raise ValueError(
                f"OpenCv's version must have major and minor values but it is {opencv_version}"
            )

        opencv_version_major = int(opencv_version[0])
        opencv_version_minor = int(opencv_version[1])

        if opencv_version_major < 4 or (opencv_version_major == 4 and opencv_version_minor < 8):
            # min requirement: https://github.com/opencv/opencv_zoo/issues/172
            raise ValueError(f"YuNet requires opencv-python >= 4.8 but you have {cv2.__version__}")

        # pylint: disable=C0301
        weight_file = weight_utils.download_weights_if_necessary(
            file_name="face_detection_yunet_2023mar.onnx",
            source_url="https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx",
        )

        try:
            face_detector = cv2.FaceDetectorYN_create(weight_file, "", (0, 0))
        except Exception as err:
            raise ValueError(
                "Exception while calling opencv.FaceDetectorYN_create module. "
                "This is an optional dependency. "
                "You can install it as pip install opencv-contrib-python."
            ) from err
        return face_detector

    def detect_faces(self, img: np.ndarray) -> List[FacialAreaRegion]:
        """
        Detect and align face with yunet

        Args:
            img (np.ndarray): pre-loaded image as numpy array

        Returns:
            results (List[FacialAreaRegion]): A list of FacialAreaRegion objects
        """
        # FaceDetector.detect_faces does not support score_threshold parameter.
        # We can set it via environment variable.
        score_threshold = float(os.environ.get("yunet_score_threshold", "0.9"))
        resp = []
        faces = []
        height, width = img.shape[0], img.shape[1]
        # resize image if it is too large (Yunet fails to detect faces on large input sometimes)
        # I picked 640 as a threshold because it is the default value of max_size in Yunet.
        resized = False
        r = 1  # resize factor
        if height > 640 or width > 640:
            r = 640.0 / max(height, width)
            img = cv2.resize(img, (int(width * r), int(height * r)))
            height, width = img.shape[0], img.shape[1]
            resized = True
        self.model.setInputSize((width, height))
        self.model.setScoreThreshold(score_threshold)
        _, faces = self.model.detect(img)
        if faces is None:
            return resp
        for face in faces:
            # pylint: disable=W0105
            """
            The detection output faces is a two-dimension array of type CV_32F,
            whose rows are the detected face instances, columns are the location
            of a face and 5 facial landmarks.
            The format of each row is as follows:
                x1, y1, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt,
                x_rcm, y_rcm, x_lcm, y_lcm,
            where x1, y1, w, h are the top-left coordinates, width and height of
            the face bounding box,
            {x, y}_{re, le, nt, rcm, lcm} stands for the coordinates of right eye,
            left eye, nose tip, the right corner and left corner of the mouth respectively.
            """
            (x, y, w, h, x_le, y_le, x_re, y_re) = list(map(int, face[:8]))

            # YuNet returns negative coordinates if it thinks part of the detected face
            # is outside the frame.
            x = max(x, 0)
            y = max(y, 0)
            if resized:
                x, y, w, h = int(x / r), int(y / r), int(w / r), int(h / r)
                x_re, y_re, x_le, y_le = (
                    int(x_re / r),
                    int(y_re / r),
                    int(x_le / r),
                    int(y_le / r),
                )
            confidence = float(face[-1])

            facial_area = FacialAreaRegion(
                x=x,
                y=y,
                w=w,
                h=h,
                confidence=confidence,
                left_eye=(x_re, y_re),
                right_eye=(x_le, y_le),
            )
            resp.append(facial_area)
        return resp
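Because the score threshold is only read from the environment, it can be loosened before the detector runs. A short, hedged usage sketch (the `extract_faces` call and the `"yunet"` backend name are assumptions about the public deepface API, not part of this module):

```python
import os
from deepface import DeepFace

# loosen YuNet's confidence cut-off (default "0.9" per the code above) before detection
os.environ["yunet_score_threshold"] = "0.5"
faces = DeepFace.extract_faces(img_path="img.jpg", detector_backend="yunet")
```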
@@ -1,169 +0,0 @@
# project dependencies
from deepface.commons import package_utils, weight_utils
from deepface.models.FacialRecognition import FacialRecognition

from deepface.commons.logger import Logger

logger = Logger()

# pylint: disable=unsubscriptable-object

# --------------------------------
# dependency configuration

tf_version = package_utils.get_tf_major_version()

if tf_version == 1:
    from keras.models import Model
    from keras.engine import training
    from keras.layers import (
        ZeroPadding2D,
        Input,
        Conv2D,
        BatchNormalization,
        PReLU,
        Add,
        Dropout,
        Flatten,
        Dense,
    )
else:
    from tensorflow.keras.models import Model
    from tensorflow.python.keras.engine import training
    from tensorflow.keras.layers import (
        ZeroPadding2D,
        Input,
        Conv2D,
        BatchNormalization,
        PReLU,
        Add,
        Dropout,
        Flatten,
        Dense,
    )

# pylint: disable=too-few-public-methods
class ArcFaceClient(FacialRecognition):
    """
    ArcFace model class
    """

    def __init__(self):
        self.model = load_model()
        self.model_name = "ArcFace"
        self.input_shape = (112, 112)
        self.output_shape = 512


def load_model(
    url="https://github.com/serengil/deepface_models/releases/download/v1.0/arcface_weights.h5",
) -> Model:
    """
    Construct ArcFace model, download its weights and load
    Returns:
        model (Model)
    """
    base_model = ResNet34()
    inputs = base_model.inputs[0]
    arcface_model = base_model.outputs[0]
    arcface_model = BatchNormalization(momentum=0.9, epsilon=2e-5)(arcface_model)
    arcface_model = Dropout(0.4)(arcface_model)
    arcface_model = Flatten()(arcface_model)
    arcface_model = Dense(512, activation=None, use_bias=True, kernel_initializer="glorot_normal")(
        arcface_model
    )
    embedding = BatchNormalization(momentum=0.9, epsilon=2e-5, name="embedding", scale=True)(
        arcface_model
    )
    model = Model(inputs, embedding, name=base_model.name)

    # ---------------------------------------
    weight_file = weight_utils.download_weights_if_necessary(
        file_name="arcface_weights.h5", source_url=url
    )

    model = weight_utils.load_model_weights(model=model, weight_file=weight_file)
    # ---------------------------------------

    return model


def ResNet34() -> Model:
    """
    ResNet34 model
    Returns:
        model (Model)
    """
    img_input = Input(shape=(112, 112, 3))

    x = ZeroPadding2D(padding=1, name="conv1_pad")(img_input)
    x = Conv2D(
        64, 3, strides=1, use_bias=False, kernel_initializer="glorot_normal", name="conv1_conv"
    )(x)
    x = BatchNormalization(axis=3, epsilon=2e-5, momentum=0.9, name="conv1_bn")(x)
    x = PReLU(shared_axes=[1, 2], name="conv1_prelu")(x)
    x = stack_fn(x)

    model = training.Model(img_input, x, name="ResNet34")

    return model


def block1(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
    bn_axis = 3

    if conv_shortcut:
        shortcut = Conv2D(
            filters,
            1,
            strides=stride,
            use_bias=False,
            kernel_initializer="glorot_normal",
            name=name + "_0_conv",
        )(x)
        shortcut = BatchNormalization(
            axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + "_0_bn"
        )(shortcut)
    else:
        shortcut = x

    x = BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + "_1_bn")(x)
    x = ZeroPadding2D(padding=1, name=name + "_1_pad")(x)
    x = Conv2D(
        filters,
        3,
        strides=1,
        kernel_initializer="glorot_normal",
        use_bias=False,
        name=name + "_1_conv",
    )(x)
    x = BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + "_2_bn")(x)
    x = PReLU(shared_axes=[1, 2], name=name + "_1_prelu")(x)

    x = ZeroPadding2D(padding=1, name=name + "_2_pad")(x)
    x = Conv2D(
        filters,
        kernel_size,
        strides=stride,
        kernel_initializer="glorot_normal",
        use_bias=False,
        name=name + "_2_conv",
    )(x)
    x = BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + "_3_bn")(x)

    x = Add(name=name + "_add")([shortcut, x])
    return x


def stack1(x, filters, blocks, stride1=2, name=None):
    x = block1(x, filters, stride=stride1, name=name + "_block1")
    for i in range(2, blocks + 1):
        x = block1(x, filters, conv_shortcut=False, name=name + "_block" + str(i))
    return x


def stack_fn(x):
    x = stack1(x, 64, 3, name="conv2")
    x = stack1(x, 128, 4, name="conv3")
    x = stack1(x, 256, 6, name="conv4")
    return stack1(x, 512, 3, name="conv5")
@@ -1,96 +0,0 @@
# project dependencies
from deepface.commons import package_utils, weight_utils
from deepface.models.FacialRecognition import FacialRecognition
from deepface.commons.logger import Logger

logger = Logger()

tf_version = package_utils.get_tf_major_version()

if tf_version == 1:
    from keras.models import Model
    from keras.layers import (
        Conv2D,
        Activation,
        Input,
        Add,
        MaxPooling2D,
        Flatten,
        Dense,
        Dropout,
    )
else:
    from tensorflow.keras.models import Model
    from tensorflow.keras.layers import (
        Conv2D,
        Activation,
        Input,
        Add,
        MaxPooling2D,
        Flatten,
        Dense,
        Dropout,
    )

# pylint: disable=line-too-long


# -------------------------------------

# pylint: disable=too-few-public-methods
class DeepIdClient(FacialRecognition):
    """
    DeepId model class
    """

    def __init__(self):
        self.model = load_model()
        self.model_name = "DeepId"
        self.input_shape = (47, 55)
        self.output_shape = 160


def load_model(
    url="https://github.com/serengil/deepface_models/releases/download/v1.0/deepid_keras_weights.h5",
) -> Model:
    """
    Construct DeepId model, download its weights and load
    """

    myInput = Input(shape=(55, 47, 3))

    x = Conv2D(20, (4, 4), name="Conv1", activation="relu", input_shape=(55, 47, 3))(myInput)
    x = MaxPooling2D(pool_size=2, strides=2, name="Pool1")(x)
    x = Dropout(rate=0.99, name="D1")(x)

    x = Conv2D(40, (3, 3), name="Conv2", activation="relu")(x)
    x = MaxPooling2D(pool_size=2, strides=2, name="Pool2")(x)
    x = Dropout(rate=0.99, name="D2")(x)

    x = Conv2D(60, (3, 3), name="Conv3", activation="relu")(x)
    x = MaxPooling2D(pool_size=2, strides=2, name="Pool3")(x)
    x = Dropout(rate=0.99, name="D3")(x)

    x1 = Flatten()(x)
    fc11 = Dense(160, name="fc11")(x1)

    x2 = Conv2D(80, (2, 2), name="Conv4", activation="relu")(x)
    x2 = Flatten()(x2)
    fc12 = Dense(160, name="fc12")(x2)

    y = Add()([fc11, fc12])
    y = Activation("relu", name="deepid")(y)

    model = Model(inputs=[myInput], outputs=y)

    # ---------------------------------

    weight_file = weight_utils.download_weights_if_necessary(
        file_name="deepid_keras_weights.h5", source_url=url
    )

    model = weight_utils.load_model_weights(
        model=model, weight_file=weight_file
    )

    return model
@@ -1,79 +0,0 @@
# built-in dependencies
from typing import List

# 3rd party dependencies
import numpy as np

# project dependencies
from deepface.commons import weight_utils
from deepface.models.FacialRecognition import FacialRecognition
from deepface.commons.logger import Logger

logger = Logger()

# pylint: disable=too-few-public-methods


class DlibClient(FacialRecognition):
    """
    Dlib model class
    """

    def __init__(self):
        self.model = DlibResNet()
        self.model_name = "Dlib"
        self.input_shape = (150, 150)
        self.output_shape = 128

    def forward(self, img: np.ndarray) -> List[float]:
        """
        Find embeddings with Dlib model.
        This model necessitates the override of the forward method
        because it is not a keras model.
        Args:
            img (np.ndarray): pre-loaded image in BGR
        Returns:
            embeddings (list): multi-dimensional vector
        """
        # return self.model.predict(img)[0].tolist()

        # extract_faces returns 4 dimensional images
        if len(img.shape) == 4:
            img = img[0]

        # bgr to rgb
        img = img[:, :, ::-1]

        # img is in scale of [0, 1] but expected [0, 255]
        if img.max() <= 1:
            img = img * 255

        img = img.astype(np.uint8)

        img_representation = self.model.model.compute_face_descriptor(img)
        img_representation = np.array(img_representation)
        img_representation = np.expand_dims(img_representation, axis=0)
        return img_representation[0].tolist()


class DlibResNet:
    def __init__(self):

        # This is not a must dependency. Don't import it in the global level.
        try:
            import dlib
        except ModuleNotFoundError as e:
            raise ImportError(
                "Dlib is an optional dependency, ensure the library is installed. "
                "Please install using 'pip install dlib'"
            ) from e

        weight_file = weight_utils.download_weights_if_necessary(
            file_name="dlib_face_recognition_resnet_model_v1.dat",
            source_url="http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2",
            compress_type="bz2",
        )

        self.model = dlib.face_recognition_model_v1(weight_file)

        # return None  # classes must return None
@@ -1,94 +0,0 @@
# project dependencies
from deepface.commons import package_utils, weight_utils
from deepface.models.FacialRecognition import FacialRecognition
from deepface.commons.logger import Logger

logger = Logger()

# --------------------------------
# dependency configuration

tf_major = package_utils.get_tf_major_version()
tf_minor = package_utils.get_tf_minor_version()

if tf_major == 1:
    from keras.models import Model, Sequential
    from keras.layers import (
        Convolution2D,
        MaxPooling2D,
        Flatten,
        Dense,
        Dropout,
    )
else:
    from tensorflow.keras.models import Model, Sequential
    from tensorflow.keras.layers import (
        Convolution2D,
        MaxPooling2D,
        Flatten,
        Dense,
        Dropout,
    )


# -------------------------------------
# pylint: disable=line-too-long, too-few-public-methods
class DeepFaceClient(FacialRecognition):
    """
    Fb's DeepFace model class
    """

    def __init__(self):
        # DeepFace requires tf 2.12 or less
        if tf_major == 2 and tf_minor > 12:
            # Ref: https://github.com/serengil/deepface/pull/1079
            raise ValueError(
                "DeepFace model requires LocallyConnected2D but it is no longer supported"
                f" after tf 2.12 but you have {tf_major}.{tf_minor}. You need to downgrade your tf."
            )

        self.model = load_model()
        self.model_name = "DeepFace"
        self.input_shape = (152, 152)
        self.output_shape = 4096


def load_model(
    url="https://github.com/swghosh/DeepFace/releases/download/weights-vggface2-2d-aligned/VGGFace2_DeepFace_weights_val-0.9034.h5.zip",
) -> Model:
    """
    Construct DeepFace model, download its weights and load
    """
    # we have some checks for this dependency in the init of client
    # putting this in global causes library initialization
    if tf_major == 1:
        from keras.layers import LocallyConnected2D
    else:
        from tensorflow.keras.layers import LocallyConnected2D

    base_model = Sequential()
    base_model.add(
        Convolution2D(32, (11, 11), activation="relu", name="C1", input_shape=(152, 152, 3))
    )
    base_model.add(MaxPooling2D(pool_size=3, strides=2, padding="same", name="M2"))
    base_model.add(Convolution2D(16, (9, 9), activation="relu", name="C3"))
    base_model.add(LocallyConnected2D(16, (9, 9), activation="relu", name="L4"))
    base_model.add(LocallyConnected2D(16, (7, 7), strides=2, activation="relu", name="L5"))
    base_model.add(LocallyConnected2D(16, (5, 5), activation="relu", name="L6"))
    base_model.add(Flatten(name="F0"))
    base_model.add(Dense(4096, activation="relu", name="F7"))
    base_model.add(Dropout(rate=0.5, name="D0"))
    base_model.add(Dense(8631, activation="softmax", name="F8"))

    # ---------------------------------

    weight_file = weight_utils.download_weights_if_necessary(
        file_name="VGGFace2_DeepFace_weights_val-0.9034.h5", source_url=url, compress_type="zip"
    )

    base_model = weight_utils.load_model_weights(model=base_model, weight_file=weight_file)

    # drop F8 and D0. F7 is the representation layer.
    deepface_model = Model(inputs=base_model.layers[0].input, outputs=base_model.layers[-3].output)

    return deepface_model
@@ -1,306 +0,0 @@
|||
# 3rd party dependencies
|
||||
import tensorflow as tf
|
||||
|
||||
# project dependencies
|
||||
from deepface.commons import package_utils, weight_utils
|
||||
from deepface.models.FacialRecognition import FacialRecognition
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
tf_major = package_utils.get_tf_major_version()
|
||||
if tf_major == 1:
|
||||
import keras
|
||||
from keras import backend as K
|
||||
from keras.models import Model
|
||||
from keras.layers import (
|
||||
Activation,
|
||||
Add,
|
||||
BatchNormalization,
|
||||
Concatenate,
|
||||
Conv2D,
|
||||
DepthwiseConv2D,
|
||||
GlobalAveragePooling2D,
|
||||
Input,
|
||||
Reshape,
|
||||
Multiply,
|
||||
ReLU,
|
||||
PReLU,
|
||||
)
|
||||
else:
|
||||
from tensorflow import keras
|
||||
from tensorflow.keras import backend as K
|
||||
from tensorflow.keras.models import Model
|
||||
from tensorflow.keras.layers import (
|
||||
Activation,
|
||||
Add,
|
||||
BatchNormalization,
|
||||
Concatenate,
|
||||
Conv2D,
|
||||
DepthwiseConv2D,
|
||||
GlobalAveragePooling2D,
|
||||
Input,
|
||||
Reshape,
|
||||
Multiply,
|
||||
ReLU,
|
||||
PReLU,
|
||||
)
|
||||
|
||||
|
||||
# pylint: disable=line-too-long, too-few-public-methods, no-else-return, unsubscriptable-object, comparison-with-callable
|
||||
PRETRAINED_WEIGHTS = "https://github.com/HamadYA/GhostFaceNets/releases/download/v1.2/GhostFaceNet_W1.3_S1_ArcFace.h5"
|
||||
|
||||
|
||||
class GhostFaceNetClient(FacialRecognition):
|
||||
"""
|
||||
GhostFaceNet model (GhostFaceNetV1 backbone)
|
||||
Repo: https://github.com/HamadYA/GhostFaceNets
|
||||
Pre-trained weights: https://github.com/HamadYA/GhostFaceNets/releases/tag/v1.2
|
||||
GhostFaceNet_W1.3_S1_ArcFace.h5 ~ 16.5MB
|
||||
Author declared that this backbone and pre-trained weights got 99.7667% accuracy on LFW
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.model_name = "GhostFaceNet"
|
||||
self.input_shape = (112, 112)
|
||||
self.output_shape = 512
|
||||
self.model = load_model()
|
||||
|
||||
|
||||
def load_model():
|
||||
model = GhostFaceNetV1()
|
||||
|
||||
weight_file = weight_utils.download_weights_if_necessary(
|
||||
file_name="ghostfacenet_v1.h5", source_url=PRETRAINED_WEIGHTS
|
||||
)
|
||||
|
||||
model = weight_utils.load_model_weights(
|
||||
model=model, weight_file=weight_file
|
||||
)
|
||||
|
||||
return model
|
||||
|
||||
|
||||
def GhostFaceNetV1() -> Model:
|
||||
"""
|
||||
Build GhostFaceNetV1 model. Refactored from
|
||||
github.com/HamadYA/GhostFaceNets/blob/main/backbones/ghost_model.py
|
||||
Returns:
|
||||
model (Model)
|
||||
"""
|
||||
inputs = Input(shape=(112, 112, 3))
|
||||
|
||||
out_channel = 20
|
||||
|
||||
nn = Conv2D(
|
||||
out_channel,
|
||||
(3, 3),
|
||||
strides=1,
|
||||
padding="same",
|
||||
use_bias=False,
|
||||
kernel_initializer=keras.initializers.VarianceScaling(
|
||||
scale=2.0, mode="fan_out", distribution="truncated_normal"
|
||||
),
|
||||
)(inputs)
|
||||
|
||||
nn = BatchNormalization(axis=-1)(nn)
|
||||
nn = Activation("relu")(nn)
|
||||
|
||||
dwkernels = [3, 3, 3, 5, 5, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5]
|
||||
exps = [20, 64, 92, 92, 156, 312, 260, 240, 240, 624, 872, 872, 1248, 1248, 1248, 664]
|
||||
outs = [20, 32, 32, 52, 52, 104, 104, 104, 104, 144, 144, 208, 208, 208, 208, 208]
|
||||
strides_set = [1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1]
|
||||
reductions = [0, 0, 0, 24, 40, 0, 0, 0, 0, 156, 220, 220, 0, 312, 0, 168]
|
||||
|
||||
pre_out = out_channel
|
||||
for dwk, stride, exp, out, reduction in zip(dwkernels, strides_set, exps, outs, reductions):
|
||||
shortcut = not (out == pre_out and stride == 1)
|
||||
nn = ghost_bottleneck(nn, dwk, stride, exp, out, reduction, shortcut)
|
||||
pre_out = out
|
||||
|
||||
nn = Conv2D(
|
||||
664,
|
||||
(1, 1),
|
||||
strides=(1, 1),
|
||||
padding="valid",
|
||||
use_bias=False,
|
||||
kernel_initializer=keras.initializers.VarianceScaling(
|
||||
scale=2.0, mode="fan_out", distribution="truncated_normal"
|
||||
),
|
||||
)(nn)
|
||||
nn = BatchNormalization(axis=-1)(nn)
|
||||
nn = Activation("relu")(nn)
|
||||
|
||||
xx = Model(inputs=inputs, outputs=nn, name="GhostFaceNetV1")
|
||||
|
||||
# post modelling
|
||||
inputs = xx.inputs[0]
|
||||
nn = xx.outputs[0]
|
||||
|
||||
nn = keras.layers.DepthwiseConv2D(nn.shape[1], use_bias=False, name="GDC_dw")(nn)
|
||||
nn = keras.layers.BatchNormalization(momentum=0.99, epsilon=0.001, name="GDC_batchnorm")(nn)
|
||||
nn = keras.layers.Conv2D(
|
||||
512, 1, use_bias=True, kernel_initializer="glorot_normal", name="GDC_conv"
|
||||
)(nn)
|
||||
nn = keras.layers.Flatten(name="GDC_flatten")(nn)
|
||||
|
||||
embedding = keras.layers.BatchNormalization(
|
||||
momentum=0.99, epsilon=0.001, scale=True, name="pre_embedding"
|
||||
)(nn)
|
||||
embedding_fp32 = keras.layers.Activation("linear", dtype="float32", name="embedding")(embedding)
|
||||
|
||||
model = keras.models.Model(inputs, embedding_fp32, name=xx.name)
|
||||
model = replace_relu_with_prelu(model=model)
|
||||
return model
|
||||
|
||||
|
||||
def se_module(inputs, reduction):
|
||||
"""
|
||||
Refactored from github.com/HamadYA/GhostFaceNets/blob/main/backbones/ghost_model.py
|
||||
"""
|
||||
# get the channel axis
|
||||
channel_axis = 1 if K.image_data_format() == "channels_first" else -1
|
||||
# filters = channel axis shape
|
||||
filters = inputs.shape[channel_axis]
|
||||
|
||||
# from None x H x W x C to None x C
|
||||
se = GlobalAveragePooling2D()(inputs)
|
||||
|
||||
# Reshape None x C to None 1 x 1 x C
|
||||
se = Reshape((1, 1, filters))(se)
|
||||
|
||||
# Squeeze by using C*se_ratio. The size will be 1 x 1 x C*se_ratio
|
||||
se = Conv2D(
|
||||
reduction,
|
||||
kernel_size=1,
|
||||
use_bias=True,
|
||||
kernel_initializer=keras.initializers.VarianceScaling(
|
||||
scale=2.0, mode="fan_out", distribution="truncated_normal"
|
||||
),
|
||||
)(se)
|
||||
se = Activation("relu")(se)
|
||||
|
||||
# Excitation using C filters. The size will be 1 x 1 x C
|
||||
se = Conv2D(
|
||||
filters,
|
||||
kernel_size=1,
|
||||
use_bias=True,
|
||||
kernel_initializer=keras.initializers.VarianceScaling(
|
||||
scale=2.0, mode="fan_out", distribution="truncated_normal"
|
||||
),
|
||||
)(se)
|
||||
se = Activation("hard_sigmoid")(se)
|
||||
|
||||
return Multiply()([inputs, se])
|
||||
|
||||
|
||||
def ghost_module(inputs, out, convkernel=1, dwkernel=3, add_activation=True):
|
||||
"""
|
||||
Refactored from github.com/HamadYA/GhostFaceNets/blob/main/backbones/ghost_model.py
|
||||
"""
|
||||
conv_out_channel = out // 2
|
||||
cc = Conv2D(
|
||||
conv_out_channel,
|
||||
convkernel,
|
||||
use_bias=False,
|
||||
strides=(1, 1),
|
||||
padding="same",
|
||||
kernel_initializer=keras.initializers.VarianceScaling(
|
||||
scale=2.0, mode="fan_out", distribution="truncated_normal"
|
||||
),
|
||||
)(inputs)
|
||||
cc = BatchNormalization(axis=-1)(cc)
|
||||
if add_activation:
|
||||
cc = Activation("relu")(cc)
|
||||
|
||||
nn = DepthwiseConv2D(
|
||||
dwkernel,
|
||||
1,
|
||||
padding="same",
|
||||
use_bias=False,
|
||||
depthwise_initializer=keras.initializers.VarianceScaling(
|
||||
scale=2.0, mode="fan_out", distribution="truncated_normal"
|
||||
),
|
||||
)(cc)
|
||||
nn = BatchNormalization(axis=-1)(nn)
|
||||
if add_activation:
|
||||
nn = Activation("relu")(nn)
|
||||
return Concatenate()([cc, nn])
|
||||
|
||||
|
||||
def ghost_bottleneck(inputs, dwkernel, strides, exp, out, reduction, shortcut=True):
|
||||
"""
|
||||
Refactored from github.com/HamadYA/GhostFaceNets/blob/main/backbones/ghost_model.py
|
||||
"""
|
||||
nn = ghost_module(inputs, exp, add_activation=True)
|
||||
if strides > 1:
|
||||
# Extra depth conv if strides higher than 1
|
||||
nn = DepthwiseConv2D(
|
||||
dwkernel,
|
||||
strides,
|
||||
padding="same",
|
||||
use_bias=False,
|
||||
depthwise_initializer=keras.initializers.VarianceScaling(
|
||||
scale=2.0, mode="fan_out", distribution="truncated_normal"
|
||||
),
|
||||
)(nn)
|
||||
nn = BatchNormalization(axis=-1)(nn)
|
||||
|
||||
if reduction > 0:
|
||||
# Squeeze and excite
|
||||
nn = se_module(nn, reduction)
|
||||
|
||||
# Point-wise linear projection
|
||||
nn = ghost_module(nn, out, add_activation=False) # ghost2 = GhostModule(exp, out, relu=False)
|
||||
|
||||
if shortcut:
|
||||
xx = DepthwiseConv2D(
|
||||
dwkernel,
|
||||
strides,
|
||||
padding="same",
|
||||
use_bias=False,
|
||||
depthwise_initializer=keras.initializers.VarianceScaling(
|
||||
scale=2.0, mode="fan_out", distribution="truncated_normal"
|
||||
),
|
||||
)(inputs)
|
||||
xx = BatchNormalization(axis=-1)(xx)
|
||||
xx = Conv2D(
|
||||
out,
|
||||
(1, 1),
|
||||
strides=(1, 1),
|
||||
padding="valid",
|
||||
use_bias=False,
|
||||
kernel_initializer=keras.initializers.VarianceScaling(
|
||||
scale=2.0, mode="fan_out", distribution="truncated_normal"
|
||||
),
|
||||
)(xx)
|
||||
xx = BatchNormalization(axis=-1)(xx)
|
||||
else:
|
||||
xx = inputs
|
||||
return Add()([xx, nn])
|
||||
|
||||
|
||||
def replace_relu_with_prelu(model) -> Model:
|
||||
"""
|
||||
Replaces relu activation function in the built model with prelu.
|
||||
Refactored from github.com/HamadYA/GhostFaceNets/blob/main/backbones/ghost_model.py
|
||||
Args:
|
||||
model (Model): built model with relu activation functions
|
||||
Returns
|
||||
model (Model): built model with prelu activation functions
|
||||
"""
|
||||
|
||||
def convert_relu(layer):
|
||||
if isinstance(layer, ReLU) or (
|
||||
isinstance(layer, Activation) and layer.activation == keras.activations.relu
|
||||
):
|
||||
layer_name = layer.name.replace("_relu", "_prelu")
|
||||
return PReLU(
|
||||
shared_axes=[1, 2],
|
||||
alpha_initializer=tf.initializers.Constant(0.25),
|
||||
name=layer_name,
|
||||
)
|
||||
return layer
|
||||
|
||||
input_tensors = keras.layers.Input(model.input_shape[1:])
|
||||
return keras.models.clone_model(model, input_tensors=input_tensors, clone_function=convert_relu)
|
||||
|
|
@@ -1,394 +0,0 @@
|||
# 3rd party dependencies
|
||||
import tensorflow as tf
|
||||
|
||||
# project dependencies
|
||||
from deepface.commons import package_utils, weight_utils
|
||||
from deepface.models.FacialRecognition import FacialRecognition
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
tf_version = package_utils.get_tf_major_version()
|
||||
if tf_version == 1:
|
||||
from keras.models import Model
|
||||
from keras.layers import Conv2D, ZeroPadding2D, Input, concatenate
|
||||
from keras.layers import Dense, Activation, Lambda, Flatten, BatchNormalization
|
||||
from keras.layers import MaxPooling2D, AveragePooling2D
|
||||
from keras import backend as K
|
||||
else:
|
||||
from tensorflow.keras.models import Model
|
||||
from tensorflow.keras.layers import Conv2D, ZeroPadding2D, Input, concatenate
|
||||
from tensorflow.keras.layers import Dense, Activation, Lambda, Flatten, BatchNormalization
|
||||
from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D
|
||||
from tensorflow.keras import backend as K
|
||||
|
||||
# pylint: disable=unnecessary-lambda
|
||||
|
||||
# ---------------------------------------
|
||||
|
||||
# pylint: disable=too-few-public-methods
|
||||
class OpenFaceClient(FacialRecognition):
|
||||
"""
|
||||
OpenFace model class
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.model = load_model()
|
||||
self.model_name = "OpenFace"
|
||||
self.input_shape = (96, 96)
|
||||
self.output_shape = 128
|
||||
|
||||
|
||||
def load_model(
|
||||
url="https://github.com/serengil/deepface_models/releases/download/v1.0/openface_weights.h5",
|
||||
) -> Model:
|
||||
"""
|
||||
Construct OpenFace model, download its weights and load
|
||||
Returns:
|
||||
model (Model)
|
||||
"""
|
||||
myInput = Input(shape=(96, 96, 3))
|
||||
|
||||
x = ZeroPadding2D(padding=(3, 3), input_shape=(96, 96, 3))(myInput)
|
||||
x = Conv2D(64, (7, 7), strides=(2, 2), name="conv1")(x)
|
||||
x = BatchNormalization(axis=3, epsilon=0.00001, name="bn1")(x)
|
||||
x = Activation("relu")(x)
|
||||
x = ZeroPadding2D(padding=(1, 1))(x)
|
||||
x = MaxPooling2D(pool_size=3, strides=2)(x)
|
||||
x = Lambda(lambda x: tf.nn.lrn(x, alpha=1e-4, beta=0.75), name="lrn_1")(x)
|
||||
x = Conv2D(64, (1, 1), name="conv2")(x)
|
||||
x = BatchNormalization(axis=3, epsilon=0.00001, name="bn2")(x)
|
||||
x = Activation("relu")(x)
|
||||
x = ZeroPadding2D(padding=(1, 1))(x)
|
||||
x = Conv2D(192, (3, 3), name="conv3")(x)
|
||||
x = BatchNormalization(axis=3, epsilon=0.00001, name="bn3")(x)
|
||||
x = Activation("relu")(x)
|
||||
x = Lambda(lambda x: tf.nn.lrn(x, alpha=1e-4, beta=0.75), name="lrn_2")(x) # x is equal added
|
||||
x = ZeroPadding2D(padding=(1, 1))(x)
|
||||
x = MaxPooling2D(pool_size=3, strides=2)(x)
|
||||
|
||||
# Inception3a
|
||||
inception_3a_3x3 = Conv2D(96, (1, 1), name="inception_3a_3x3_conv1")(x)
|
||||
inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3a_3x3_bn1")(
|
||||
inception_3a_3x3
|
||||
)
|
||||
inception_3a_3x3 = Activation("relu")(inception_3a_3x3)
|
||||
inception_3a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3a_3x3)
|
||||
inception_3a_3x3 = Conv2D(128, (3, 3), name="inception_3a_3x3_conv2")(inception_3a_3x3)
|
||||
inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3a_3x3_bn2")(
|
||||
inception_3a_3x3
|
||||
)
|
||||
inception_3a_3x3 = Activation("relu")(inception_3a_3x3)
|
||||
|
||||
inception_3a_5x5 = Conv2D(16, (1, 1), name="inception_3a_5x5_conv1")(x)
|
||||
inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3a_5x5_bn1")(
|
||||
inception_3a_5x5
|
||||
)
|
||||
inception_3a_5x5 = Activation("relu")(inception_3a_5x5)
|
||||
inception_3a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3a_5x5)
inception_3a_5x5 = Conv2D(32, (5, 5), name="inception_3a_5x5_conv2")(inception_3a_5x5)
inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3a_5x5_bn2")(inception_3a_5x5)
inception_3a_5x5 = Activation("relu")(inception_3a_5x5)

inception_3a_pool = MaxPooling2D(pool_size=3, strides=2)(x)
inception_3a_pool = Conv2D(32, (1, 1), name="inception_3a_pool_conv")(inception_3a_pool)
inception_3a_pool = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3a_pool_bn")(inception_3a_pool)
inception_3a_pool = Activation("relu")(inception_3a_pool)
inception_3a_pool = ZeroPadding2D(padding=((3, 4), (3, 4)))(inception_3a_pool)

inception_3a_1x1 = Conv2D(64, (1, 1), name="inception_3a_1x1_conv")(x)
inception_3a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3a_1x1_bn")(inception_3a_1x1)
inception_3a_1x1 = Activation("relu")(inception_3a_1x1)

inception_3a = concatenate([inception_3a_3x3, inception_3a_5x5, inception_3a_pool, inception_3a_1x1], axis=3)

# Inception3b
inception_3b_3x3 = Conv2D(96, (1, 1), name="inception_3b_3x3_conv1")(inception_3a)
inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3b_3x3_bn1")(inception_3b_3x3)
inception_3b_3x3 = Activation("relu")(inception_3b_3x3)
inception_3b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3b_3x3)
inception_3b_3x3 = Conv2D(128, (3, 3), name="inception_3b_3x3_conv2")(inception_3b_3x3)
inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3b_3x3_bn2")(inception_3b_3x3)
inception_3b_3x3 = Activation("relu")(inception_3b_3x3)

inception_3b_5x5 = Conv2D(32, (1, 1), name="inception_3b_5x5_conv1")(inception_3a)
inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3b_5x5_bn1")(inception_3b_5x5)
inception_3b_5x5 = Activation("relu")(inception_3b_5x5)
inception_3b_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3b_5x5)
inception_3b_5x5 = Conv2D(64, (5, 5), name="inception_3b_5x5_conv2")(inception_3b_5x5)
inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3b_5x5_bn2")(inception_3b_5x5)
inception_3b_5x5 = Activation("relu")(inception_3b_5x5)

inception_3b_pool = Lambda(lambda x: x**2, name="power2_3b")(inception_3a)
inception_3b_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3b_pool)
inception_3b_pool = Lambda(lambda x: x * 9, name="mult9_3b")(inception_3b_pool)
inception_3b_pool = Lambda(lambda x: K.sqrt(x), name="sqrt_3b")(inception_3b_pool)
inception_3b_pool = Conv2D(64, (1, 1), name="inception_3b_pool_conv")(inception_3b_pool)
inception_3b_pool = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3b_pool_bn")(inception_3b_pool)
inception_3b_pool = Activation("relu")(inception_3b_pool)
inception_3b_pool = ZeroPadding2D(padding=(4, 4))(inception_3b_pool)

inception_3b_1x1 = Conv2D(64, (1, 1), name="inception_3b_1x1_conv")(inception_3a)
inception_3b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3b_1x1_bn")(inception_3b_1x1)
inception_3b_1x1 = Activation("relu")(inception_3b_1x1)

inception_3b = concatenate([inception_3b_3x3, inception_3b_5x5, inception_3b_pool, inception_3b_1x1], axis=3)

# Inception3c
inception_3c_3x3 = Conv2D(128, (1, 1), strides=(1, 1), name="inception_3c_3x3_conv1")(inception_3b)
inception_3c_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3c_3x3_bn1")(inception_3c_3x3)
inception_3c_3x3 = Activation("relu")(inception_3c_3x3)
inception_3c_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3c_3x3)
inception_3c_3x3 = Conv2D(256, (3, 3), strides=(2, 2), name="inception_3c_3x3_conv2")(inception_3c_3x3)
inception_3c_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3c_3x3_bn2")(inception_3c_3x3)
inception_3c_3x3 = Activation("relu")(inception_3c_3x3)

inception_3c_5x5 = Conv2D(32, (1, 1), strides=(1, 1), name="inception_3c_5x5_conv1")(inception_3b)
inception_3c_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3c_5x5_bn1")(inception_3c_5x5)
inception_3c_5x5 = Activation("relu")(inception_3c_5x5)
inception_3c_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3c_5x5)
inception_3c_5x5 = Conv2D(64, (5, 5), strides=(2, 2), name="inception_3c_5x5_conv2")(inception_3c_5x5)
inception_3c_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_3c_5x5_bn2")(inception_3c_5x5)
inception_3c_5x5 = Activation("relu")(inception_3c_5x5)

inception_3c_pool = MaxPooling2D(pool_size=3, strides=2)(inception_3b)
inception_3c_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_3c_pool)

inception_3c = concatenate([inception_3c_3x3, inception_3c_5x5, inception_3c_pool], axis=3)

# inception 4a
inception_4a_3x3 = Conv2D(96, (1, 1), strides=(1, 1), name="inception_4a_3x3_conv1")(inception_3c)
inception_4a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_4a_3x3_bn1")(inception_4a_3x3)
inception_4a_3x3 = Activation("relu")(inception_4a_3x3)
inception_4a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_4a_3x3)
inception_4a_3x3 = Conv2D(192, (3, 3), strides=(1, 1), name="inception_4a_3x3_conv2")(inception_4a_3x3)
inception_4a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_4a_3x3_bn2")(inception_4a_3x3)
inception_4a_3x3 = Activation("relu")(inception_4a_3x3)

inception_4a_5x5 = Conv2D(32, (1, 1), strides=(1, 1), name="inception_4a_5x5_conv1")(inception_3c)
inception_4a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_4a_5x5_bn1")(inception_4a_5x5)
inception_4a_5x5 = Activation("relu")(inception_4a_5x5)
inception_4a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_4a_5x5)
inception_4a_5x5 = Conv2D(64, (5, 5), strides=(1, 1), name="inception_4a_5x5_conv2")(inception_4a_5x5)
inception_4a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_4a_5x5_bn2")(inception_4a_5x5)
inception_4a_5x5 = Activation("relu")(inception_4a_5x5)

inception_4a_pool = Lambda(lambda x: x**2, name="power2_4a")(inception_3c)
inception_4a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_4a_pool)
inception_4a_pool = Lambda(lambda x: x * 9, name="mult9_4a")(inception_4a_pool)
inception_4a_pool = Lambda(lambda x: K.sqrt(x), name="sqrt_4a")(inception_4a_pool)

inception_4a_pool = Conv2D(128, (1, 1), strides=(1, 1), name="inception_4a_pool_conv")(inception_4a_pool)
inception_4a_pool = BatchNormalization(axis=3, epsilon=0.00001, name="inception_4a_pool_bn")(inception_4a_pool)
inception_4a_pool = Activation("relu")(inception_4a_pool)
inception_4a_pool = ZeroPadding2D(padding=(2, 2))(inception_4a_pool)

inception_4a_1x1 = Conv2D(256, (1, 1), strides=(1, 1), name="inception_4a_1x1_conv")(inception_3c)
inception_4a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_4a_1x1_bn")(inception_4a_1x1)
inception_4a_1x1 = Activation("relu")(inception_4a_1x1)

inception_4a = concatenate([inception_4a_3x3, inception_4a_5x5, inception_4a_pool, inception_4a_1x1], axis=3)

# inception4e
inception_4e_3x3 = Conv2D(160, (1, 1), strides=(1, 1), name="inception_4e_3x3_conv1")(inception_4a)
inception_4e_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_4e_3x3_bn1")(inception_4e_3x3)
inception_4e_3x3 = Activation("relu")(inception_4e_3x3)
inception_4e_3x3 = ZeroPadding2D(padding=(1, 1))(inception_4e_3x3)
inception_4e_3x3 = Conv2D(256, (3, 3), strides=(2, 2), name="inception_4e_3x3_conv2")(inception_4e_3x3)
inception_4e_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_4e_3x3_bn2")(inception_4e_3x3)
inception_4e_3x3 = Activation("relu")(inception_4e_3x3)

inception_4e_5x5 = Conv2D(64, (1, 1), strides=(1, 1), name="inception_4e_5x5_conv1")(inception_4a)
inception_4e_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_4e_5x5_bn1")(inception_4e_5x5)
inception_4e_5x5 = Activation("relu")(inception_4e_5x5)
inception_4e_5x5 = ZeroPadding2D(padding=(2, 2))(inception_4e_5x5)
inception_4e_5x5 = Conv2D(128, (5, 5), strides=(2, 2), name="inception_4e_5x5_conv2")(inception_4e_5x5)
inception_4e_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_4e_5x5_bn2")(inception_4e_5x5)
inception_4e_5x5 = Activation("relu")(inception_4e_5x5)

inception_4e_pool = MaxPooling2D(pool_size=3, strides=2)(inception_4a)
inception_4e_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_4e_pool)

inception_4e = concatenate([inception_4e_3x3, inception_4e_5x5, inception_4e_pool], axis=3)

# inception5a
inception_5a_3x3 = Conv2D(96, (1, 1), strides=(1, 1), name="inception_5a_3x3_conv1")(inception_4e)
inception_5a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_5a_3x3_bn1")(inception_5a_3x3)
inception_5a_3x3 = Activation("relu")(inception_5a_3x3)
inception_5a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_5a_3x3)
inception_5a_3x3 = Conv2D(384, (3, 3), strides=(1, 1), name="inception_5a_3x3_conv2")(inception_5a_3x3)
inception_5a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_5a_3x3_bn2")(inception_5a_3x3)
inception_5a_3x3 = Activation("relu")(inception_5a_3x3)

inception_5a_pool = Lambda(lambda x: x**2, name="power2_5a")(inception_4e)
inception_5a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_5a_pool)
inception_5a_pool = Lambda(lambda x: x * 9, name="mult9_5a")(inception_5a_pool)
inception_5a_pool = Lambda(lambda x: K.sqrt(x), name="sqrt_5a")(inception_5a_pool)

inception_5a_pool = Conv2D(96, (1, 1), strides=(1, 1), name="inception_5a_pool_conv")(inception_5a_pool)
inception_5a_pool = BatchNormalization(axis=3, epsilon=0.00001, name="inception_5a_pool_bn")(inception_5a_pool)
inception_5a_pool = Activation("relu")(inception_5a_pool)
inception_5a_pool = ZeroPadding2D(padding=(1, 1))(inception_5a_pool)

inception_5a_1x1 = Conv2D(256, (1, 1), strides=(1, 1), name="inception_5a_1x1_conv")(inception_4e)
inception_5a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_5a_1x1_bn")(inception_5a_1x1)
inception_5a_1x1 = Activation("relu")(inception_5a_1x1)

inception_5a = concatenate([inception_5a_3x3, inception_5a_pool, inception_5a_1x1], axis=3)

# inception_5b
inception_5b_3x3 = Conv2D(96, (1, 1), strides=(1, 1), name="inception_5b_3x3_conv1")(inception_5a)
inception_5b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_5b_3x3_bn1")(inception_5b_3x3)
inception_5b_3x3 = Activation("relu")(inception_5b_3x3)
inception_5b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_5b_3x3)
inception_5b_3x3 = Conv2D(384, (3, 3), strides=(1, 1), name="inception_5b_3x3_conv2")(inception_5b_3x3)
inception_5b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_5b_3x3_bn2")(inception_5b_3x3)
inception_5b_3x3 = Activation("relu")(inception_5b_3x3)

inception_5b_pool = MaxPooling2D(pool_size=3, strides=2)(inception_5a)

inception_5b_pool = Conv2D(96, (1, 1), strides=(1, 1), name="inception_5b_pool_conv")(inception_5b_pool)
inception_5b_pool = BatchNormalization(axis=3, epsilon=0.00001, name="inception_5b_pool_bn")(inception_5b_pool)
inception_5b_pool = Activation("relu")(inception_5b_pool)

inception_5b_pool = ZeroPadding2D(padding=(1, 1))(inception_5b_pool)

inception_5b_1x1 = Conv2D(256, (1, 1), strides=(1, 1), name="inception_5b_1x1_conv")(inception_5a)
inception_5b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name="inception_5b_1x1_bn")(inception_5b_1x1)
inception_5b_1x1 = Activation("relu")(inception_5b_1x1)

inception_5b = concatenate([inception_5b_3x3, inception_5b_pool, inception_5b_1x1], axis=3)

av_pool = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(inception_5b)
reshape_layer = Flatten()(av_pool)
dense_layer = Dense(128, name="dense_layer")(reshape_layer)
norm_layer = Lambda(lambda x: K.l2_normalize(x, axis=1), name="norm_layer")(dense_layer)

# Final Model
model = Model(inputs=[myInput], outputs=norm_layer)

# -----------------------------------

weight_file = weight_utils.download_weights_if_necessary(file_name="openface_weights.h5", source_url=url)

model = weight_utils.load_model_weights(model=model, weight_file=weight_file)

# -----------------------------------

return model

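
# Usage sketch added for illustration (not part of the original file). It assumes the builder
# above is exposed as load_model(), as in the other model files in this package, and that the
# network takes 96x96 face crops, matching the Dense(128) + l2-normalization head above.
if __name__ == "__main__":
    import numpy as np

    openface_model = load_model()  # hypothetical name of the enclosing builder
    dummy_face = np.random.rand(1, 96, 96, 3).astype(np.float32)  # placeholder face crop
    embedding = openface_model.predict(dummy_face)[0]
    print(embedding.shape, float(np.linalg.norm(embedding)))  # (128,), ~1.0 due to norm_layer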
@ -1,79 +0,0 @@
# built-in dependencies
from typing import Any, List

# 3rd party dependencies
import numpy as np
import cv2 as cv

# project dependencies
from deepface.commons import weight_utils
from deepface.models.FacialRecognition import FacialRecognition
from deepface.commons.logger import Logger

logger = Logger()

# pylint: disable=line-too-long, too-few-public-methods


class SFaceClient(FacialRecognition):
    """
    SFace model class
    """

    def __init__(self):
        self.model = load_model()
        self.model_name = "SFace"
        self.input_shape = (112, 112)
        self.output_shape = 128

    def forward(self, img: np.ndarray) -> List[float]:
        """
        Find embeddings with SFace model.
        This model necessitates the override of the forward method
        because it is not a keras model.
        Args:
            img (np.ndarray): pre-loaded image in BGR
        Returns:
            embeddings (list): multi-dimensional vector
        """
        # return self.model.predict(img)[0].tolist()

        # revert the image to original format and preprocess using the model
        input_blob = (img[0] * 255).astype(np.uint8)

        embeddings = self.model.model.feature(input_blob)

        return embeddings[0].tolist()


def load_model(
    url="https://github.com/opencv/opencv_zoo/raw/main/models/face_recognition_sface/face_recognition_sface_2021dec.onnx",
) -> Any:
    """
    Construct SFace model, download its weights and load
    """

    weight_file = weight_utils.download_weights_if_necessary(
        file_name="face_recognition_sface_2021dec.onnx", source_url=url
    )

    model = SFaceWrapper(model_path=weight_file)

    return model


class SFaceWrapper:
    def __init__(self, model_path):
        """
        SFace wrapper covering model construction, layer infos and predict
        """
        try:
            self.model = cv.FaceRecognizerSF.create(
                model=model_path, config="", backend_id=0, target_id=0
            )
        except Exception as err:
            raise ValueError(
                "Exception while calling opencv.FaceRecognizerSF module. "
                "This is an optional dependency. "
                "You can install it as pip install opencv-contrib-python."
            ) from err
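
# Usage sketch added for illustration (not part of the original module): SFaceClient.forward
# expects a single normalized BGR face crop batched to shape (1, 112, 112, 3).
if __name__ == "__main__":
    client = SFaceClient()
    dummy_face = np.random.rand(1, 112, 112, 3).astype(np.float32)  # placeholder input
    vector = client.forward(dummy_face)
    print(len(vector))  # 128-dimensional embedding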
@ -1,162 +0,0 @@
# built-in dependencies
from typing import List

# 3rd party dependencies
import numpy as np

# project dependencies
from deepface.commons import package_utils, weight_utils
from deepface.modules import verification
from deepface.models.FacialRecognition import FacialRecognition
from deepface.commons.logger import Logger

logger = Logger()

# ---------------------------------------

tf_version = package_utils.get_tf_major_version()
if tf_version == 1:
    from keras.models import Model, Sequential
    from keras.layers import (
        Convolution2D,
        ZeroPadding2D,
        MaxPooling2D,
        Flatten,
        Dropout,
        Activation,
    )
else:
    from tensorflow.keras.models import Model, Sequential
    from tensorflow.keras.layers import (
        Convolution2D,
        ZeroPadding2D,
        MaxPooling2D,
        Flatten,
        Dropout,
        Activation,
    )

# ---------------------------------------

# pylint: disable=too-few-public-methods
class VggFaceClient(FacialRecognition):
    """
    VGG-Face model class
    """

    def __init__(self):
        self.model = load_model()
        self.model_name = "VGG-Face"
        self.input_shape = (224, 224)
        self.output_shape = 4096

    def forward(self, img: np.ndarray) -> List[float]:
        """
        Generates embeddings using the VGG-Face model.
        This method incorporates an additional normalization layer,
        necessitating the override of the forward method.

        Args:
            img (np.ndarray): pre-loaded image in BGR
        Returns:
            embeddings (list): multi-dimensional vector
        """
        # model.predict causes memory issue when it is called in a for loop
        # embedding = model.predict(img, verbose=0)[0].tolist()

        # having normalization layer in descriptor troubles for some gpu users (e.g. issue 957, 966)
        # instead we are now calculating it in the traditional way, not with keras backend
        embedding = self.model(img, training=False).numpy()[0].tolist()
        embedding = verification.l2_normalize(embedding)
        return embedding.tolist()


def base_model() -> Sequential:
    """
    Base model of VGG-Face being used for classification - not to find embeddings
    Returns:
        model (Sequential): model was trained to classify 2622 identities
    """
    model = Sequential()
    model.add(ZeroPadding2D((1, 1), input_shape=(224, 224, 3)))
    model.add(Convolution2D(64, (3, 3), activation="relu"))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(64, (3, 3), activation="relu"))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, (3, 3), activation="relu"))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, (3, 3), activation="relu"))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, (3, 3), activation="relu"))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, (3, 3), activation="relu"))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, (3, 3), activation="relu"))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation="relu"))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation="relu"))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation="relu"))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation="relu"))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation="relu"))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, (3, 3), activation="relu"))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(Convolution2D(4096, (7, 7), activation="relu"))
    model.add(Dropout(0.5))
    model.add(Convolution2D(4096, (1, 1), activation="relu"))
    model.add(Dropout(0.5))
    model.add(Convolution2D(2622, (1, 1)))
    model.add(Flatten())
    model.add(Activation("softmax"))

    return model


def load_model(
    url="https://github.com/serengil/deepface_models/releases/download/v1.0/vgg_face_weights.h5",
) -> Model:
    """
    Final VGG-Face model being used for finding embeddings
    Returns:
        model (Model): returning 4096 dimensional vectors
    """

    model = base_model()

    weight_file = weight_utils.download_weights_if_necessary(
        file_name="vgg_face_weights.h5", source_url=url
    )

    model = weight_utils.load_model_weights(model=model, weight_file=weight_file)

    # 2622 dimensional model
    # vgg_face_descriptor = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output)

    # 4096 dimensional model offers a 6% to 14% increase in accuracy!
    # - softmax causes underfitting
    # - added normalization layer to avoid underfitting with euclidean
    # as described here: https://github.com/serengil/deepface/issues/944
    base_model_output = Sequential()
    base_model_output = Flatten()(model.layers[-5].output)
    # keras backend's l2 normalization layer troubles some gpu users (e.g. issue 957, 966)
    # base_model_output = Lambda(lambda x: K.l2_normalize(x, axis=1), name="norm_layer")(
    #     base_model_output
    # )
    vgg_face_descriptor = Model(inputs=model.input, outputs=base_model_output)

    return vgg_face_descriptor
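
# Usage sketch added for illustration (not part of the original module): the client returns a
# 4096-dimensional, L2-normalized embedding for a (1, 224, 224, 3) face crop scaled to [0, 1].
if __name__ == "__main__":
    client = VggFaceClient()
    dummy_face = np.random.rand(1, 224, 224, 3).astype(np.float32)  # placeholder input
    emb = client.forward(dummy_face)
    print(len(emb), float(np.linalg.norm(emb)))  # 4096, ~1.0 after l2_normalize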
@ -1,215 +0,0 @@
# built-in dependencies
from typing import Union

# 3rd party dependencies
import cv2
import numpy as np

# project dependencies
from deepface.commons import weight_utils
from deepface.commons.logger import Logger

logger = Logger()

# pylint: disable=line-too-long, too-few-public-methods, nested-min-max
class Fasnet:
    """
    Mini Face Anti Spoofing Net Library from repo: github.com/minivision-ai/Silent-Face-Anti-Spoofing

    Minivision's Silent-Face-Anti-Spoofing Repo licensed under Apache License 2.0
    Ref: github.com/minivision-ai/Silent-Face-Anti-Spoofing/blob/master/src/model_lib/MiniFASNet.py
    """

    def __init__(self):
        # pytorch is an optional dependency, enforce it to be installed if class imported
        try:
            import torch
        except Exception as err:
            raise ValueError(
                "You must install torch with `pip install torch` command to use face anti spoofing module"
            ) from err

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.device = device

        # download pre-trained models if not installed yet
        first_model_weight_file = weight_utils.download_weights_if_necessary(
            file_name="2.7_80x80_MiniFASNetV2.pth",
            source_url="https://github.com/minivision-ai/Silent-Face-Anti-Spoofing/raw/master/resources/anti_spoof_models/2.7_80x80_MiniFASNetV2.pth",
        )

        second_model_weight_file = weight_utils.download_weights_if_necessary(
            file_name="4_0_0_80x80_MiniFASNetV1SE.pth",
            source_url="https://github.com/minivision-ai/Silent-Face-Anti-Spoofing/raw/master/resources/anti_spoof_models/4_0_0_80x80_MiniFASNetV1SE.pth",
        )

        # guarantees Fasnet imported and torch installed
        from deepface.models.spoofing import FasNetBackbone

        # Fasnet will use 2 distinct models to predict, then it will find the sum of predictions
        # to make a final prediction

        first_model = FasNetBackbone.MiniFASNetV2(conv6_kernel=(5, 5)).to(device)
        second_model = FasNetBackbone.MiniFASNetV1SE(conv6_kernel=(5, 5)).to(device)

        # load model weight for first model
        state_dict = torch.load(first_model_weight_file, map_location=device)
        keys = iter(state_dict)
        first_layer_name = keys.__next__()

        if first_layer_name.find("module.") >= 0:
            from collections import OrderedDict

            new_state_dict = OrderedDict()
            for key, value in state_dict.items():
                name_key = key[7:]
                new_state_dict[name_key] = value
            first_model.load_state_dict(new_state_dict)
        else:
            first_model.load_state_dict(state_dict)

        # load model weight for second model
        state_dict = torch.load(second_model_weight_file, map_location=device)
        keys = iter(state_dict)
        first_layer_name = keys.__next__()

        if first_layer_name.find("module.") >= 0:
            from collections import OrderedDict

            new_state_dict = OrderedDict()
            for key, value in state_dict.items():
                name_key = key[7:]
                new_state_dict[name_key] = value
            second_model.load_state_dict(new_state_dict)
        else:
            second_model.load_state_dict(state_dict)

        # evaluate models
        _ = first_model.eval()
        _ = second_model.eval()

        self.first_model = first_model
        self.second_model = second_model

    def analyze(self, img: np.ndarray, facial_area: Union[list, tuple]):
        """
        Analyze whether a given image is spoofed or not
        Args:
            img (np.ndarray): pre-loaded image
            facial_area (list or tuple): facial rectangle area coordinates with x, y, w, h respectively
        Returns:
            result (tuple): a result tuple consisting of is_real and score
        """
        import torch
        import torch.nn.functional as F

        x, y, w, h = facial_area
        first_img = crop(img, (x, y, w, h), 2.7, 80, 80)
        second_img = crop(img, (x, y, w, h), 4, 80, 80)

        test_transform = Compose(
            [
                ToTensor(),
            ]
        )

        first_img = test_transform(first_img)
        first_img = first_img.unsqueeze(0).to(self.device)

        second_img = test_transform(second_img)
        second_img = second_img.unsqueeze(0).to(self.device)

        with torch.no_grad():
            first_result = self.first_model.forward(first_img)
            first_result = F.softmax(first_result).cpu().numpy()

            second_result = self.second_model.forward(second_img)
            second_result = F.softmax(second_result).cpu().numpy()

        prediction = np.zeros((1, 3))
        prediction += first_result
        prediction += second_result

        label = np.argmax(prediction)
        is_real = True if label == 1 else False  # pylint: disable=simplifiable-if-expression
        score = prediction[0][label] / 2

        return is_real, score


# subsidiary classes and functions


def to_tensor(pic):
    """Convert a ``numpy.ndarray`` to tensor.

    See ``ToTensor`` for more details.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

    Returns:
        Tensor: Converted image.
    """
    import torch

    # handle numpy array
    # IR image channel=1: modify by lzc --> 20190730
    if pic.ndim == 2:
        pic = pic.reshape((pic.shape[0], pic.shape[1], 1))

    img = torch.from_numpy(pic.transpose((2, 0, 1)))
    # backward compatibility
    # return img.float().div(255)  modify by zkx
    return img.float()


class Compose:
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img):
        for t in self.transforms:
            img = t(img)
        return img


class ToTensor:
    def __call__(self, pic):
        return to_tensor(pic)


def _get_new_box(src_w, src_h, bbox, scale):
    x = bbox[0]
    y = bbox[1]
    box_w = bbox[2]
    box_h = bbox[3]
    scale = min((src_h - 1) / box_h, min((src_w - 1) / box_w, scale))
    new_width = box_w * scale
    new_height = box_h * scale
    center_x, center_y = box_w / 2 + x, box_h / 2 + y
    left_top_x = center_x - new_width / 2
    left_top_y = center_y - new_height / 2
    right_bottom_x = center_x + new_width / 2
    right_bottom_y = center_y + new_height / 2
    if left_top_x < 0:
        right_bottom_x -= left_top_x
        left_top_x = 0
    if left_top_y < 0:
        right_bottom_y -= left_top_y
        left_top_y = 0
    if right_bottom_x > src_w - 1:
        left_top_x -= right_bottom_x - src_w + 1
        right_bottom_x = src_w - 1
    if right_bottom_y > src_h - 1:
        left_top_y -= right_bottom_y - src_h + 1
        right_bottom_y = src_h - 1
    return int(left_top_x), int(left_top_y), int(right_bottom_x), int(right_bottom_y)


def crop(org_img, bbox, scale, out_w, out_h):
    src_h, src_w, _ = np.shape(org_img)
    left_top_x, left_top_y, right_bottom_x, right_bottom_y = _get_new_box(src_w, src_h, bbox, scale)
    img = org_img[left_top_y : right_bottom_y + 1, left_top_x : right_bottom_x + 1]
    dst_img = cv2.resize(img, (out_w, out_h))
    return dst_img
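
# Usage sketch added for illustration (not part of the original module): analyze() takes the full
# BGR frame plus an (x, y, w, h) box from any face detector and fuses both MiniFASNet predictions.
if __name__ == "__main__":
    frame = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)  # placeholder frame
    spoof_model = Fasnet()  # downloads both MiniFASNet weight files on first use
    is_real, score = spoof_model.analyze(img=frame, facial_area=(200, 120, 160, 160))
    print(is_real, round(float(score), 4))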
@ -1,524 +0,0 @@
# These classes are copied from Minivision's Silent-Face-Anti-Spoofing Repo
# licensed under Apache License 2.0
# Ref: github.com/minivision-ai/Silent-Face-Anti-Spoofing/blob/master/src/model_lib/MiniFASNet.py

# 3rd party dependencies
import torch
from torch.nn import (
    Linear,
    Conv2d,
    BatchNorm1d,
    BatchNorm2d,
    PReLU,
    ReLU,
    Sigmoid,
    AdaptiveAvgPool2d,
    Sequential,
    Module,
)

# pylint: disable=super-with-arguments, too-many-instance-attributes, unused-argument, redefined-builtin, too-few-public-methods

keep_dict = {
    "1.8M": [
        32, 32, 103, 103, 64, 13, 13, 64, 26, 26, 64, 13, 13, 64, 52, 52, 64,
        231, 231, 128, 154, 154, 128, 52, 52, 128, 26, 26, 128, 52, 52, 128,
        26, 26, 128, 26, 26, 128, 308, 308, 128, 26, 26, 128, 26, 26, 128,
        512, 512,
    ],
    "1.8M_": [
        32, 32, 103, 103, 64, 13, 13, 64, 13, 13, 64, 13, 13, 64, 13, 13, 64,
        231, 231, 128, 231, 231, 128, 52, 52, 128, 26, 26, 128, 77, 77, 128,
        26, 26, 128, 26, 26, 128, 308, 308, 128, 26, 26, 128, 26, 26, 128,
        512, 512,
    ],
}


def MiniFASNetV2(embedding_size=128, conv6_kernel=(7, 7), drop_p=0.2, num_classes=3, img_channel=3):
    return MiniFASNet(
        keep_dict["1.8M_"], embedding_size, conv6_kernel, drop_p, num_classes, img_channel
    )


def MiniFASNetV1SE(
    embedding_size=128, conv6_kernel=(7, 7), drop_p=0.75, num_classes=3, img_channel=3
):
    return MiniFASNetSE(
        keep_dict["1.8M"], embedding_size, conv6_kernel, drop_p, num_classes, img_channel
    )


class Flatten(Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


class Conv_block(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Conv_block, self).__init__()
        self.conv = Conv2d(
            in_c,
            out_c,
            kernel_size=kernel,
            groups=groups,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = BatchNorm2d(out_c)
        self.prelu = PReLU(out_c)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.prelu(x)
        return x


class Linear_block(Module):
    def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
        super(Linear_block, self).__init__()
        self.conv = Conv2d(
            in_c,
            out_channels=out_c,
            kernel_size=kernel,
            groups=groups,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = BatchNorm2d(out_c)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class Depth_Wise(Module):
    def __init__(
        self, c1, c2, c3, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1
    ):
        super(Depth_Wise, self).__init__()
        c1_in, c1_out = c1
        c2_in, c2_out = c2
        c3_in, c3_out = c3
        self.conv = Conv_block(c1_in, out_c=c1_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.conv_dw = Conv_block(
            c2_in, c2_out, groups=c2_in, kernel=kernel, padding=padding, stride=stride
        )
        self.project = Linear_block(c3_in, c3_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.residual = residual

    def forward(self, x):
        if self.residual:
            short_cut = x
        x = self.conv(x)
        x = self.conv_dw(x)
        x = self.project(x)
        if self.residual:
            output = short_cut + x
        else:
            output = x
        return output


class Depth_Wise_SE(Module):
    def __init__(
        self,
        c1,
        c2,
        c3,
        residual=False,
        kernel=(3, 3),
        stride=(2, 2),
        padding=(1, 1),
        groups=1,
        se_reduct=8,
    ):
        super(Depth_Wise_SE, self).__init__()
        c1_in, c1_out = c1
        c2_in, c2_out = c2
        c3_in, c3_out = c3
        self.conv = Conv_block(c1_in, out_c=c1_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.conv_dw = Conv_block(
            c2_in, c2_out, groups=c2_in, kernel=kernel, padding=padding, stride=stride
        )
        self.project = Linear_block(c3_in, c3_out, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
        self.residual = residual
        self.se_module = SEModule(c3_out, se_reduct)

    def forward(self, x):
        if self.residual:
            short_cut = x
        x = self.conv(x)
        x = self.conv_dw(x)
        x = self.project(x)
        if self.residual:
            x = self.se_module(x)
            output = short_cut + x
        else:
            output = x
        return output


class SEModule(Module):
    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2d(1)
        self.fc1 = Conv2d(channels, channels // reduction, kernel_size=1, padding=0, bias=False)
        self.bn1 = BatchNorm2d(channels // reduction)
        self.relu = ReLU(inplace=True)
        self.fc2 = Conv2d(channels // reduction, channels, kernel_size=1, padding=0, bias=False)
        self.bn2 = BatchNorm2d(channels)
        self.sigmoid = Sigmoid()

    def forward(self, x):
        module_input = x
        x = self.avg_pool(x)
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.sigmoid(x)
        return module_input * x


class Residual(Module):
    def __init__(self, c1, c2, c3, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
        super(Residual, self).__init__()
        modules = []
        for i in range(num_block):
            c1_tuple = c1[i]
            c2_tuple = c2[i]
            c3_tuple = c3[i]
            modules.append(
                Depth_Wise(
                    c1_tuple,
                    c2_tuple,
                    c3_tuple,
                    residual=True,
                    kernel=kernel,
                    padding=padding,
                    stride=stride,
                    groups=groups,
                )
            )
        self.model = Sequential(*modules)

    def forward(self, x):
        return self.model(x)


class ResidualSE(Module):
    def __init__(
        self,
        c1,
        c2,
        c3,
        num_block,
        groups,
        kernel=(3, 3),
        stride=(1, 1),
        padding=(1, 1),
        se_reduct=4,
    ):
        super(ResidualSE, self).__init__()
        modules = []
        for i in range(num_block):
            c1_tuple = c1[i]
            c2_tuple = c2[i]
            c3_tuple = c3[i]
            if i == num_block - 1:
                modules.append(
                    Depth_Wise_SE(
                        c1_tuple,
                        c2_tuple,
                        c3_tuple,
                        residual=True,
                        kernel=kernel,
                        padding=padding,
                        stride=stride,
                        groups=groups,
                        se_reduct=se_reduct,
                    )
                )
            else:
                modules.append(
                    Depth_Wise(
                        c1_tuple,
                        c2_tuple,
                        c3_tuple,
                        residual=True,
                        kernel=kernel,
                        padding=padding,
                        stride=stride,
                        groups=groups,
                    )
                )
        self.model = Sequential(*modules)

    def forward(self, x):
        return self.model(x)


class MiniFASNet(Module):
    def __init__(
        self, keep, embedding_size, conv6_kernel=(7, 7), drop_p=0.0, num_classes=3, img_channel=3
    ):
        super(MiniFASNet, self).__init__()
        self.embedding_size = embedding_size

        self.conv1 = Conv_block(img_channel, keep[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1))
        self.conv2_dw = Conv_block(
            keep[0], keep[1], kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=keep[1]
        )

        c1 = [(keep[1], keep[2])]
        c2 = [(keep[2], keep[3])]
        c3 = [(keep[3], keep[4])]

        self.conv_23 = Depth_Wise(
            c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[3]
        )

        c1 = [(keep[4], keep[5]), (keep[7], keep[8]), (keep[10], keep[11]), (keep[13], keep[14])]
        c2 = [(keep[5], keep[6]), (keep[8], keep[9]), (keep[11], keep[12]), (keep[14], keep[15])]
        c3 = [(keep[6], keep[7]), (keep[9], keep[10]), (keep[12], keep[13]), (keep[15], keep[16])]

        self.conv_3 = Residual(
            c1, c2, c3, num_block=4, groups=keep[4], kernel=(3, 3), stride=(1, 1), padding=(1, 1)
        )

        c1 = [(keep[16], keep[17])]
        c2 = [(keep[17], keep[18])]
        c3 = [(keep[18], keep[19])]

        self.conv_34 = Depth_Wise(
            c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[19]
        )

        c1 = [
            (keep[19], keep[20]),
            (keep[22], keep[23]),
            (keep[25], keep[26]),
            (keep[28], keep[29]),
            (keep[31], keep[32]),
            (keep[34], keep[35]),
        ]
        c2 = [
            (keep[20], keep[21]),
            (keep[23], keep[24]),
            (keep[26], keep[27]),
            (keep[29], keep[30]),
            (keep[32], keep[33]),
            (keep[35], keep[36]),
        ]
        c3 = [
            (keep[21], keep[22]),
            (keep[24], keep[25]),
            (keep[27], keep[28]),
            (keep[30], keep[31]),
            (keep[33], keep[34]),
            (keep[36], keep[37]),
        ]

        self.conv_4 = Residual(
            c1, c2, c3, num_block=6, groups=keep[19], kernel=(3, 3), stride=(1, 1), padding=(1, 1)
        )

        c1 = [(keep[37], keep[38])]
        c2 = [(keep[38], keep[39])]
        c3 = [(keep[39], keep[40])]

        self.conv_45 = Depth_Wise(
            c1[0], c2[0], c3[0], kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=keep[40]
        )

        c1 = [(keep[40], keep[41]), (keep[43], keep[44])]
        c2 = [(keep[41], keep[42]), (keep[44], keep[45])]
        c3 = [(keep[42], keep[43]), (keep[45], keep[46])]

        self.conv_5 = Residual(
            c1, c2, c3, num_block=2, groups=keep[40], kernel=(3, 3), stride=(1, 1), padding=(1, 1)
        )
        self.conv_6_sep = Conv_block(keep[46], keep[47], kernel=(1, 1), stride=(1, 1), padding=(0, 0))
        self.conv_6_dw = Linear_block(
            keep[47], keep[48], groups=keep[48], kernel=conv6_kernel, stride=(1, 1), padding=(0, 0)
        )
        self.conv_6_flatten = Flatten()
        self.linear = Linear(512, embedding_size, bias=False)
        self.bn = BatchNorm1d(embedding_size)
        self.drop = torch.nn.Dropout(p=drop_p)
        self.prob = Linear(embedding_size, num_classes, bias=False)

    def forward(self, x):
        out = self.conv1(x)
        out = self.conv2_dw(out)
        out = self.conv_23(out)
        out = self.conv_3(out)
        out = self.conv_34(out)
        out = self.conv_4(out)
        out = self.conv_45(out)
        out = self.conv_5(out)
        out = self.conv_6_sep(out)
        out = self.conv_6_dw(out)
        out = self.conv_6_flatten(out)
        if self.embedding_size != 512:
            out = self.linear(out)
        out = self.bn(out)
        out = self.drop(out)
        out = self.prob(out)
        return out


class MiniFASNetSE(MiniFASNet):
    def __init__(
        self, keep, embedding_size, conv6_kernel=(7, 7), drop_p=0.75, num_classes=4, img_channel=3
    ):
        super(MiniFASNetSE, self).__init__(
            keep=keep,
            embedding_size=embedding_size,
            conv6_kernel=conv6_kernel,
            drop_p=drop_p,
            num_classes=num_classes,
            img_channel=img_channel,
        )

        c1 = [(keep[4], keep[5]), (keep[7], keep[8]), (keep[10], keep[11]), (keep[13], keep[14])]
        c2 = [(keep[5], keep[6]), (keep[8], keep[9]), (keep[11], keep[12]), (keep[14], keep[15])]
        c3 = [(keep[6], keep[7]), (keep[9], keep[10]), (keep[12], keep[13]), (keep[15], keep[16])]

        self.conv_3 = ResidualSE(
            c1, c2, c3, num_block=4, groups=keep[4], kernel=(3, 3), stride=(1, 1), padding=(1, 1)
        )

        c1 = [
            (keep[19], keep[20]),
            (keep[22], keep[23]),
            (keep[25], keep[26]),
            (keep[28], keep[29]),
            (keep[31], keep[32]),
            (keep[34], keep[35]),
        ]
        c2 = [
            (keep[20], keep[21]),
            (keep[23], keep[24]),
            (keep[26], keep[27]),
            (keep[29], keep[30]),
            (keep[32], keep[33]),
            (keep[35], keep[36]),
        ]
        c3 = [
            (keep[21], keep[22]),
            (keep[24], keep[25]),
            (keep[27], keep[28]),
            (keep[30], keep[31]),
            (keep[33], keep[34]),
            (keep[36], keep[37]),
        ]

        self.conv_4 = ResidualSE(
            c1, c2, c3, num_block=6, groups=keep[19], kernel=(3, 3), stride=(1, 1), padding=(1, 1)
        )

        c1 = [(keep[40], keep[41]), (keep[43], keep[44])]
        c2 = [(keep[41], keep[42]), (keep[44], keep[45])]
        c3 = [(keep[42], keep[43]), (keep[45], keep[46])]
        self.conv_5 = ResidualSE(
            c1, c2, c3, num_block=2, groups=keep[40], kernel=(3, 3), stride=(1, 1), padding=(1, 1)
        )
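
# Shape-check sketch added for illustration (not part of the original module): with
# conv6_kernel=(5, 5), as used by Fasnet above, both factories expect 80x80 crops and emit 3 logits.
if __name__ == "__main__":
    net = MiniFASNetV2(conv6_kernel=(5, 5))
    net.eval()  # BatchNorm1d needs eval mode for a batch of one
    dummy = torch.randn(1, 3, 80, 80)  # batch of one 3-channel 80x80 crop
    with torch.no_grad():
        logits = net(dummy)
    print(logits.shape)  # torch.Size([1, 3])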
@ -1,212 +0,0 @@
# built-in dependencies
from typing import Any, Dict, List, Union

# 3rd party dependencies
import numpy as np
from tqdm import tqdm

# project dependencies
from deepface.modules import modeling, detection, preprocessing
from deepface.models.demography import Gender, Race, Emotion


def analyze(
    img_path: Union[str, np.ndarray],
    actions: Union[tuple, list] = ("emotion", "age", "gender", "race"),
    enforce_detection: bool = True,
    detector_backend: str = "opencv",
    align: bool = True,
    expand_percentage: int = 0,
    silent: bool = False,
    anti_spoofing: bool = False,
) -> List[Dict[str, Any]]:
    """
    Analyze facial attributes such as age, gender, emotion, and race in the provided image.

    Args:
        img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
            or a base64 encoded image. If the source image contains multiple faces, the result will
            include information for each detected face.

        actions (tuple): Attributes to analyze. The default is ('age', 'gender', 'emotion', 'race').
            You can exclude some of these attributes from the analysis if needed.

        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Set to False to avoid the exception for low-resolution images (default is True).

        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
            (default is opencv).

        align (boolean): Perform alignment based on the eye positions (default is True).

        expand_percentage (int): expand detected facial area with a percentage (default is 0).

        silent (boolean): Suppress or allow some log messages for a quieter analysis process
            (default is False).

        anti_spoofing (boolean): Flag to enable anti spoofing (default is False).

    Returns:
        results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary represents
            the analysis results for a detected face.

            Each dictionary in the list contains the following keys:

            - 'region' (dict): Represents the rectangular region of the detected face in the image.
                - 'x': x-coordinate of the top-left corner of the face.
                - 'y': y-coordinate of the top-left corner of the face.
                - 'w': Width of the detected face region.
                - 'h': Height of the detected face region.

            - 'age' (float): Estimated age of the detected face.

            - 'face_confidence' (float): Confidence score for the detected face.
                Indicates the reliability of the face detection.

            - 'dominant_gender' (str): The dominant gender in the detected face.
                Either "Man" or "Woman".

            - 'gender' (dict): Confidence scores for each gender category.
                - 'Man': Confidence score for the male gender.
                - 'Woman': Confidence score for the female gender.

            - 'dominant_emotion' (str): The dominant emotion in the detected face.
                Possible values include "sad", "angry", "surprise", "fear", "happy",
                "disgust", and "neutral".

            - 'emotion' (dict): Confidence scores for each emotion category.
                - 'sad': Confidence score for sadness.
                - 'angry': Confidence score for anger.
                - 'surprise': Confidence score for surprise.
                - 'fear': Confidence score for fear.
                - 'happy': Confidence score for happiness.
                - 'disgust': Confidence score for disgust.
                - 'neutral': Confidence score for neutrality.

            - 'dominant_race' (str): The dominant race in the detected face.
                Possible values include "indian", "asian", "latino hispanic",
                "black", "middle eastern", and "white".

            - 'race' (dict): Confidence scores for each race category.
                - 'indian': Confidence score for Indian ethnicity.
                - 'asian': Confidence score for Asian ethnicity.
                - 'latino hispanic': Confidence score for Latino/Hispanic ethnicity.
                - 'black': Confidence score for Black ethnicity.
                - 'middle eastern': Confidence score for Middle Eastern ethnicity.
                - 'white': Confidence score for White ethnicity.
    """

    # if actions is passed as a tuple with a single item, interestingly it becomes str here
    if isinstance(actions, str):
        actions = (actions,)

    # check if actions is not an iterable or empty.
    if not hasattr(actions, "__getitem__") or not actions:
        raise ValueError("`actions` must be a list of strings.")

    actions = list(actions)

    # For each action, check if it is valid
    for action in actions:
        if action not in ("emotion", "age", "gender", "race"):
            raise ValueError(
                f"Invalid action passed ({repr(action)}). "
                "Valid actions are `emotion`, `age`, `gender`, `race`."
            )
    # ---------------------------------
    resp_objects = []

    img_objs = detection.extract_faces(
        img_path=img_path,
        detector_backend=detector_backend,
        enforce_detection=enforce_detection,
        grayscale=False,
        align=align,
        expand_percentage=expand_percentage,
        anti_spoofing=anti_spoofing,
    )

    for img_obj in img_objs:
        if anti_spoofing is True and img_obj.get("is_real", True) is False:
            raise ValueError("Spoof detected in the given image.")

        img_content = img_obj["face"]
        img_region = img_obj["facial_area"]
        img_confidence = img_obj["confidence"]
        if img_content.shape[0] == 0 or img_content.shape[1] == 0:
            continue

        # rgb to bgr
        img_content = img_content[:, :, ::-1]

        # resize input image
        img_content = preprocessing.resize_image(img=img_content, target_size=(224, 224))

        obj = {}
        # facial attribute analysis
        pbar = tqdm(
            range(0, len(actions)),
            desc="Finding actions",
            disable=silent if len(actions) > 1 else True,
        )
        for index in pbar:
            action = actions[index]
            pbar.set_description(f"Action: {action}")

            if action == "emotion":
                emotion_predictions = modeling.build_model(
                    task="facial_attribute", model_name="Emotion"
                ).predict(img_content)
                sum_of_predictions = emotion_predictions.sum()

                obj["emotion"] = {}
                for i, emotion_label in enumerate(Emotion.labels):
                    emotion_prediction = 100 * emotion_predictions[i] / sum_of_predictions
                    obj["emotion"][emotion_label] = emotion_prediction

                obj["dominant_emotion"] = Emotion.labels[np.argmax(emotion_predictions)]

            elif action == "age":
                apparent_age = modeling.build_model(
                    task="facial_attribute", model_name="Age"
                ).predict(img_content)
                # int cast is for exception - object of type 'float32' is not JSON serializable
                obj["age"] = int(apparent_age)

            elif action == "gender":
                gender_predictions = modeling.build_model(
                    task="facial_attribute", model_name="Gender"
                ).predict(img_content)
                obj["gender"] = {}
                for i, gender_label in enumerate(Gender.labels):
                    gender_prediction = 100 * gender_predictions[i]
                    obj["gender"][gender_label] = gender_prediction

                obj["dominant_gender"] = Gender.labels[np.argmax(gender_predictions)]

            elif action == "race":
                race_predictions = modeling.build_model(
                    task="facial_attribute", model_name="Race"
                ).predict(img_content)
                sum_of_predictions = race_predictions.sum()

                obj["race"] = {}
                for i, race_label in enumerate(Race.labels):
                    race_prediction = 100 * race_predictions[i] / sum_of_predictions
                    obj["race"][race_label] = race_prediction

                obj["dominant_race"] = Race.labels[np.argmax(race_predictions)]

        # -----------------------------
        # mention facial areas
        obj["region"] = img_region
        # include image confidence
        obj["face_confidence"] = img_confidence

        resp_objects.append(obj)

    return resp_objects
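
# Usage sketch added for illustration (not part of the original module); the image path below is
# a placeholder. The public DeepFace.analyze wrapper delegates to this function.
if __name__ == "__main__":
    demographies = analyze(img_path="dataset/img1.jpg", actions=("age", "gender"), silent=True)
    for demography in demographies:
        print(demography["age"], demography["dominant_gender"], demography["region"])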
@ -1,410 +0,0 @@
# built-in dependencies
from typing import Any, Dict, List, Tuple, Union, Optional

# 3rd party dependencies
from heapq import nlargest
import numpy as np
import cv2

# project dependencies
from deepface.modules import modeling
from deepface.models.Detector import Detector, DetectedFace, FacialAreaRegion
from deepface.commons import image_utils

from deepface.commons.logger import Logger

logger = Logger()

# pylint: disable=no-else-raise


def extract_faces(
    img_path: Union[str, np.ndarray],
    detector_backend: str = "opencv",
    enforce_detection: bool = True,
    align: bool = True,
    expand_percentage: int = 0,
    grayscale: bool = False,
    color_face: str = "rgb",
    normalize_face: bool = True,
    anti_spoofing: bool = False,
    max_faces: Optional[int] = None,
) -> List[Dict[str, Any]]:
    """
    Extract faces from a given image

    Args:
        img_path (str or np.ndarray): Path to the first image. Accepts exact image path
            as a string, numpy array (BGR), or base64 encoded images.

        detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
            'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
            (default is opencv)

        enforce_detection (boolean): If no face is detected in an image, raise an exception.
            Default is True. Set to False to avoid the exception for low-resolution images.

        align (bool): Flag to enable face alignment (default is True).

        expand_percentage (int): expand detected facial area with a percentage.

        grayscale (boolean): (Deprecated) Flag to convert the output face image to grayscale
            (default is False).

        color_face (string): Color to return face image output. Options: 'rgb', 'bgr' or 'gray'
            (default is 'rgb').

        normalize_face (boolean): Flag to enable normalization (divide by 255) of the output
            face image (default is True).

        anti_spoofing (boolean): Flag to enable anti spoofing (default is False).

    Returns:
        results (List[Dict[str, Any]]): A list of dictionaries, where each dictionary contains:

        - "face" (np.ndarray): The detected face as a NumPy array in RGB format.

        - "facial_area" (Dict[str, Any]): The detected face's regions as a dictionary containing:
            - keys 'x', 'y', 'w', 'h' with int values
            - keys 'left_eye', 'right_eye' with a tuple of 2 ints as values.
              left eye and right eye are eyes on the left and right respectively with respect
              to the person itself instead of observer.

        - "confidence" (float): The confidence score associated with the detected face.

        - "is_real" (boolean): antispoofing analyze result. this key is just available in the
            result only if anti_spoofing is set to True in input arguments.

        - "antispoof_score" (float): score of antispoofing analyze result. this key is
            just available in the result only if anti_spoofing is set to True in input arguments.
    """

    resp_objs = []

    # img might be path, base64 or numpy array. Convert it to numpy whatever it is.
    img, img_name = image_utils.load_image(img_path)

    if img is None:
        raise ValueError(f"Exception while loading {img_name}")

    height, width, _ = img.shape

    base_region = FacialAreaRegion(x=0, y=0, w=width, h=height, confidence=0)

    if detector_backend == "skip":
        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]
    else:
        face_objs = detect_faces(
            detector_backend=detector_backend,
            img=img,
            align=align,
            expand_percentage=expand_percentage,
            max_faces=max_faces,
        )

    # in case of no face found
    if len(face_objs) == 0 and enforce_detection is True:
        if img_name is not None:
            raise ValueError(
                f"Face could not be detected in {img_name}. "
                "Please confirm that the picture is a face photo "
                "or consider to set enforce_detection param to False."
            )
        else:
            raise ValueError(
                "Face could not be detected. Please confirm that the picture is a face photo "
                "or consider to set enforce_detection param to False."
            )

    if len(face_objs) == 0 and enforce_detection is False:
        face_objs = [DetectedFace(img=img, facial_area=base_region, confidence=0)]

    for face_obj in face_objs:
        current_img = face_obj.img
        current_region = face_obj.facial_area

        if current_img.shape[0] == 0 or current_img.shape[1] == 0:
            continue

        if grayscale is True:
            logger.warn("Parameter grayscale is deprecated. Use color_face instead.")
            current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
        else:
            if color_face == "rgb":
                current_img = current_img[:, :, ::-1]
            elif color_face == "bgr":
                pass  # image is in BGR
            elif color_face == "gray":
                current_img = cv2.cvtColor(current_img, cv2.COLOR_BGR2GRAY)
            else:
                raise ValueError(f"The color_face can be rgb, bgr or gray, but it is {color_face}.")

        if normalize_face:
            current_img = current_img / 255  # normalize input in [0, 1]

        # cast to int for flask, and do final checks for borders
        x = max(0, int(current_region.x))
        y = max(0, int(current_region.y))
        w = min(width - x - 1, int(current_region.w))
        h = min(height - y - 1, int(current_region.h))

        resp_obj = {
            "face": current_img,
            "facial_area": {
                "x": x,
                "y": y,
                "w": w,
                "h": h,
                "left_eye": current_region.left_eye,
                "right_eye": current_region.right_eye,
            },
            "confidence": round(float(current_region.confidence or 0), 2),
        }

        if anti_spoofing is True:
            antispoof_model = modeling.build_model(task="spoofing", model_name="Fasnet")
            is_real, antispoof_score = antispoof_model.analyze(img=img, facial_area=(x, y, w, h))
            resp_obj["is_real"] = is_real
            resp_obj["antispoof_score"] = antispoof_score

        resp_objs.append(resp_obj)

    if len(resp_objs) == 0 and enforce_detection == True:
        raise ValueError(
            f"Exception while extracting faces from {img_name}. "
            "Consider to set enforce_detection arg to False."
        )

    return resp_objs
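
# Usage sketch added for illustration (not part of the original module); the image path below is
# a placeholder. It extracts aligned face crops plus spoofing scores in one call.
if __name__ == "__main__":
    face_objs = extract_faces(
        img_path="dataset/couple.jpg",  # placeholder path
        detector_backend="opencv",
        align=True,
        anti_spoofing=True,
    )
    for face_obj in face_objs:
        print(face_obj["facial_area"], face_obj["confidence"], face_obj.get("is_real"))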
|
||||
|
||||
def detect_faces(
|
||||
detector_backend: str,
|
||||
img: np.ndarray,
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
max_faces: Optional[int] = None,
|
||||
) -> List[DetectedFace]:
|
||||
"""
|
||||
Detect face(s) from a given image
|
||||
Args:
|
||||
detector_backend (str): detector name
|
||||
|
||||
img (np.ndarray): pre-loaded image
|
||||
|
||||
align (bool): enable or disable alignment after detection
|
||||
|
||||
expand_percentage (int): expand detected facial area with a percentage (default is 0).
|
||||
|
||||
Returns:
|
||||
results (List[DetectedFace]): A list of DetectedFace objects
|
||||
where each object contains:
|
||||
|
||||
- img (np.ndarray): The detected face as a NumPy array.
|
||||
|
||||
- facial_area (FacialAreaRegion): The facial area region represented as x, y, w, h,
left_eye and right_eye. The left and right eyes are defined with respect to
the person, not the observer.
|
||||
|
||||
- confidence (float): The confidence score associated with the detected face.
|
||||
"""
|
||||
height, width, _ = img.shape
|
||||
face_detector: Detector = modeling.build_model(
|
||||
task="face_detector", model_name=detector_backend
|
||||
)
|
||||
|
||||
# validate expand percentage score
|
||||
if expand_percentage < 0:
|
||||
logger.warn(
|
||||
f"Expand percentage cannot be negative but you set it to {expand_percentage}."
|
||||
"Overwritten it to 0."
|
||||
)
|
||||
expand_percentage = 0
|
||||
|
||||
# If faces are close to the upper boundary, alignment may move them outside of the image.
|
||||
# Add a black border around an image to avoid this.
|
||||
height_border = int(0.5 * height)
|
||||
width_border = int(0.5 * width)
|
||||
if align is True:
|
||||
img = cv2.copyMakeBorder(
|
||||
img,
|
||||
height_border,
|
||||
height_border,
|
||||
width_border,
|
||||
width_border,
|
||||
cv2.BORDER_CONSTANT,
|
||||
value=[0, 0, 0], # Color of the border (black)
|
||||
)
|
||||
|
||||
# find facial areas of given image
|
||||
facial_areas = face_detector.detect_faces(img)
|
||||
|
||||
if max_faces is not None and max_faces < len(facial_areas):
|
||||
facial_areas = nlargest(
|
||||
max_faces, facial_areas, key=lambda facial_area: facial_area.w * facial_area.h
|
||||
)
|
||||
|
||||
return [
|
||||
expand_and_align_face(
|
||||
facial_area=facial_area,
|
||||
img=img,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
width_border=width_border,
|
||||
height_border=height_border,
|
||||
)
|
||||
for facial_area in facial_areas
|
||||
]
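
# Illustrative usage sketch (not part of the original module); assumes a readable
# image file exists at the hypothetical path below.
def _example_detect_faces() -> None:
    img = cv2.imread("my_photo.jpg")  # hypothetical path, loaded in BGR order
    face_objs = detect_faces(detector_backend="opencv", img=img, align=True)
    for face_obj in face_objs:
        area = face_obj.facial_area
        print(area.x, area.y, area.w, area.h, face_obj.confidence)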
|
||||
|
||||
|
||||
def expand_and_align_face(
|
||||
facial_area: FacialAreaRegion,
|
||||
img: np.ndarray,
|
||||
align: bool,
|
||||
expand_percentage: int,
|
||||
width_border: int,
|
||||
height_border: int,
|
||||
) -> DetectedFace:
|
||||
x = facial_area.x
|
||||
y = facial_area.y
|
||||
w = facial_area.w
|
||||
h = facial_area.h
|
||||
left_eye = facial_area.left_eye
|
||||
right_eye = facial_area.right_eye
|
||||
confidence = facial_area.confidence
|
||||
|
||||
if expand_percentage > 0:
|
||||
# Expand the facial region height and width by the provided percentage
|
||||
# ensuring that the expanded region stays within img.shape limits
|
||||
expanded_w = w + int(w * expand_percentage / 100)
|
||||
expanded_h = h + int(h * expand_percentage / 100)
|
||||
|
||||
x = max(0, x - int((expanded_w - w) / 2))
|
||||
y = max(0, y - int((expanded_h - h) / 2))
|
||||
w = min(img.shape[1] - x, expanded_w)
|
||||
h = min(img.shape[0] - y, expanded_h)
|
||||
|
||||
# extract detected face unaligned
|
||||
detected_face = img[int(y) : int(y + h), int(x) : int(x + w)]
|
||||
# align original image, then find projection of detected face area after alignment
|
||||
if align is True: # and left_eye is not None and right_eye is not None:
|
||||
aligned_img, angle = align_img_wrt_eyes(img=img, left_eye=left_eye, right_eye=right_eye)
|
||||
|
||||
rotated_x1, rotated_y1, rotated_x2, rotated_y2 = project_facial_area(
|
||||
facial_area=(x, y, x + w, y + h), angle=angle, size=(img.shape[0], img.shape[1])
|
||||
)
|
||||
detected_face = aligned_img[
|
||||
int(rotated_y1) : int(rotated_y2), int(rotated_x1) : int(rotated_x2)
|
||||
]
|
||||
|
||||
# restore x, y, left_eye and right_eye to their values before the border was added
|
||||
x = x - width_border
|
||||
y = y - height_border
|
||||
# w and h will not change
|
||||
if left_eye is not None:
|
||||
left_eye = (left_eye[0] - width_border, left_eye[1] - height_border)
|
||||
if right_eye is not None:
|
||||
right_eye = (right_eye[0] - width_border, right_eye[1] - height_border)
|
||||
|
||||
return DetectedFace(
|
||||
img=detected_face,
|
||||
facial_area=FacialAreaRegion(
|
||||
x=x, y=y, h=h, w=w, confidence=confidence, left_eye=left_eye, right_eye=right_eye
|
||||
),
|
||||
confidence=confidence,
|
||||
)
|
||||
|
||||
|
||||
def align_img_wrt_eyes(
|
||||
img: np.ndarray,
|
||||
left_eye: Union[list, tuple],
|
||||
right_eye: Union[list, tuple],
|
||||
) -> Tuple[np.ndarray, float]:
|
||||
"""
|
||||
Align a given image horizontally with respect to the left and right eye locations
|
||||
Args:
|
||||
img (np.ndarray): pre-loaded image with detected face
|
||||
left_eye (list or tuple): coordinates of left eye with respect to the person itself
|
||||
right_eye(list or tuple): coordinates of right eye with respect to the person itself
|
||||
Returns:
|
||||
img (np.ndarray): aligned facial image
angle (float): the rotation angle applied, in degrees
|
||||
"""
|
||||
# if eye could not be detected for the given image, return image itself
|
||||
if left_eye is None or right_eye is None:
|
||||
return img, 0
|
||||
|
||||
# sometimes detected images unexpectedly come with zero dimensions
|
||||
if img.shape[0] == 0 or img.shape[1] == 0:
|
||||
return img, 0
|
||||
|
||||
angle = float(np.degrees(np.arctan2(left_eye[1] - right_eye[1], left_eye[0] - right_eye[0])))
|
||||
|
||||
(h, w) = img.shape[:2]
|
||||
center = (w // 2, h // 2)
|
||||
M = cv2.getRotationMatrix2D(center, angle, 1.0)
|
||||
img = cv2.warpAffine(
|
||||
img, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0)
|
||||
)
|
||||
|
||||
return img, angle
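
# Illustrative sketch (not part of the original module): rotate a synthetic image
# so that two assumed eye coordinates become horizontal. Eye points are (x, y)
# pairs given with respect to the person, matching the docstring above.
def _example_align_img_wrt_eyes() -> None:
    img = np.zeros((100, 100, 3), dtype=np.uint8)  # dummy black image
    aligned, angle = align_img_wrt_eyes(img=img, left_eye=(70, 40), right_eye=(30, 50))
    print(aligned.shape, round(angle, 2))  # same shape, rotation angle in degrees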
|
||||
|
||||
|
||||
def project_facial_area(
|
||||
facial_area: Tuple[int, int, int, int], angle: float, size: Tuple[int, int]
|
||||
) -> Tuple[int, int, int, int]:
|
||||
"""
|
||||
Update pre-calculated facial area coordinates after the image itself has been
rotated with respect to the eyes.
Inspired by the work of @UmutDeniz26 - github.com/serengil/retinaface/pull/80
|
||||
|
||||
Args:
|
||||
facial_area (tuple of int): Representing the (x1, y1, x2, y2) of the facial area.
|
||||
x2 is equal to x1 + w1, and y2 is equal to y1 + h1
|
||||
angle (float): Angle of rotation in degrees. Its sign determines the direction of rotation.
|
||||
Note that angles > 360 degrees are normalized to the range [0, 360).
|
||||
size (tuple of int): Tuple representing the size of the image (height, width).
|
||||
|
||||
Returns:
|
||||
rotated_coordinates (tuple of int): Representing the new coordinates
|
||||
(x1, y1, x2, y2) or (x1, y1, x1+w1, y1+h1) of the rotated facial area.
|
||||
"""
|
||||
|
||||
# Normalize the angle so we don't have to worry about
# rotations greater than 360 degrees.
# We work around the quirky behavior of the modulo operator
# for negative angle values.
|
||||
direction = 1 if angle >= 0 else -1
|
||||
angle = abs(angle) % 360
|
||||
if angle == 0:
|
||||
return facial_area
|
||||
|
||||
# Angle in radians
|
||||
angle = angle * np.pi / 180
|
||||
|
||||
height, width = size
|
||||
|
||||
# Translate the facial area to the center of the image
|
||||
x = (facial_area[0] + facial_area[2]) / 2 - width / 2
|
||||
y = (facial_area[1] + facial_area[3]) / 2 - height / 2
|
||||
|
||||
# Rotate the facial area
|
||||
x_new = x * np.cos(angle) + y * direction * np.sin(angle)
|
||||
y_new = -x * direction * np.sin(angle) + y * np.cos(angle)
|
||||
|
||||
# Translate the facial area back to the original position
|
||||
x_new = x_new + width / 2
|
||||
y_new = y_new + height / 2
|
||||
|
||||
# Calculate projected coordinates after alignment
|
||||
x1 = x_new - (facial_area[2] - facial_area[0]) / 2
|
||||
y1 = y_new - (facial_area[3] - facial_area[1]) / 2
|
||||
x2 = x_new + (facial_area[2] - facial_area[0]) / 2
|
||||
y2 = y_new + (facial_area[3] - facial_area[1]) / 2
|
||||
|
||||
# validate projected coordinates are in image's boundaries
|
||||
x1 = max(int(x1), 0)
|
||||
y1 = max(int(y1), 0)
|
||||
x2 = min(int(x2), width)
|
||||
y2 = min(int(y2), height)
|
||||
|
||||
return (x1, y1, x2, y2)
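
# Illustrative sketch (not part of the original module): with a zero angle the
# facial area is returned unchanged, while a non-zero angle yields the projection
# of the box onto the rotated image.
def _example_project_facial_area() -> None:
    box = (10, 20, 60, 80)  # (x1, y1, x2, y2)
    size = (200, 300)  # (height, width), as passed by the caller above
    print(project_facial_area(facial_area=box, angle=0, size=size))  # (10, 20, 60, 80)
    print(project_facial_area(facial_area=box, angle=15.0, size=size))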
|
||||
|
|
@ -1,100 +0,0 @@
|
|||
# built-in dependencies
|
||||
from typing import Any
|
||||
|
||||
# project dependencies
|
||||
from deepface.models.facial_recognition import (
|
||||
VGGFace,
|
||||
OpenFace,
|
||||
FbDeepFace,
|
||||
DeepID,
|
||||
ArcFace,
|
||||
SFace,
|
||||
Dlib,
|
||||
Facenet,
|
||||
GhostFaceNet,
|
||||
)
|
||||
from deepface.models.face_detection import (
|
||||
FastMtCnn,
|
||||
MediaPipe,
|
||||
MtCnn,
|
||||
OpenCv,
|
||||
Dlib as DlibDetector,
|
||||
RetinaFace,
|
||||
Ssd,
|
||||
Yolo,
|
||||
YuNet,
|
||||
CenterFace,
|
||||
)
|
||||
from deepface.models.demography import Age, Gender, Race, Emotion
|
||||
from deepface.models.spoofing import FasNet
|
||||
|
||||
|
||||
def build_model(task: str, model_name: str) -> Any:
|
||||
"""
|
||||
This function loads pre-trained models in a singleton-like way
|
||||
Parameters:
|
||||
task (str): facial_recognition, facial_attribute, face_detector, spoofing
|
||||
model_name (str): model identifier
|
||||
- VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib,
|
||||
ArcFace, SFace, GhostFaceNet for face recognition
|
||||
- Age, Gender, Emotion, Race for facial attributes
|
||||
- opencv, mtcnn, ssd, dlib, retinaface, mediapipe, yolov8, yunet,
|
||||
fastmtcnn or centerface for face detectors
|
||||
- Fasnet for spoofing
|
||||
Returns:
|
||||
built model class
|
||||
"""
|
||||
|
||||
# singleton design pattern
|
||||
global cached_models
|
||||
|
||||
models = {
|
||||
"facial_recognition": {
|
||||
"VGG-Face": VGGFace.VggFaceClient,
|
||||
"OpenFace": OpenFace.OpenFaceClient,
|
||||
"Facenet": Facenet.FaceNet128dClient,
|
||||
"Facenet512": Facenet.FaceNet512dClient,
|
||||
"DeepFace": FbDeepFace.DeepFaceClient,
|
||||
"DeepID": DeepID.DeepIdClient,
|
||||
"Dlib": Dlib.DlibClient,
|
||||
"ArcFace": ArcFace.ArcFaceClient,
|
||||
"SFace": SFace.SFaceClient,
|
||||
"GhostFaceNet": GhostFaceNet.GhostFaceNetClient,
|
||||
},
|
||||
"spoofing": {
|
||||
"Fasnet": FasNet.Fasnet,
|
||||
},
|
||||
"facial_attribute": {
|
||||
"Emotion": Emotion.EmotionClient,
|
||||
"Age": Age.ApparentAgeClient,
|
||||
"Gender": Gender.GenderClient,
|
||||
"Race": Race.RaceClient,
|
||||
},
|
||||
"face_detector": {
|
||||
"opencv": OpenCv.OpenCvClient,
|
||||
"mtcnn": MtCnn.MtCnnClient,
|
||||
"ssd": Ssd.SsdClient,
|
||||
"dlib": DlibDetector.DlibClient,
|
||||
"retinaface": RetinaFace.RetinaFaceClient,
|
||||
"mediapipe": MediaPipe.MediaPipeClient,
|
||||
"yolov8": Yolo.YoloClient,
|
||||
"yunet": YuNet.YuNetClient,
|
||||
"fastmtcnn": FastMtCnn.FastMtCnnClient,
|
||||
"centerface": CenterFace.CenterFaceClient,
|
||||
},
|
||||
}
|
||||
|
||||
if models.get(task) is None:
|
||||
raise ValueError(f"unimplemented task - {task}")
|
||||
|
||||
if not "cached_models" in globals():
|
||||
cached_models = {current_task: {} for current_task in models.keys()}
|
||||
|
||||
if cached_models[task].get(model_name) is None:
|
||||
model = models[task].get(model_name)
|
||||
if model:
|
||||
cached_models[task][model_name] = model()
|
||||
else:
|
||||
raise ValueError(f"Invalid model_name passed - {task}/{model_name}")
|
||||
|
||||
return cached_models[task][model_name]
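
# Illustrative usage sketch (not part of the original module): repeated calls with
# the same task and model name return the identical cached instance.
def _example_build_model() -> None:
    detector = build_model(task="face_detector", model_name="opencv")
    recognizer = build_model(task="facial_recognition", model_name="VGG-Face")
    # the second call hits cached_models and returns the same object
    assert build_model(task="face_detector", model_name="opencv") is detector
    print(type(detector).__name__, type(recognizer).__name__)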
|
||||
|
|
@ -1,121 +0,0 @@
|
|||
# built-in dependencies
|
||||
from typing import Tuple
|
||||
|
||||
# 3rd party
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
# project dependencies
|
||||
from deepface.commons import package_utils
|
||||
|
||||
|
||||
tf_major_version = package_utils.get_tf_major_version()
|
||||
if tf_major_version == 1:
|
||||
from keras.preprocessing import image
|
||||
elif tf_major_version == 2:
|
||||
from tensorflow.keras.preprocessing import image
|
||||
|
||||
|
||||
def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
|
||||
"""Normalize input image.
|
||||
|
||||
Args:
|
||||
img (numpy array): the input image.
|
||||
normalization (str, optional): the normalization technique. Defaults to "base",
|
||||
for no normalization.
|
||||
|
||||
Returns:
|
||||
numpy array: the normalized image.
|
||||
"""
|
||||
|
||||
# issue 131 declares that some normalization techniques improve the accuracy
|
||||
|
||||
if normalization == "base":
|
||||
return img
|
||||
|
||||
# @trevorgribble and @davedgd contributed this feature
|
||||
# restore input in scale of [0, 255] because it was normalized in scale of
|
||||
# [0, 1] in preprocess_face
|
||||
img *= 255
|
||||
|
||||
if normalization == "raw":
|
||||
pass # return just restored pixels
|
||||
|
||||
elif normalization == "Facenet":
|
||||
mean, std = img.mean(), img.std()
|
||||
img = (img - mean) / std
|
||||
|
||||
elif normalization == "Facenet2018":
|
||||
# simply / 127.5 - 1 (similar to facenet 2018 model preprocessing step as @iamrishab posted)
|
||||
img /= 127.5
|
||||
img -= 1
|
||||
|
||||
elif normalization == "VGGFace":
|
||||
# mean subtraction based on VGGFace1 training data
|
||||
img[..., 0] -= 93.5940
|
||||
img[..., 1] -= 104.7624
|
||||
img[..., 2] -= 129.1863
|
||||
|
||||
elif normalization == "VGGFace2":
|
||||
# mean subtraction based on VGGFace2 training data
|
||||
img[..., 0] -= 91.4953
|
||||
img[..., 1] -= 103.8827
|
||||
img[..., 2] -= 131.0912
|
||||
|
||||
elif normalization == "ArcFace":
|
||||
# Reference study: The faces are cropped and resized to 112×112,
|
||||
# and each pixel (ranged between [0, 255]) in RGB images is normalised
|
||||
# by subtracting 127.5 then divided by 128.
|
||||
img -= 127.5
|
||||
img /= 128
|
||||
else:
|
||||
raise ValueError(f"unimplemented normalization type - {normalization}")
|
||||
|
||||
return img
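
# Illustrative sketch (not part of the original module): normalize_input expects
# pixels already scaled to [0, 1] (as produced by the face extraction step) and
# internally restores them to [0, 255] before applying the selected technique.
def _example_normalize_input() -> None:
    img = np.random.rand(1, 224, 224, 3).astype(np.float32)  # dummy input in [0, 1]
    facenet_img = normalize_input(img.copy(), normalization="Facenet")
    # Facenet normalization standardizes the pixels to roughly zero mean, unit std
    print(round(float(facenet_img.mean()), 4), round(float(facenet_img.std()), 4))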
|
||||
|
||||
|
||||
def resize_image(img: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
|
||||
"""
|
||||
Resize an image to the expected input size of an ML model, padding with black pixels.
|
||||
Args:
|
||||
img (np.ndarray): pre-loaded image as numpy array
|
||||
target_size (tuple): input shape of ml model
|
||||
Returns:
|
||||
img (np.ndarray): resized input image
|
||||
"""
|
||||
factor_0 = target_size[0] / img.shape[0]
|
||||
factor_1 = target_size[1] / img.shape[1]
|
||||
factor = min(factor_0, factor_1)
|
||||
|
||||
dsize = (
|
||||
int(img.shape[1] * factor),
|
||||
int(img.shape[0] * factor),
|
||||
)
|
||||
img = cv2.resize(img, dsize)
|
||||
|
||||
diff_0 = target_size[0] - img.shape[0]
|
||||
diff_1 = target_size[1] - img.shape[1]
|
||||
|
||||
# Put the base image in the middle of the padded image
|
||||
img = np.pad(
|
||||
img,
|
||||
(
|
||||
(diff_0 // 2, diff_0 - diff_0 // 2),
|
||||
(diff_1 // 2, diff_1 - diff_1 // 2),
|
||||
(0, 0),
|
||||
),
|
||||
"constant",
|
||||
)
|
||||
|
||||
# double check: if the padded image still does not match the target size
|
||||
if img.shape[0:2] != target_size:
|
||||
img = cv2.resize(img, target_size)
|
||||
|
||||
# make it 4-dimensional as ML models expect
|
||||
img = image.img_to_array(img)
|
||||
img = np.expand_dims(img, axis=0)
|
||||
|
||||
if img.max() > 1:
img = img.astype(np.float32) / 255.0  # scale pixel values to [0, 1]
|
||||
|
||||
return img
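
# Illustrative sketch (not part of the original module): a non-square input is
# shrunk with its aspect ratio preserved, padded with black pixels and returned
# as a batch of one image with pixel values in [0, 1].
def _example_resize_image() -> None:
    img = np.random.randint(0, 255, size=(480, 640, 3), dtype=np.uint8)
    resized = resize_image(img=img, target_size=(224, 224))
    print(resized.shape, float(resized.max()))  # (1, 224, 224, 3), <= 1.0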
|
||||
|
|
@ -1,417 +0,0 @@
|
|||
# built-in dependencies
|
||||
import os
|
||||
import pickle
|
||||
from typing import List, Union, Optional, Dict, Any, Set
|
||||
import time
|
||||
|
||||
# 3rd party dependencies
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
|
||||
# project dependencies
|
||||
from deepface.commons import image_utils
|
||||
from deepface.modules import representation, detection, verification
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
|
||||
def find(
|
||||
img_path: Union[str, np.ndarray],
|
||||
db_path: str,
|
||||
model_name: str = "VGG-Face",
|
||||
distance_metric: str = "cosine",
|
||||
enforce_detection: bool = True,
|
||||
detector_backend: str = "opencv",
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
threshold: Optional[float] = None,
|
||||
normalization: str = "base",
|
||||
silent: bool = False,
|
||||
refresh_database: bool = True,
|
||||
anti_spoofing: bool = False,
|
||||
) -> List[pd.DataFrame]:
|
||||
"""
|
||||
Identify individuals in a database
|
||||
|
||||
Args:
|
||||
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
|
||||
or a base64 encoded image. If the source image contains multiple faces, the result will
|
||||
include information for each detected face.
|
||||
|
||||
db_path (string): Path to the folder containing image files. All detected faces
|
||||
in the database will be considered in the decision-making process.
|
||||
|
||||
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
|
||||
OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).
|
||||
|
||||
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
|
||||
'euclidean', 'euclidean_l2'.
|
||||
|
||||
enforce_detection (boolean): If no face is detected in an image, raise an exception.
|
||||
Default is True. Set to False to avoid the exception for low-resolution images.
|
||||
|
||||
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
||||
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'.
|
||||
|
||||
align (boolean): Perform alignment based on the eye positions.
|
||||
|
||||
expand_percentage (int): expand detected facial area with a percentage (default is 0).
|
||||
|
||||
threshold (float): Specify a threshold to determine whether a pair represents the same
|
||||
person or different individuals. This threshold is used for comparing distances.
|
||||
If left unset, default pre-tuned threshold values will be applied based on the specified
|
||||
model name and distance metric (default is None).
|
||||
|
||||
normalization (string): Normalize the input image before feeding it to the model.
|
||||
Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
|
||||
|
||||
silent (boolean): Suppress or allow some log messages for a quieter analysis process.
|
||||
|
||||
refresh_database (boolean): Synchronizes the images representation (pkl) file with the
directory/db files. If set to False, any file changes inside the db_path
directory are ignored (default is True).
|
||||
|
||||
anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
|
||||
|
||||
|
||||
Returns:
|
||||
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
|
||||
to the identity information for an individual detected in the source image.
|
||||
The DataFrame columns include:
|
||||
|
||||
- 'identity': Identity label of the detected individual.
|
||||
|
||||
- 'target_x', 'target_y', 'target_w', 'target_h': Bounding box coordinates of the
|
||||
target face in the database.
|
||||
|
||||
- 'source_x', 'source_y', 'source_w', 'source_h': Bounding box coordinates of the
|
||||
detected face in the source image.
|
||||
|
||||
- 'threshold': threshold used to decide whether a pair represents the same person or different persons
|
||||
|
||||
- 'distance': Distance between the face vectors based on the
specified model and distance metric (lower means more similar)
|
||||
"""
|
||||
|
||||
tic = time.time()
|
||||
|
||||
if not os.path.isdir(db_path):
|
||||
raise ValueError(f"Passed path {db_path} does not exist!")
|
||||
|
||||
img, _ = image_utils.load_image(img_path)
|
||||
if img is None:
|
||||
raise ValueError(f"Passed image path {img_path} does not exist!")
|
||||
|
||||
file_parts = [
|
||||
"ds",
|
||||
"model",
|
||||
model_name,
|
||||
"detector",
|
||||
detector_backend,
|
||||
"aligned" if align else "unaligned",
|
||||
"normalization",
|
||||
normalization,
|
||||
"expand",
|
||||
str(expand_percentage),
|
||||
]
|
||||
|
||||
file_name = "_".join(file_parts) + ".pkl"
|
||||
file_name = file_name.replace("-", "").lower()
|
||||
|
||||
datastore_path = os.path.join(db_path, file_name)
|
||||
representations = []
|
||||
|
||||
# required columns for representations
|
||||
df_cols = [
|
||||
"identity",
|
||||
"hash",
|
||||
"embedding",
|
||||
"target_x",
|
||||
"target_y",
|
||||
"target_w",
|
||||
"target_h",
|
||||
]
|
||||
|
||||
# Ensure the proper pickle file exists
|
||||
if not os.path.exists(datastore_path):
|
||||
with open(datastore_path, "wb") as f:
|
||||
pickle.dump([], f)
|
||||
|
||||
# Load the representations from the pickle file
|
||||
with open(datastore_path, "rb") as f:
|
||||
representations = pickle.load(f)
|
||||
|
||||
# check each item of representations list has required keys
|
||||
for i, current_representation in enumerate(representations):
|
||||
missing_keys = set(df_cols) - set(current_representation.keys())
|
||||
if len(missing_keys) > 0:
|
||||
raise ValueError(
|
||||
f"{i}-th item does not have some required keys - {missing_keys}."
|
||||
f"Consider to delete {datastore_path}"
|
||||
)
|
||||
|
||||
# embedded images
|
||||
pickled_images = [representation["identity"] for representation in representations]
|
||||
|
||||
# Get the list of images on storage
|
||||
storage_images = image_utils.list_images(path=db_path)
|
||||
|
||||
if len(storage_images) == 0 and refresh_database is True:
|
||||
raise ValueError(f"No item found in {db_path}")
|
||||
if len(representations) == 0 and refresh_database is False:
|
||||
raise ValueError(f"Nothing is found in {datastore_path}")
|
||||
|
||||
must_save_pickle = False
|
||||
new_images, old_images, replaced_images = set(), set(), set()
|
||||
|
||||
if not refresh_database:
|
||||
logger.info(
|
||||
f"Could be some changes in {db_path} not tracked."
|
||||
"Set refresh_database to true to assure that any changes will be tracked."
|
||||
)
|
||||
|
||||
# Enforce data consistency between on-disk images and the pickle file
|
||||
if refresh_database:
|
||||
new_images = set(storage_images) - set(pickled_images) # images added to storage
|
||||
old_images = set(pickled_images) - set(storage_images) # images removed from storage
|
||||
|
||||
# detect replaced images
|
||||
for current_representation in representations:
|
||||
identity = current_representation["identity"]
|
||||
if identity in old_images:
|
||||
continue
|
||||
alpha_hash = current_representation["hash"]
|
||||
beta_hash = image_utils.find_image_hash(identity)
|
||||
if alpha_hash != beta_hash:
|
||||
logger.debug(f"Even though {identity} represented before, it's replaced later.")
|
||||
replaced_images.add(identity)
|
||||
|
||||
if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0):
|
||||
logger.info(
|
||||
f"Found {len(new_images)} newly added image(s)"
|
||||
f", {len(old_images)} removed image(s)"
|
||||
f", {len(replaced_images)} replaced image(s)."
|
||||
)
|
||||
|
||||
# append replaced images into both old and new images. these will be dropped and re-added.
|
||||
new_images.update(replaced_images)
|
||||
old_images.update(replaced_images)
|
||||
|
||||
# remove old images first
|
||||
if len(old_images) > 0:
|
||||
representations = [rep for rep in representations if rep["identity"] not in old_images]
|
||||
must_save_pickle = True
|
||||
|
||||
# find representations for new images
|
||||
if len(new_images) > 0:
|
||||
representations += __find_bulk_embeddings(
|
||||
employees=new_images,
|
||||
model_name=model_name,
|
||||
detector_backend=detector_backend,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
normalization=normalization,
|
||||
silent=silent,
|
||||
) # add new images
|
||||
must_save_pickle = True
|
||||
|
||||
if must_save_pickle:
|
||||
with open(datastore_path, "wb") as f:
|
||||
pickle.dump(representations, f)
|
||||
if not silent:
|
||||
logger.info(f"There are now {len(representations)} representations in {file_name}")
|
||||
|
||||
# Bail out if there are no representations
|
||||
if len(representations) == 0:
|
||||
if not silent:
|
||||
toc = time.time()
|
||||
logger.info(f"find function duration {toc - tic} seconds")
|
||||
return []
|
||||
|
||||
# ----------------------------
|
||||
# now, we got representations for facial database
|
||||
df = pd.DataFrame(representations)
|
||||
|
||||
if silent is False:
|
||||
logger.info(f"Searching {img_path} in {df.shape[0]} length datastore")
|
||||
|
||||
# the image at img_path might contain more than one face
|
||||
source_objs = detection.extract_faces(
|
||||
img_path=img_path,
|
||||
detector_backend=detector_backend,
|
||||
grayscale=False,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
anti_spoofing=anti_spoofing,
|
||||
)
|
||||
|
||||
resp_obj = []
|
||||
|
||||
for source_obj in source_objs:
|
||||
if anti_spoofing is True and source_obj.get("is_real", True) is False:
|
||||
raise ValueError("Spoof detected in the given image.")
|
||||
source_img = source_obj["face"]
|
||||
source_region = source_obj["facial_area"]
|
||||
target_embedding_obj = representation.represent(
|
||||
img_path=source_img,
|
||||
model_name=model_name,
|
||||
enforce_detection=enforce_detection,
|
||||
detector_backend="skip",
|
||||
align=align,
|
||||
normalization=normalization,
|
||||
)
|
||||
|
||||
target_representation = target_embedding_obj[0]["embedding"]
|
||||
|
||||
result_df = df.copy() # df will be filtered in each img
|
||||
result_df["source_x"] = source_region["x"]
|
||||
result_df["source_y"] = source_region["y"]
|
||||
result_df["source_w"] = source_region["w"]
|
||||
result_df["source_h"] = source_region["h"]
|
||||
|
||||
distances = []
|
||||
for _, instance in df.iterrows():
|
||||
source_representation = instance["embedding"]
|
||||
if source_representation is None:
|
||||
distances.append(float("inf")) # no representation for this image
|
||||
continue
|
||||
|
||||
target_dims = len(list(target_representation))
|
||||
source_dims = len(list(source_representation))
|
||||
if target_dims != source_dims:
|
||||
raise ValueError(
|
||||
"Source and target embeddings must have same dimensions but "
|
||||
+ f"{target_dims}:{source_dims}. Model structure may change"
|
||||
+ " after pickle created. Delete the {file_name} and re-run."
|
||||
)
|
||||
|
||||
distance = verification.find_distance(
|
||||
source_representation, target_representation, distance_metric
|
||||
)
|
||||
|
||||
distances.append(distance)
|
||||
|
||||
# ---------------------------
|
||||
target_threshold = threshold or verification.find_threshold(model_name, distance_metric)
|
||||
|
||||
result_df["threshold"] = target_threshold
|
||||
result_df["distance"] = distances
|
||||
|
||||
result_df = result_df.drop(columns=["embedding"])
|
||||
# pylint: disable=unsubscriptable-object
|
||||
result_df = result_df[result_df["distance"] <= target_threshold]
|
||||
result_df = result_df.sort_values(by=["distance"], ascending=True).reset_index(drop=True)
|
||||
|
||||
resp_obj.append(result_df)
|
||||
|
||||
# -----------------------------------
|
||||
|
||||
if not silent:
|
||||
toc = time.time()
|
||||
logger.info(f"find function duration {toc - tic} seconds")
|
||||
|
||||
return resp_obj
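
# Illustrative usage sketch (not part of the original module); both paths below are
# hypothetical. Each returned dataframe lists the database identities whose distance
# to one detected face in the query image falls below the threshold.
def _example_find() -> None:
    dfs = find(
        img_path="queries/query.jpg",  # hypothetical query image
        db_path="my_face_db",  # hypothetical folder of reference images
        model_name="VGG-Face",
        distance_metric="cosine",
        enforce_detection=False,
        silent=True,
    )
    for df in dfs:
        print(df[["identity", "distance", "threshold"]].head())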
|
||||
|
||||
|
||||
def __find_bulk_embeddings(
|
||||
employees: Set[str],
|
||||
model_name: str = "VGG-Face",
|
||||
detector_backend: str = "opencv",
|
||||
enforce_detection: bool = True,
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
normalization: str = "base",
|
||||
silent: bool = False,
|
||||
) -> List[Dict["str", Any]]:
|
||||
"""
|
||||
Find embeddings of a list of images
|
||||
|
||||
Args:
|
||||
employees (list): list of exact image paths
|
||||
|
||||
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
|
||||
OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).
|
||||
|
||||
detector_backend (str): face detector model name
|
||||
|
||||
enforce_detection (bool): set this to False if you
|
||||
want to proceed when you cannot detect any face
|
||||
|
||||
align (bool): enable or disable alignment of image
|
||||
before feeding to facial recognition model
|
||||
|
||||
expand_percentage (int): expand detected facial area with a
|
||||
percentage (default is 0).
|
||||
|
||||
normalization (str): normalization technique
|
||||
|
||||
silent (bool): enable or disable informative logging
|
||||
Returns:
|
||||
representations (list): list of dicts, each with the
image name, hash, embedding and detected face area's coordinates
|
||||
"""
|
||||
representations = []
|
||||
for employee in tqdm(
|
||||
employees,
|
||||
desc="Finding representations",
|
||||
disable=silent,
|
||||
):
|
||||
file_hash = image_utils.find_image_hash(employee)
|
||||
|
||||
try:
|
||||
img_objs = detection.extract_faces(
|
||||
img_path=employee,
|
||||
detector_backend=detector_backend,
|
||||
grayscale=False,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
)
|
||||
|
||||
except ValueError as err:
|
||||
logger.error(f"Exception while extracting faces from {employee}: {str(err)}")
|
||||
img_objs = []
|
||||
|
||||
if len(img_objs) == 0:
|
||||
representations.append(
|
||||
{
|
||||
"identity": employee,
|
||||
"hash": file_hash,
|
||||
"embedding": None,
|
||||
"target_x": 0,
|
||||
"target_y": 0,
|
||||
"target_w": 0,
|
||||
"target_h": 0,
|
||||
}
|
||||
)
|
||||
else:
|
||||
for img_obj in img_objs:
|
||||
img_content = img_obj["face"]
|
||||
img_region = img_obj["facial_area"]
|
||||
embedding_obj = representation.represent(
|
||||
img_path=img_content,
|
||||
model_name=model_name,
|
||||
enforce_detection=enforce_detection,
|
||||
detector_backend="skip",
|
||||
align=align,
|
||||
normalization=normalization,
|
||||
)
|
||||
|
||||
img_representation = embedding_obj[0]["embedding"]
|
||||
representations.append(
|
||||
{
|
||||
"identity": employee,
|
||||
"hash": file_hash,
|
||||
"embedding": img_representation,
|
||||
"target_x": img_region["x"],
|
||||
"target_y": img_region["y"],
|
||||
"target_w": img_region["w"],
|
||||
"target_h": img_region["h"],
|
||||
}
|
||||
)
|
||||
|
||||
return representations
|
||||
|
|
@ -1,144 +0,0 @@
|
|||
# built-in dependencies
|
||||
from typing import Any, Dict, List, Union, Optional
|
||||
|
||||
# 3rd party dependencies
|
||||
import numpy as np
|
||||
|
||||
# project dependencies
|
||||
from deepface.commons import image_utils
|
||||
from deepface.modules import modeling, detection, preprocessing
|
||||
from deepface.models.FacialRecognition import FacialRecognition
|
||||
|
||||
|
||||
def represent(
|
||||
img_path: Union[str, np.ndarray],
|
||||
model_name: str = "VGG-Face",
|
||||
enforce_detection: bool = True,
|
||||
detector_backend: str = "opencv",
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
normalization: str = "base",
|
||||
anti_spoofing: bool = False,
|
||||
max_faces: Optional[int] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Represent facial images as multi-dimensional vector embeddings.
|
||||
|
||||
Args:
|
||||
img_path (str or np.ndarray): The exact path to the image, a numpy array in BGR format,
|
||||
or a base64 encoded image. If the source image contains multiple faces, the result will
|
||||
include information for each detected face.
|
||||
|
||||
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
|
||||
OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet
|
||||
|
||||
enforce_detection (boolean): If no face is detected in an image, raise an exception.
|
||||
Default is True. Set to False to avoid the exception for low-resolution images.
|
||||
|
||||
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
||||
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'.
|
||||
|
||||
align (boolean): Perform alignment based on the eye positions.
|
||||
|
||||
expand_percentage (int): expand detected facial area with a percentage (default is 0).
|
||||
|
||||
normalization (string): Normalize the input image before feeding it to the model.
|
||||
Default is base. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace
|
||||
|
||||
anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
|
||||
|
||||
max_faces (int): Set a limit on the number of faces to be processed (default is None).
|
||||
|
||||
Returns:
|
||||
results (List[Dict[str, Any]]): A list of dictionaries, each containing the
|
||||
following fields:
|
||||
|
||||
- embedding (List[float]): Multidimensional vector representing facial features.
|
||||
The number of dimensions varies based on the reference model
|
||||
(e.g., FaceNet returns 128 dimensions, VGG-Face returns 4096 dimensions).
|
||||
- facial_area (dict): Detected facial area by face detection in dictionary format.
|
||||
Contains 'x' and 'y' as the left-corner point, and 'w' and 'h'
|
||||
as the width and height. If `detector_backend` is set to 'skip', it represents
|
||||
the full image area and is nonsensical.
|
||||
- face_confidence (float): Confidence score of face detection. If `detector_backend` is set
|
||||
to 'skip', the confidence will be 0 and is nonsensical.
|
||||
"""
|
||||
resp_objs = []
|
||||
|
||||
model: FacialRecognition = modeling.build_model(
|
||||
task="facial_recognition", model_name=model_name
|
||||
)
|
||||
|
||||
# ---------------------------------
|
||||
# pre-processing has already been run in verification, so this can be skipped when the call comes from verify.
|
||||
target_size = model.input_shape
|
||||
if detector_backend != "skip":
|
||||
img_objs = detection.extract_faces(
|
||||
img_path=img_path,
|
||||
detector_backend=detector_backend,
|
||||
grayscale=False,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
anti_spoofing=anti_spoofing,
|
||||
max_faces=max_faces,
|
||||
)
|
||||
else: # skip
|
||||
# Try to load the image; load errors raise an exception internally
|
||||
img, _ = image_utils.load_image(img_path)
|
||||
|
||||
if len(img.shape) != 3:
|
||||
raise ValueError(f"Input img must be 3 dimensional but it is {img.shape}")
|
||||
|
||||
# make dummy region and confidence to keep compatibility with `extract_faces`
|
||||
img_objs = [
|
||||
{
|
||||
"face": img,
|
||||
"facial_area": {"x": 0, "y": 0, "w": img.shape[0], "h": img.shape[1]},
|
||||
"confidence": 0,
|
||||
}
|
||||
]
|
||||
# ---------------------------------
|
||||
|
||||
if max_faces is not None and max_faces < len(img_objs):
|
||||
# sort as largest facial areas come first
|
||||
img_objs = sorted(
|
||||
img_objs,
|
||||
key=lambda img_obj: img_obj["facial_area"]["w"] * img_obj["facial_area"]["h"],
|
||||
reverse=True,
|
||||
)
|
||||
# discard rest of the items
|
||||
img_objs = img_objs[0:max_faces]
|
||||
|
||||
for img_obj in img_objs:
|
||||
if anti_spoofing is True and img_obj.get("is_real", True) is False:
|
||||
raise ValueError("Spoof detected in the given image.")
|
||||
img = img_obj["face"]
|
||||
|
||||
# rgb to bgr
|
||||
img = img[:, :, ::-1]
|
||||
|
||||
region = img_obj["facial_area"]
|
||||
confidence = img_obj["confidence"]
|
||||
|
||||
# resize to expected shape of ml model
|
||||
img = preprocessing.resize_image(
|
||||
img=img,
|
||||
# thanks to DeepId (!)
|
||||
target_size=(target_size[1], target_size[0]),
|
||||
)
|
||||
|
||||
# custom normalization
|
||||
img = preprocessing.normalize_input(img=img, normalization=normalization)
|
||||
|
||||
embedding = model.forward(img)
|
||||
|
||||
resp_objs.append(
|
||||
{
|
||||
"embedding": embedding,
|
||||
"facial_area": region,
|
||||
"face_confidence": confidence,
|
||||
}
|
||||
)
|
||||
|
||||
return resp_objs
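
# Illustrative usage sketch (not part of the original module); the image path is
# hypothetical. As noted in the docstring above, embedding length depends on the
# chosen model (e.g. Facenet produces 128-dimensional vectors).
def _example_represent() -> None:
    embedding_objs = represent(
        img_path="my_photo.jpg",  # hypothetical path
        model_name="Facenet",
        detector_backend="opencv",
        enforce_detection=False,
    )
    for obj in embedding_objs:
        print(len(obj["embedding"]), obj["facial_area"], obj["face_confidence"])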
|
||||
|
|
@ -1,382 +0,0 @@
|
|||
# built-in dependencies
|
||||
import time
|
||||
from typing import Any, Dict, Optional, Union, List, Tuple
|
||||
|
||||
# 3rd party dependencies
|
||||
import numpy as np
|
||||
|
||||
# project dependencies
|
||||
from deepface.modules import representation, detection, modeling
|
||||
from deepface.models.FacialRecognition import FacialRecognition
|
||||
from deepface.commons.logger import Logger
|
||||
|
||||
logger = Logger()
|
||||
|
||||
|
||||
def verify(
|
||||
img1_path: Union[str, np.ndarray, List[float]],
|
||||
img2_path: Union[str, np.ndarray, List[float]],
|
||||
model_name: str = "VGG-Face",
|
||||
detector_backend: str = "opencv",
|
||||
distance_metric: str = "cosine",
|
||||
enforce_detection: bool = True,
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
normalization: str = "base",
|
||||
silent: bool = False,
|
||||
threshold: Optional[float] = None,
|
||||
anti_spoofing: bool = False,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Verify if an image pair represents the same person or different persons.
|
||||
|
||||
The verification function converts facial images to vectors and calculates the similarity
|
||||
between those vectors. Vectors of images of the same person should exhibit higher similarity
|
||||
(or lower distance) than vectors of images of different persons.
|
||||
|
||||
Args:
|
||||
img1_path (str or np.ndarray or List[float]): Path to the first image.
|
||||
Accepts exact image path as a string, numpy array (BGR), base64 encoded images
|
||||
or pre-calculated embeddings.
|
||||
|
||||
img2_path (str or np.ndarray or List[float]): Path to the second image.
|
||||
Accepts exact image path as a string, numpy array (BGR), base64 encoded images
|
||||
or pre-calculated embeddings.
|
||||
|
||||
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
|
||||
OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).
|
||||
|
||||
detector_backend (string): face detector backend. Options: 'opencv', 'retinaface',
|
||||
'mtcnn', 'ssd', 'dlib', 'mediapipe', 'yolov8', 'centerface' or 'skip'
|
||||
(default is opencv)
|
||||
|
||||
distance_metric (string): Metric for measuring similarity. Options: 'cosine',
|
||||
'euclidean', 'euclidean_l2' (default is cosine).
|
||||
|
||||
enforce_detection (boolean): If no face is detected in an image, raise an exception.
|
||||
Set to False to avoid the exception for low-resolution images (default is True).
|
||||
|
||||
align (bool): Flag to enable face alignment (default is True).
|
||||
|
||||
expand_percentage (int): expand detected facial area with a percentage (default is 0).
|
||||
|
||||
normalization (string): Normalize the input image before feeding it to the model.
|
||||
Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base)
|
||||
|
||||
silent (boolean): Suppress or allow some log messages for a quieter analysis process
|
||||
(default is False).
|
||||
|
||||
threshold (float): Specify a threshold to determine whether a pair represents the same
|
||||
person or different individuals. This threshold is used for comparing distances.
|
||||
If left unset, default pre-tuned threshold values will be applied based on the specified
|
||||
model name and distance metric (default is None).
|
||||
|
||||
anti_spoofing (boolean): Flag to enable anti spoofing (default is False).
|
||||
|
||||
Returns:
|
||||
result (dict): A dictionary containing verification results.
|
||||
|
||||
- 'verified' (bool): Indicates whether the images represent the same person (True)
|
||||
or different persons (False).
|
||||
|
||||
- 'distance' (float): The distance measure between the face vectors.
|
||||
A lower distance indicates higher similarity.
|
||||
|
||||
- 'threshold' (float): The maximum threshold used for verification.
|
||||
If the distance is below this threshold, the images are considered a match.
|
||||
|
||||
- 'model' (str): The chosen face recognition model.
|
||||
|
||||
- 'similarity_metric' (str): The chosen similarity metric for measuring distances.
|
||||
|
||||
- 'facial_areas' (dict): Rectangular regions of interest for faces in both images.
|
||||
- 'img1': {'x': int, 'y': int, 'w': int, 'h': int}
|
||||
Region of interest for the first image.
|
||||
- 'img2': {'x': int, 'y': int, 'w': int, 'h': int}
|
||||
Region of interest for the second image.
|
||||
|
||||
- 'time' (float): Time taken for the verification process in seconds.
|
||||
"""
|
||||
|
||||
tic = time.time()
|
||||
|
||||
model: FacialRecognition = modeling.build_model(
|
||||
task="facial_recognition", model_name=model_name
|
||||
)
|
||||
dims = model.output_shape
|
||||
|
||||
no_facial_area = {
|
||||
"x": None,
|
||||
"y": None,
|
||||
"w": None,
|
||||
"h": None,
|
||||
"left_eye": None,
|
||||
"right_eye": None,
|
||||
}
|
||||
|
||||
def extract_embeddings_and_facial_areas(
|
||||
img_path: Union[str, np.ndarray, List[float]], index: int
|
||||
) -> Tuple[List[List[float]], List[dict]]:
|
||||
"""
|
||||
Extracts facial embeddings and corresponding facial areas from an
|
||||
image or returns pre-calculated embeddings.
|
||||
|
||||
Depending on the type of img_path, the function either extracts
|
||||
facial embeddings from the provided image
|
||||
(via a path or NumPy array) or verifies that the input is a list of
|
||||
pre-calculated embeddings and validates them.
|
||||
|
||||
Args:
|
||||
img_path (Union[str, np.ndarray, List[float]]):
|
||||
- A string representing the file path to an image,
|
||||
- A NumPy array containing the image data,
|
||||
- Or a list of pre-calculated embedding values (of type `float`).
|
||||
index (int): An index value used in error messages and logging
|
||||
to identify the number of the image.
|
||||
|
||||
Returns:
|
||||
Tuple[List[List[float]], List[dict]]:
|
||||
- A list containing lists of facial embeddings for each detected face.
|
||||
- A list of dictionaries where each dictionary contains facial area information.
|
||||
"""
|
||||
if isinstance(img_path, list):
|
||||
# given image is already pre-calculated embedding
|
||||
if not all(isinstance(dim, float) for dim in img_path):
|
||||
raise ValueError(
|
||||
f"When passing img{index}_path as a list,"
|
||||
" ensure that all its items are of type float."
|
||||
)
|
||||
|
||||
if silent is False:
|
||||
logger.warn(
|
||||
f"You passed {index}-th image as pre-calculated embeddings."
|
||||
"Please ensure that embeddings have been calculated"
|
||||
f" for the {model_name} model."
|
||||
)
|
||||
|
||||
if len(img_path) != dims:
|
||||
raise ValueError(
|
||||
f"embeddings of {model_name} should have {dims} dimensions,"
|
||||
f" but {index}-th image has {len(img_path)} dimensions input"
|
||||
)
|
||||
|
||||
img_embeddings = [img_path]
|
||||
img_facial_areas = [no_facial_area]
|
||||
else:
|
||||
try:
|
||||
img_embeddings, img_facial_areas = __extract_faces_and_embeddings(
|
||||
img_path=img_path,
|
||||
model_name=model_name,
|
||||
detector_backend=detector_backend,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
normalization=normalization,
|
||||
anti_spoofing=anti_spoofing,
|
||||
)
|
||||
except ValueError as err:
|
||||
raise ValueError(f"Exception while processing img{index}_path") from err
|
||||
return img_embeddings, img_facial_areas
|
||||
|
||||
img1_embeddings, img1_facial_areas = extract_embeddings_and_facial_areas(img1_path, 1)
|
||||
img2_embeddings, img2_facial_areas = extract_embeddings_and_facial_areas(img2_path, 2)
|
||||
|
||||
min_distance, min_idx, min_idy = float("inf"), None, None
|
||||
for idx, img1_embedding in enumerate(img1_embeddings):
|
||||
for idy, img2_embedding in enumerate(img2_embeddings):
|
||||
distance = find_distance(img1_embedding, img2_embedding, distance_metric)
|
||||
if distance < min_distance:
|
||||
min_distance, min_idx, min_idy = distance, idx, idy
|
||||
|
||||
# find the face pair with minimum distance
|
||||
threshold = threshold or find_threshold(model_name, distance_metric)
|
||||
distance = float(min_distance)
|
||||
facial_areas = (
|
||||
no_facial_area if min_idx is None else img1_facial_areas[min_idx],
|
||||
no_facial_area if min_idy is None else img2_facial_areas[min_idy],
|
||||
)
|
||||
|
||||
toc = time.time()
|
||||
|
||||
resp_obj = {
|
||||
"verified": distance <= threshold,
|
||||
"distance": distance,
|
||||
"threshold": threshold,
|
||||
"model": model_name,
|
||||
"detector_backend": detector_backend,
|
||||
"similarity_metric": distance_metric,
|
||||
"facial_areas": {"img1": facial_areas[0], "img2": facial_areas[1]},
|
||||
"time": round(toc - tic, 2),
|
||||
}
|
||||
|
||||
return resp_obj
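
# Illustrative usage sketch (not part of the original module); both image paths are
# hypothetical. Pre-calculated embedding lists could be passed instead of paths, as
# described in the docstring above.
def _example_verify() -> None:
    result = verify(
        img1_path="person_a_1.jpg",  # hypothetical path
        img2_path="person_a_2.jpg",  # hypothetical path
        model_name="VGG-Face",
        distance_metric="cosine",
        enforce_detection=False,
    )
    print(result["verified"], round(result["distance"], 4), result["threshold"])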
|
||||
|
||||
|
||||
def __extract_faces_and_embeddings(
|
||||
img_path: Union[str, np.ndarray],
|
||||
model_name: str = "VGG-Face",
|
||||
detector_backend: str = "opencv",
|
||||
enforce_detection: bool = True,
|
||||
align: bool = True,
|
||||
expand_percentage: int = 0,
|
||||
normalization: str = "base",
|
||||
anti_spoofing: bool = False,
|
||||
) -> Tuple[List[List[float]], List[dict]]:
|
||||
"""
|
||||
Extract facial areas and find corresponding embeddings for a given image
Returns:
embeddings (List[List[float]])
facial areas (List[dict])
|
||||
"""
|
||||
embeddings = []
|
||||
facial_areas = []
|
||||
|
||||
img_objs = detection.extract_faces(
|
||||
img_path=img_path,
|
||||
detector_backend=detector_backend,
|
||||
grayscale=False,
|
||||
enforce_detection=enforce_detection,
|
||||
align=align,
|
||||
expand_percentage=expand_percentage,
|
||||
anti_spoofing=anti_spoofing,
|
||||
)
|
||||
|
||||
# find embeddings for each face
|
||||
for img_obj in img_objs:
|
||||
if anti_spoofing is True and img_obj.get("is_real", True) is False:
|
||||
raise ValueError("Spoof detected in given image.")
|
||||
img_embedding_obj = representation.represent(
|
||||
img_path=img_obj["face"],
|
||||
model_name=model_name,
|
||||
enforce_detection=enforce_detection,
|
||||
detector_backend="skip",
|
||||
align=align,
|
||||
normalization=normalization,
|
||||
)
|
||||
# already extracted face given, safe to access its 1st item
|
||||
img_embedding = img_embedding_obj[0]["embedding"]
|
||||
embeddings.append(img_embedding)
|
||||
facial_areas.append(img_obj["facial_area"])
|
||||
|
||||
return embeddings, facial_areas
|
||||
|
||||
|
||||
def find_cosine_distance(
|
||||
source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
|
||||
) -> np.float64:
|
||||
"""
|
||||
Find cosine distance between two given vectors
|
||||
Args:
|
||||
source_representation (np.ndarray or list): 1st vector
|
||||
test_representation (np.ndarray or list): 2nd vector
|
||||
Returns
|
||||
distance (np.float64): calculated cosine distance
|
||||
"""
|
||||
if isinstance(source_representation, list):
|
||||
source_representation = np.array(source_representation)
|
||||
|
||||
if isinstance(test_representation, list):
|
||||
test_representation = np.array(test_representation)
|
||||
|
||||
a = np.dot(source_representation, test_representation)
|
||||
b = np.linalg.norm(source_representation)
|
||||
c = np.linalg.norm(test_representation)
|
||||
return 1 - a / (b * c)
|
||||
|
||||
|
||||
def find_euclidean_distance(
|
||||
source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
|
||||
) -> np.float64:
|
||||
"""
|
||||
Find euclidean distance between two given vectors
|
||||
Args:
|
||||
source_representation (np.ndarray or list): 1st vector
|
||||
test_representation (np.ndarray or list): 2nd vector
|
||||
Returns
|
||||
distance (np.float64): calculated euclidean distance
|
||||
"""
|
||||
if isinstance(source_representation, list):
|
||||
source_representation = np.array(source_representation)
|
||||
|
||||
if isinstance(test_representation, list):
|
||||
test_representation = np.array(test_representation)
|
||||
|
||||
return np.linalg.norm(source_representation - test_representation)
|
||||
|
||||
|
||||
def l2_normalize(x: Union[np.ndarray, list]) -> np.ndarray:
|
||||
"""
|
||||
Normalize input vector with l2
|
||||
Args:
|
||||
x (np.ndarray or list): given vector
|
||||
Returns:
|
||||
y (np.ndarray): l2 normalized vector
|
||||
"""
|
||||
if isinstance(x, list):
|
||||
x = np.array(x)
|
||||
norm = np.linalg.norm(x)
|
||||
return x if norm == 0 else x / norm
|
||||
|
||||
|
||||
def find_distance(
|
||||
alpha_embedding: Union[np.ndarray, list],
|
||||
beta_embedding: Union[np.ndarray, list],
|
||||
distance_metric: str,
|
||||
) -> np.float64:
|
||||
"""
|
||||
Wrapper to find distance between vectors according to the given distance metric
|
||||
Args:
|
||||
alpha_embedding (np.ndarray or list): 1st vector
beta_embedding (np.ndarray or list): 2nd vector
distance_metric (str): distance metric name. Options are cosine, euclidean and euclidean_l2.
Returns:
distance (np.float64): calculated distance according to the given metric
|
||||
"""
|
||||
if distance_metric == "cosine":
|
||||
distance = find_cosine_distance(alpha_embedding, beta_embedding)
|
||||
elif distance_metric == "euclidean":
|
||||
distance = find_euclidean_distance(alpha_embedding, beta_embedding)
|
||||
elif distance_metric == "euclidean_l2":
|
||||
distance = find_euclidean_distance(
|
||||
l2_normalize(alpha_embedding), l2_normalize(beta_embedding)
|
||||
)
|
||||
else:
|
||||
raise ValueError("Invalid distance_metric passed - ", distance_metric)
|
||||
return distance
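
# Illustrative sketch (not part of the original module): compare the three supported
# metrics on a pair of toy vectors; list inputs are accepted and converted to numpy
# arrays by the metric functions above.
def _example_find_distance() -> None:
    alpha = [0.1, 0.2, 0.3]
    beta = [0.3, 0.2, 0.1]
    for metric in ("cosine", "euclidean", "euclidean_l2"):
        print(metric, float(find_distance(alpha, beta, metric)))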
|
||||
|
||||
|
||||
def find_threshold(model_name: str, distance_metric: str) -> float:
|
||||
"""
|
||||
Retrieve pre-tuned threshold values for a model and distance metric pair
|
||||
Args:
|
||||
model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512,
|
||||
OpenFace, DeepFace, DeepID, Dlib, ArcFace, SFace and GhostFaceNet (default is VGG-Face).
|
||||
distance_metric (str): distance metric name. Options are cosine, euclidean
|
||||
and euclidean_l2.
|
||||
Returns:
|
||||
threshold (float): threshold value for that model name and distance metric
|
||||
pair. Distances less than this threshold will be classified as the same person.
|
||||
"""
|
||||
|
||||
base_threshold = {"cosine": 0.40, "euclidean": 0.55, "euclidean_l2": 0.75}
|
||||
|
||||
thresholds = {
|
||||
# "VGG-Face": {"cosine": 0.40, "euclidean": 0.60, "euclidean_l2": 0.86}, # 2622d
|
||||
"VGG-Face": {
|
||||
"cosine": 0.68,
|
||||
"euclidean": 1.17,
|
||||
"euclidean_l2": 1.17,
|
||||
}, # 4096d - tuned with LFW
|
||||
"Facenet": {"cosine": 0.40, "euclidean": 10, "euclidean_l2": 0.80},
|
||||
"Facenet512": {"cosine": 0.30, "euclidean": 23.56, "euclidean_l2": 1.04},
|
||||
"ArcFace": {"cosine": 0.68, "euclidean": 4.15, "euclidean_l2": 1.13},
|
||||
"Dlib": {"cosine": 0.07, "euclidean": 0.6, "euclidean_l2": 0.4},
|
||||
"SFace": {"cosine": 0.593, "euclidean": 10.734, "euclidean_l2": 1.055},
|
||||
"OpenFace": {"cosine": 0.10, "euclidean": 0.55, "euclidean_l2": 0.55},
|
||||
"DeepFace": {"cosine": 0.23, "euclidean": 64, "euclidean_l2": 0.64},
|
||||
"DeepID": {"cosine": 0.015, "euclidean": 45, "euclidean_l2": 0.17},
|
||||
"GhostFaceNet": {"cosine": 0.65, "euclidean": 35.71, "euclidean_l2": 1.10},
|
||||
}
|
||||
|
||||
threshold = thresholds.get(model_name, base_threshold).get(distance_metric, 0.4)
|
||||
|
||||
return threshold
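
# Illustrative sketch (not part of the original module): threshold values come from
# the table above; unknown model names fall back to the base thresholds.
def _example_find_threshold() -> None:
    print(find_threshold("Facenet", "cosine"))  # 0.40
    print(find_threshold("ArcFace", "euclidean_l2"))  # 1.13
    print(find_threshold("SomeNewModel", "cosine"))  # 0.40 via the base threshold fallback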
|
||||
[binary image assets removed from the repository, including icon/patreon.png, icon/stock-1.jpg, icon/stock-2.jpg and icon/stock-3.jpg]
@ -1,3 +0,0 @@
{
    "version": "0.0.94"
}
@ -1,15 +0,0 @@
requests>=2.27.1
numpy>=1.14.0
pandas>=0.23.4
gdown>=3.10.1
tqdm>=4.30.0
Pillow>=5.2.0
opencv-python>=4.5.5.64
tensorflow>=1.9.0
keras>=2.2.0
Flask>=1.1.2
flask_cors>=4.0.1
mtcnn>=0.1.0
retina-face>=0.0.1
fire>=0.4.0
gunicorn>=20.1.0
@ -1,6 +0,0 @@
opencv-contrib-python>=4.3.0.36
mediapipe>=0.8.7.3
dlib>=19.20.0
ultralytics>=8.0.122
facenet-pytorch>=2.5.3
torch>=2.1.2