122 lines
3.4 KiB
Python
122 lines
3.4 KiB
Python
|
|
# built-in dependencies
|
|||
|
|
from typing import Tuple
|
|||
|
|
|
|||
|
|
# 3rd party
|
|||
|
|
import numpy as np
|
|||
|
|
import cv2
|
|||
|
|
|
|||
|
|
# project dependencies
|
|||
|
|
from deepface.commons import package_utils
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Bind `image` to the Keras preprocessing module that matches the installed
# TensorFlow major version (standalone keras for 1.x, tf.keras for 2.x).
# For any other reported version, `image` is left undefined.
tf_major_version = package_utils.get_tf_major_version()
if tf_major_version == 2:
    from tensorflow.keras.preprocessing import image
elif tf_major_version == 1:
    from keras.preprocessing import image
|
|||
|
|
|
|||
|
|
|
|||
|
|
def normalize_input(img: np.ndarray, normalization: str = "base") -> np.ndarray:
    """Normalize input image.

    The caller's array is never modified: every technique other than "base"
    operates on a floating-point copy of the input.

    Args:
        img (numpy array): the input image, expected in scale of [0, 1] with
            the colour channels on the last axis.
        normalization (str, optional): the normalization technique. Defaults to "base",
            for no normalization. Supported values are "base", "raw", "Facenet",
            "Facenet2018", "VGGFace", "VGGFace2" and "ArcFace".

    Returns:
        numpy array: the normalized image. For "base" the input object itself
            is returned unchanged.

    Raises:
        ValueError: if the normalization technique is not implemented.
    """

    # issue 131 declares that some normalization techniques improve the accuracy

    if normalization == "base":
        return img

    # @trevorgribble and @davedgd contributed this feature
    # restore input in scale of [0, 255] because it was normalized in scale of
    # [0, 1] in preprocess_face.
    # Work on a copy so the caller's array is left untouched (also when the
    # technique turns out to be unsupported), and make sure the copy is
    # floating point so the in-place divisions below are valid.
    work_dtype = img.dtype if np.issubdtype(img.dtype, np.floating) else np.float32
    img = img.astype(work_dtype, copy=True)
    img *= 255

    if normalization == "raw":
        pass  # return just restored pixels

    elif normalization == "Facenet":
        mean, std = img.mean(), img.std()
        # a constant image has zero deviation; dividing by it would yield NaN
        if std == 0:
            std = 1
        img = (img - mean) / std

    elif normalization == "Facenet2018":
        # simply / 127.5 - 1 (similar to facenet 2018 model preprocessing step as @iamrishab posted)
        img /= 127.5
        img -= 1

    elif normalization == "VGGFace":
        # mean subtraction based on VGGFace1 training data
        img[..., 0] -= 93.5940
        img[..., 1] -= 104.7624
        img[..., 2] -= 129.1863

    elif normalization == "VGGFace2":
        # mean subtraction based on VGGFace2 training data
        img[..., 0] -= 91.4953
        img[..., 1] -= 103.8827
        img[..., 2] -= 131.0912

    elif normalization == "ArcFace":
        # Reference study: The faces are cropped and resized to 112×112,
        # and each pixel (ranged between [0, 255]) in RGB images is normalised
        # by subtracting 127.5 then divided by 128.
        img -= 127.5
        img /= 128

    else:
        raise ValueError(f"unimplemented normalization type - {normalization}")

    return img
|
|||
|
|
|
|||
|
|
|
|||
|
|
def resize_image(img: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
    """
    Resize an image to expected size of a ml model with adding black pixels.

    The image is scaled with a single factor for both axes so that it fits
    inside target_size, centred on a zero-valued canvas of exactly
    target_size, expanded with a leading batch axis and, when its maximum
    value exceeds 1, divided by 255.

    Args:
        img (np.ndarray): pre-loaded image as numpy array
        target_size (tuple): input shape of ml model, as (height, width)
    Returns:
        img (np.ndarray): resized input image
    """
    target_h, target_w = target_size[0], target_size[1]

    # one common factor for both axes so the aspect ratio is preserved
    factor = min(target_h / img.shape[0], target_w / img.shape[1])

    # cv2.resize takes dsize as (width, height); keep at least one pixel per
    # axis so that extreme aspect ratios do not truncate to an empty size
    dsize = (
        max(1, int(img.shape[1] * factor)),
        max(1, int(img.shape[0] * factor)),
    )
    img = cv2.resize(img, dsize)

    diff_0 = target_h - img.shape[0]
    diff_1 = target_w - img.shape[1]

    # Put the base image in the middle of the padded image. Only the two
    # spatial axes are padded; any trailing axis (channels) is left as is.
    # The resized image may be 2-D (grayscale), hence the ndim-based spec.
    pad_width = [
        (diff_0 // 2, diff_0 - diff_0 // 2),
        (diff_1 // 2, diff_1 - diff_1 // 2),
    ] + [(0, 0)] * (img.ndim - 2)
    img = np.pad(img, pad_width, "constant")

    # double check: if target image is not still the same size with target.
    # Compare against a tuple (target_size may be a list) and hand the size
    # to cv2.resize in its (width, height) order.
    if img.shape[0:2] != (target_h, target_w):
        img = cv2.resize(img, (target_w, target_h))

    # make it 4-dimensional how ML models expect
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)

    # values above 1 mean the pixels are not yet in scale of [0, 1]
    if img.max() > 1:
        img = img.astype(np.float32) / 255.0

    return img
|