@torch.no_grad()
def get_batch_faceboxes(self, image_list: List[np.ndarray], *, batch_size=-1,
                        threshold=0.2) -> List[Tuple[List[float], List[np.ndarray]]]:
    """Detect faces in a list of equally sized images, running the net in batches.

    Args:
        image_list: Images that all share the shape of ``image_list[0]``
            (unpacked as ``height, width, channels``). The prior boxes are
            built once from the first image, so mixed sizes are not supported.
        batch_size: Maximum number of images per forward pass. ``-1``
            (default) chooses automatically: when CUDA is available, the
            value reported by ``get_faceboxes_max_batch_size`` scaled by a
            0.7 safety margin; otherwise the whole list in one batch.
            Explicit values must be 2 or greater.
        threshold: Only boxes whose face confidence is strictly greater than
            this value are returned.

    Returns:
        One ``(scores, boxes)`` pair per input image, in input order. Within
        an image the pairs are sorted by descending score. Images with no
        detection above ``threshold`` yield ``([], [])``. An empty
        ``image_list`` yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is neither ``-1`` nor at least 2.
    """
    num_images = len(image_list)
    if num_images == 0:
        # One result per image: no images means no results.
        return []

    if batch_size != -1 and batch_size < 2:
        raise ValueError("batch_size must be greater than or equal to 2.")

    im_height, im_width, im_ch = image_list[0].shape

    if batch_size == -1:
        if torch.cuda.is_available():
            # Derive the batch size from free GPU memory and the per-image
            # footprint, then keep a 30% safety margin.
            # NOTE(review): this checks CUDA availability, not self.device;
            # confirm the intended behavior when the detector runs on CPU.
            torch.cuda.empty_cache()
            max_fit = get_faceboxes_max_batch_size(width=im_width, height=im_height, ch=im_ch)
            # Never drop below 1: a size of 0 would make chunking impossible.
            batch_size = max(1, int(max_fit * 0.7))
        else:
            batch_size = num_images

    batch_size = min(num_images, batch_size)

    # Loop-invariant tensors: priors, box scale and per-channel offsets.
    priors = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(self.device)
    prior_data = priors.data
    scale = torch.Tensor([im_width, im_height, im_width, im_height]).to(self.device)
    channel_offsets = torch.tensor((104, 117, 123)).to(self.device)

    results = []
    for start in range(0, num_images, batch_size):
        chunk = image_list[start:start + batch_size]

        if len(chunk) == 1:
            # A lone image (trailing remainder, or an auto batch size of 1)
            # goes through the single-image path.
            scores, boxes = self.get_faceboxes(chunk[0], threshold)
            results.append((scores, boxes))
            continue

        batch = torch.from_numpy(np.stack(chunk)).to(self.device).to(torch.float32)
        batch = torch.sub(batch, channel_offsets).permute(0, 3, 1, 2)

        loc, conf = self.net(batch)

        boxes = (batch_decode(loc.data, prior_data, cfg['variance']) * scale).cpu().numpy()
        face_conf = conf.data[:, :, 1]
        scores = face_conf.cpu().numpy()

        hit_images, hit_boxes = torch.nonzero(face_conf > threshold, as_tuple=True)
        hit_images = hit_images.cpu().numpy()
        hit_boxes = hit_boxes.cpu().numpy()

        # Per-image grouping and sorting is done on the CPU; it is not the
        # dominant cost, so it is not worth moving to the GPU.
        for image_index in range(len(chunk)):
            inds = hit_boxes[hit_images == image_index]
            if len(inds) == 0:
                results.append(([], []))
                continue

            ranked = sorted(zip(scores[image_index][inds], boxes[image_index][inds]),
                            key=lambda pair: -pair[0])
            image_scores, image_boxes = zip(*ranked)
            results.append((image_scores, image_boxes))

        # The batch tensor is not used again; drop the reference so its
        # memory can be reclaimed before the next chunk is uploaded.
        del batch

    return results


@torch.no_grad()
def get_batch_faceboxes_with_resize(self, image_list: List[np.ndarray], *, resize_target_width=360,
                                    resize_target_height=640, batch_size=-1,
                                    threshold=0.2) -> List[Tuple[List[float], List[np.ndarray]]]:
    """Resize every image to a common size, detect faces in batches, and map boxes back.

    Args:
        image_list: Images to process. They are resized internally, so they
            do not need to share a size.
        resize_target_width: Width every image is resized to before detection.
        resize_target_height: Height every image is resized to before
            detection. The defaults (360 x 640) give a 9:16 aspect ratio.
        batch_size: Forwarded to ``get_batch_faceboxes``; ``-1`` selects the
            size automatically, explicit values must be 2 or greater.
        threshold: Forwarded to ``get_batch_faceboxes`` as the confidence
            threshold.

    Returns:
        One ``(scores, boxes)`` pair per input image, with each box scaled
        from the resized image back to that image's original width/height.
        An empty ``image_list`` yields an empty list.
    """
    if len(image_list) == 0:
        return []

    target_size = (resize_target_width, resize_target_height)
    original_shapes = [image.shape for image in image_list]
    resized_images = [cv2.resize(image, target_size) for image in image_list]

    detections = self.get_batch_faceboxes(resized_images, batch_size=batch_size, threshold=threshold)

    rescaled = []
    for shape, (confs, boxes) in zip(original_shapes, detections):
        height, width = shape[:2]
        ratio_w = width / resize_target_width
        ratio_h = height / resize_target_height
        # ndarray ratios so the product is element-wise whatever sequence
        # type each box happens to be.
        ratios = np.array([ratio_w, ratio_h, ratio_w, ratio_h])
        rescaled.append((confs, [np.asarray(box) * ratios for box in boxes]))

    return rescaled
# Fields requested from ``nvidia-smi --query-gpu`` by default.
DEFAULT_ATTRIBUTES = (
    'index',
    'uuid',
    'name',
    'timestamp',
    'memory.total',
    'memory.free',
    'memory.used',
    'utilization.gpu',
    'utilization.memory',
)


# https://qiita.com/tomotaka_ito/items/1da001c98b46ecf28ec7
def get_gpu_info(nvidia_smi_path='nvidia-smi', keys=DEFAULT_ATTRIBUTES, no_units=True):
    """Query ``nvidia-smi`` and return one dict of string values per output row.

    Args:
        nvidia_smi_path: Executable to run.
        keys: Field names passed to ``--query-gpu``; also used as dict keys.
        no_units: When true, ``nounits`` is appended to the ``--format``
            option.

    Returns:
        A list with one ``{key: value}`` dict per non-empty output line.
        Values are the raw CSV fields as stripped strings.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
        OSError: If the executable cannot be started.
    """
    nu_opt = ',nounits' if no_units else ''
    # Argument list without a shell: nothing in the path or the keys is
    # interpreted by a shell.
    cmd = [
        nvidia_smi_path,
        '--query-gpu=%s' % ','.join(keys),
        '--format=csv,noheader%s' % nu_opt,
    ]
    output = subprocess.check_output(cmd)
    rows = [line.strip() for line in output.decode().splitlines() if line.strip()]

    # Split on ',' and strip each field, so the parse does not depend on the
    # separator being exactly ', '.
    return [dict(zip(keys, (field.strip() for field in row.split(',')))) for row in rows]


def get_faceboxes_max_batch_size(width=1080, height=1920, ch=3, print_status_flg=False):
    """Estimate how many float32 images of the given size fit in free GPU memory.

    The estimate divides each GPU's ``memory.free`` value by the size of one
    ``width * height * ch`` float32 image and returns the smallest result
    across all reported GPUs. Only the input image is counted; callers are
    expected to apply their own safety margin.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        ch: Number of channels.
        print_status_flg: When true, print the raw GPU info and the per-GPU
            batch sizes.

    Returns:
        The smallest per-GPU image count, as an int.

    Raises:
        ValueError: If no GPU rows were reported.
    """
    gpu_info = get_gpu_info()
    if print_status_flg:
        print(gpu_info)

    if not gpu_info:
        raise ValueError('nvidia-smi reported no GPUs; cannot derive a batch size.')

    # FaceBoxes converts its input to float32, i.e. 4 bytes per channel value.
    image_bytes = width * height * ch * 4
    # NOTE(review): memory.free is treated as a count of 1024*1024-byte units;
    # confirm against the nvidia-smi "nounits" output.
    image_mebibytes = image_bytes / 1024 / 1024
    gpu_batch_sizes = [int(int(info['memory.free']) / image_mebibytes) for info in gpu_info]

    if print_status_flg:
        print(f'gpu_batch_sizes: {gpu_batch_sizes}')

    return min(gpu_batch_sizes)


def batch_decode(loc, priors, variances):
    """Decode a batch of location offsets against priors into corner-form boxes.

    Args:
        loc: Tensor indexed as ``[batch, prior, 4]`` holding center offsets in
            ``[..., :2]`` and log-size offsets in ``[..., 2:]``.
        priors: Tensor indexed as ``[prior, 4]`` holding prior centers in
            ``[:, :2]`` and prior sizes in ``[:, 2:]``.
        variances: Two-element sequence; ``variances[0]`` scales the center
            offsets and ``variances[1]`` scales the log-size offsets.

    Returns:
        Tensor indexed as ``[batch, prior, 4]`` laid out as
        ``(min corner, max corner)``.
    """
    centers = priors[:, :2] + loc[:, :, :2] * variances[0] * priors[:, 2:]
    sizes = priors[:, 2:] * torch.exp(loc[:, :, 2:] * variances[1])
    min_corner = centers - sizes / 2
    # max corner = size + min corner, the same arithmetic as the in-place form.
    max_corner = sizes + min_corner
    return torch.cat((min_corner, max_corner), 2)