Liquid-dev · yujidn · Feb 18, 2022 · Dec 8, 2021 · Dec 8, 2021 · Dec 8, 2021
diff --git a/faceboxes_pytorch/faceboxes_face_detector.py b/faceboxes_pytorch/faceboxes_face_detector.py
@@ -4,11 +4,13 @@
 from .data import cfg
 from .layers.functions.prior_box import PriorBox
 from .models.faceboxes import FaceBoxes
-from .utils.box_utils import decode
+from .utils.box_utils import decode, batch_decode, get_faceboxes_max_batch_size
+from typing import List, Tuple
+
+import cv2
 
 class FaceBoxesFaceDetector(object):
     def __init__(self, use_gpu=False):
-        torch.set_grad_enabled(False)
         # net and model
         self.net = FaceBoxes(phase='test', size=None, num_classes=2)    # initialize detector
         weight_path = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'weights/FaceBoxes.pth'))
@@ -55,6 +57,7 @@ def load_model(self, model, pretrained_path, load_to_cpu):
         model.load_state_dict(pretrained_dict, strict=False)
         return model
 
+    @torch.no_grad()
     def get_faceboxes(self, image, threshold=0.2):
         resize = 1
 
@@ -90,3 +93,126 @@ def get_faceboxes(self, image, threshold=0.2):
         scores, boxes = zip(*scores_and_boxes)
 
         return scores, boxes
+
+    @torch.no_grad()
+    def get_batch_faceboxes(self, image_list:List[np.ndarray], *, batch_size=-1, threshold=0.2)->List[Tuple[List[float],List[np.ndarray]]]:
+        """
+        Run FaceBoxes detection over a list of equally sized images, batch by batch.
+
+        image_list: List[np.ndarray] images that all share the shape of image_list[0]
+          (height, width, channels); the prior boxes are built once from that shape.
+        batch_size: number of images sent through the network at once. When given
+          explicitly it must be 2 or more. With the default (-1) it is derived from the
+          free GPU memory when CUDA is available; otherwise the whole list is one batch.
+        threshold: confidence threshold; only detections with conf > threshold are kept.
+
+        return: one (confidences, bounding boxes) tuple per image, in input order.
+          Boxes are multiplied by (width, height, width, height) of the input images.
+          Images processed in a batch have their detections sorted by descending
+          confidence; an image without detections yields ([], []).
+          An empty image_list yields [([], [])].
+
+        raises ValueError: if batch_size is neither -1 nor at least 2.
+        """
+
+        if len(image_list) == 0:
+            return [([],[])]
+
+        # Anything other than the -1 sentinel must be a usable explicit batch size.
+        if (batch_size != -1) and (batch_size < 2):
+            raise ValueError("batch_size must be greater than or equal to 2.")
+
+        im_height, im_width, im_ch = image_list[0].shape
+
+        if batch_size == -1:
+            if torch.cuda.is_available():
+                # Estimate how many images fit into the free GPU memory and keep 70% of
+                # that as a safety margin. Clamp to 1 so a tiny estimate can neither
+                # produce a zero step nor drop images.
+                torch.cuda.empty_cache()
+                batch_size = get_faceboxes_max_batch_size(width=im_width, height=im_height, ch=im_ch)
+                batch_size = max(1, int(batch_size * 0.7))
+            else:
+                batch_size = len(image_list)
+
+        batch_size = min(len(image_list), batch_size)
+        resize = 1
+
+        results = []
+
+        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
+        priors = priorbox.forward()
+        priors = priors.to(self.device)
+        prior_data = priors.data
+
+        # Loop invariants: box scale factors and the per-channel mean that is subtracted.
+        scale = torch.Tensor([im_width, im_height, im_width, im_height]).to(self.device)
+        mean = torch.tensor((104,117,123)).to(self.device)
+
+        for start in range(0, len(image_list), batch_size):
+            images = image_list[start:start + batch_size]
+
+            # A chunk holding a single image goes through the single-image path.
+            # Use continue (not break) so later chunks are still processed.
+            if len(images) == 1:
+                scores, boxes = self.get_faceboxes(images[0], threshold)
+                results.append((scores, boxes))
+                continue
+
+            batch = np.array(images).reshape(len(images), im_height, im_width, im_ch)
+
+            imgs = torch.from_numpy(batch)
+            imgs = imgs.to(self.device)
+            imgs = imgs.to(torch.float32)
+            imgs = torch.sub(imgs, mean)
+            imgs = imgs.permute(0,3,1,2)
+
+            loc, conf = self.net(imgs)
+
+            # The batch dimension is always >= 2 here, so no squeeze is needed.
+            boxes = batch_decode(loc.data, prior_data, cfg['variance'])
+            boxes = boxes * scale / resize
+
+            boxes = boxes.cpu().numpy()
+            scores = conf.data.cpu().numpy()[:, :, 1]
+
+            # (image index, box index) pairs of every detection above the threshold.
+            above = torch.nonzero(conf[:, :, 1] > threshold, as_tuple=True)
+            image_inds = above[0].data.cpu().numpy()
+            boxes_inds = above[1].data.cpu().numpy()
+
+            # This part is not the dominant cost, so it stays on the CPU.
+            for img_idx in range(len(images)):
+                inds = boxes_inds[image_inds == img_idx]
+                if len(inds) == 0:
+                    results.append(([],[]))
+                    continue
+
+                scores_and_boxes = zip(scores[img_idx][inds], boxes[img_idx][inds])
+                scores_and_boxes = sorted(scores_and_boxes, key=lambda x: -x[0])
+                s, b = zip(*scores_and_boxes)
+                results.append((s,b))
+
+            # The images on the device are not used past this point, so release them.
+            del imgs
+
+        return results
+
+    @torch.no_grad()
+    def get_batch_faceboxes_with_resize(self, image_list:List[np.ndarray], *, resize_target_width=360, resize_target_height=640,
+                                    batch_size=-1, threshold=0.2)->List[Tuple[List[float],List[np.ndarray]]]:
+        """
+        Resize every image to a common size, detect faces in batches, and map the boxes
+        back to each image's original resolution.
+
+        image_list: List[np.ndarray] images; they are resized internally, so they do not
+          need to share a size
+        resize_target_width: width used for the internal resize
+        resize_target_height: height used for the internal resize
+          the defaults of these two give a 9:16 aspect ratio
+        batch_size: forwarded to get_batch_faceboxes; by default it is derived from the
+          free GPU memory. When given explicitly it must be 2 or more
+        threshold: FaceBoxes confidence threshold
+
+        return: one (confidences, bounding boxes) tuple per image, boxes scaled back to
+          the original image size
+        """
+
+        target_size = (resize_target_width, resize_target_height)
+        original_shapes = [image.shape for image in image_list]
+        resized_images = [cv2.resize(image, target_size) for image in image_list]
+
+        detections = self.get_batch_faceboxes(resized_images, batch_size=batch_size, threshold=threshold)
+
+        rescaled_results = []
+        for original_shape, (confs, boxes) in zip(original_shapes, detections):
+            src_height, src_width = original_shape[:2]
+            height_ratio = src_height / resize_target_height
+            width_ratio = src_width / resize_target_width
+            # Per-coordinate factors, laid out as (x, y, x, y) to match the boxes.
+            factors = [width_ratio, height_ratio] * 2
+            rescaled_results.append((confs, [box * factors for box in boxes]))
+
+        return rescaled_results
diff --git a/faceboxes_pytorch/utils/box_utils.py b/faceboxes_pytorch/utils/box_utils.py
@@ -1,6 +1,20 @@
 import torch
 import numpy as np
 
+import subprocess
+
+# Default field names that get_gpu_info() passes to nvidia-smi via --query-gpu;
+# they also become the keys of each returned per-GPU dict.
+DEFAULT_ATTRIBUTES = (
+    'index',
+    'uuid',
+    'name',
+    'timestamp',
+    'memory.total',
+    'memory.free',
+    'memory.used',
+    'utilization.gpu',
+    'utilization.memory'
+)
+
 
 def point_form(boxes):
     """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
@@ -274,3 +288,37 @@ def nms(boxes, scores, overlap=0.5, top_k=200):
     return keep, count
 
 
+# https://qiita.com/tomotaka_ito/items/1da001c98b46ecf28ec7
+def get_gpu_info(nvidia_smi_path='nvidia-smi', keys=DEFAULT_ATTRIBUTES, no_units=True):
+    """Query nvidia-smi and return one dict per reported GPU.
+
+    nvidia_smi_path: path (or name on PATH) of the nvidia-smi executable.
+    keys: field names passed to --query-gpu; they become the keys of each dict.
+    no_units: when true, ",nounits" is appended to the --format option.
+
+    return: list of {key: value} dicts, one per non-empty output line; every value is
+      the raw string taken from the CSV output.
+
+    raises subprocess.CalledProcessError: if the command exits with a non-zero status
+      or the executable cannot be found.
+    """
+    output_format = 'csv,noheader,nounits' if no_units else 'csv,noheader'
+    # Pass an argument list and no shell, so the path and the keys are never
+    # interpreted by a shell.
+    cmd = [nvidia_smi_path, '--query-gpu=%s' % ','.join(keys), '--format=%s' % output_format]
+    try:
+        output = subprocess.check_output(cmd)
+    except FileNotFoundError as err:
+        # With a shell, a missing executable surfaced as exit status 127; keep that
+        # exception type for existing callers.
+        raise subprocess.CalledProcessError(127, cmd) from err
+
+    lines = [line.strip() for line in output.decode().split('\n') if line.strip() != '']
+
+    return [dict(zip(keys, line.split(', '))) for line in lines]
+
+
+def get_faceboxes_max_batch_size(width=1080, height=1920, ch=3, print_status_flg=False):
+    """Estimate how many float32 images of the given size fit into free GPU memory.
+
+    width, height, ch: dimensions of a single image.
+    print_status_flg: when true, print the raw GPU info and the per-GPU estimates.
+
+    return: the smallest per-GPU estimate, i.e. free memory divided by the size of one
+      image. The 'memory.free' value is treated as a number of mebibytes.
+    """
+    gpus = get_gpu_info()
+    if print_status_flg:
+        print(gpus)
+
+    # FaceBoxes converts images to float32 internally: 4 bytes per channel value.
+    bytes_per_image = width * height * ch * 4
+    mebibytes_per_image = bytes_per_image / 1024 / 1024
+
+    gpu_batch_sizes = []
+    for gpu in gpus:
+        free_memory = int(gpu['memory.free'])
+        gpu_batch_sizes.append(int(free_memory / mebibytes_per_image))
+
+    if print_status_flg:
+        print(f'gpu_batch_sizes: {gpu_batch_sizes}')
+
+    return min(gpu_batch_sizes)
+
+
+def batch_decode(loc, priors, variances):
+    """Decode a batch of location predictions into corner-form boxes.
+
+    loc: tensor indexed as [batch, prior, 4] holding the predicted offsets.
+    priors: tensor indexed as [prior, 4]; the first two columns are used as centers
+      and the last two as sizes.
+    variances: two-element sequence scaling the center and size offsets.
+
+    return: tensor indexed as [batch, prior, 4] holding (xmin, ymin, xmax, ymax).
+    """
+    prior_centers = priors[:, :2]
+    prior_sizes = priors[:, 2:]
+
+    centers = prior_centers + loc[:, :, :2] * variances[0] * prior_sizes
+    sizes = prior_sizes * torch.exp(loc[:, :, 2:] * variances[1])
+
+    # Convert (center, size) into (top-left, bottom-right) corners.
+    top_left = centers - sizes / 2
+    bottom_right = sizes + top_left
+    return torch.cat((top_left, bottom_right), 2)
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "faceboxes_pytorch"
-version = "0.1.2"
+version = "0.1.3"
 description = ""
 authors = ["Your Name <you@example.com>"]