Further sfd detector improvements (numba JIT for box decoding, in-place ReLU); update requirements to add numba

2020-12-19 07:58:10 +00:00
commit cdf86ed3a0
@@ -24,6 +24,7 @@ requirements:
    - scipy
    - opencv
    - tqdm
+    - numba

 about:
  home: https://github.com/1adrianb/face-alignment
@@ -1,19 +1,6 @@
 import math
 import numpy as np
-
-def bboxlog(x1, y1, x2, y2, axc, ayc, aww, ahh):
-    xc, yc, ww, hh = (x2 + x1) / 2, (y2 + y1) / 2, x2 - x1, y2 - y1
-    dx, dy = (xc - axc) / aww, (yc - ayc) / ahh
-    dw, dh = math.log(ww / aww), math.log(hh / ahh)
-    return dx, dy, dw, dh
-
-
-def bboxloginv(dx, dy, dw, dh, axc, ayc, aww, ahh):
-    xc, yc = dx * aww + axc, dy * ahh + ayc
-    ww, hh = math.exp(dw) * aww, math.exp(dh) * ahh
-    x1, x2, y1, y2 = xc - ww / 2, xc + ww / 2, yc - hh / 2, yc + hh / 2
-    return x1, y1, x2, y2
-
+from numba import jit

 def nms(dets, thresh):
    if 0 == len(dets):
@@ -62,7 +49,7 @@ def encode(matched, priors, variances):
    # return target for smooth_l1_loss
    return np.concatenate([g_cxcy, g_wh], 1)  # [num_priors,4]

-
+@jit(nopython=True)
 def decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
@@ -3,6 +3,8 @@ import torch.nn.functional as F

 import cv2
 import numpy as np
+from numba import jit
+from numba.typed import List

 from .bbox import *

@@ -26,7 +28,7 @@ def batch_detect(net, img_batch, device):
    if 'cuda' in device:
        torch.backends.cudnn.benchmark = True

-    BB = img_batch.size(0)
+    batch_size = img_batch.size(0)
    img_batch = img_batch.to(device, dtype=torch.float32)

    img_batch = img_batch.flip(-3)  # RGB to BGR
@@ -38,12 +40,16 @@ def batch_detect(net, img_batch, device):
    for i in range(len(olist) // 2):
        olist[i * 2] = F.softmax(olist[i * 2], dim=1)

-    bboxlists = []
-
    olist = [oelem.data.cpu().numpy() for oelem in olist]
-    variances = [0.1, 0.2]
+    
+    bboxlists = get_predictions(List(olist), batch_size)
+    return bboxlists

-    for j in range(BB):
+@jit(nopython=True)
+def get_predictions(olist, batch_size):
+    bboxlists = []
+    variances = [0.1, 0.2]
+    for j in range(batch_size):
        bboxlist = []
        for i in range(len(olist) // 2):
            ocls, oreg = olist[i * 2], olist[i * 2 + 1]
@@ -52,16 +58,17 @@ def batch_detect(net, img_batch, device):
            for Iindex, hindex, windex in poss:
                axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
                score = ocls[j, 1, hindex, windex]
-                loc = oreg[j, :, hindex, windex].resize(1, 4)
+                loc = oreg[j, :, hindex, windex].copy().reshape(1, 4)
                priors = np.array([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]])
                box = decode(loc, priors, variances)
-                x1, y1, x2, y2 = box[0] * 1.0
+                x1, y1, x2, y2 = box[0]
                bboxlist.append([x1, y1, x2, y2, score])

        bboxlists.append(bboxlist)

    bboxlists = np.array(bboxlists)
-    return bboxlists
+    return bboxlists  
+    


 def flip_detect(net, img, device):
@@ -68,40 +68,40 @@ class s3fd(nn.Module):
        self.conv7_2_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
-        h = F.relu(self.conv1_1(x))
-        h = F.relu(self.conv1_2(h))
+        h = F.relu(self.conv1_1(x), inplace=True)
+        h = F.relu(self.conv1_2(h), inplace=True)
        h = F.max_pool2d(h, 2, 2)

-        h = F.relu(self.conv2_1(h))
-        h = F.relu(self.conv2_2(h))
+        h = F.relu(self.conv2_1(h), inplace=True)
+        h = F.relu(self.conv2_2(h), inplace=True)
        h = F.max_pool2d(h, 2, 2)

-        h = F.relu(self.conv3_1(h))
-        h = F.relu(self.conv3_2(h))
-        h = F.relu(self.conv3_3(h))
+        h = F.relu(self.conv3_1(h), inplace=True)
+        h = F.relu(self.conv3_2(h), inplace=True)
+        h = F.relu(self.conv3_3(h), inplace=True)
        f3_3 = h
        h = F.max_pool2d(h, 2, 2)

-        h = F.relu(self.conv4_1(h))
-        h = F.relu(self.conv4_2(h))
-        h = F.relu(self.conv4_3(h))
+        h = F.relu(self.conv4_1(h), inplace=True)
+        h = F.relu(self.conv4_2(h), inplace=True)
+        h = F.relu(self.conv4_3(h), inplace=True)
        f4_3 = h
        h = F.max_pool2d(h, 2, 2)

-        h = F.relu(self.conv5_1(h))
-        h = F.relu(self.conv5_2(h))
-        h = F.relu(self.conv5_3(h))
+        h = F.relu(self.conv5_1(h), inplace=True)
+        h = F.relu(self.conv5_2(h), inplace=True)
+        h = F.relu(self.conv5_3(h), inplace=True)
        f5_3 = h
        h = F.max_pool2d(h, 2, 2)

-        h = F.relu(self.fc6(h))
-        h = F.relu(self.fc7(h))
+        h = F.relu(self.fc6(h), inplace=True)
+        h = F.relu(self.fc7(h), inplace=True)
        ffc7 = h
-        h = F.relu(self.conv6_1(h))
-        h = F.relu(self.conv6_2(h))
+        h = F.relu(self.conv6_1(h), inplace=True)
+        h = F.relu(self.conv6_2(h), inplace=True)
        f6_2 = h
-        h = F.relu(self.conv7_1(h))
-        h = F.relu(self.conv7_2(h))
+        h = F.relu(self.conv7_1(h), inplace=True)
+        h = F.relu(self.conv7_2(h), inplace=True)
        f7_2 = h

        f3_3 = self.conv3_3_norm(f3_3)
@@ -1,3 +1,4 @@
 opencv-python
 scipy>=0.17.0
 scikit-image
+numba
@@ -42,6 +42,7 @@ requirements = [
    'scikit-image',
    'opencv-python',
    'tqdm',
+    'numba',
    'enum34;python_version<"3.4"'
 ]