From e9f8dfb57bdcbeb73b57b66041f7a34b9fc91c4a Mon Sep 17 00:00:00 2001
From: Feng Ni <nemonameless@qq.com>
Date: Fri, 22 Apr 2022 19:09:49 +0800
Subject: [PATCH] [cherry-pick] fix yolox cpp infer (#5805)

* fix yolox cpp infer

* fix cpp infer order, test=document_fix
---
 configs/yolox/_base_/yolox_reader.yml  |  8 ++--
 configs/yolox/yolox_nano_300e_coco.yml |  8 ++--
 configs/yolox/yolox_tiny_300e_coco.yml |  8 ++--
 deploy/cpp/include/preprocess_op.h     | 16 +++++++
 deploy/cpp/src/preprocess_op.cc        | 18 +++++++
 deploy/python/preprocess.py            | 65 ++++----------------------
 6 files changed, 55 insertions(+), 68 deletions(-)
diff --git a/configs/yolox/_base_/yolox_reader.yml b/configs/yolox/_base_/yolox_reader.yml
index 65763881788..a33b847b159 100644
--- a/configs/yolox/_base_/yolox_reader.yml
+++ b/configs/yolox/_base_/yolox_reader.yml
@@ -27,8 +27,8 @@ TrainReader:
 EvalReader:
   sample_transforms:
     - Decode: {}
-    - Resize: {target_size: 640, keep_ratio: True}
-    - Pad: {size: 640, fill_value: [114., 114., 114.]}
+    - Resize: {target_size: [640, 640], keep_ratio: True, interp: 1}
+    - Pad: {size: [640, 640], fill_value: [114., 114., 114.]}
     - Permute: {}
   batch_size: 4
 
@@ -38,7 +38,7 @@ TestReader:
     image_shape: [3, 640, 640]
   sample_transforms:
     - Decode: {}
-    - Resize: {target_size: 640, keep_ratio: True}
-    - Pad: {size: 640, fill_value: [114., 114., 114.]}
+    - Resize: {target_size: [640, 640], keep_ratio: True, interp: 1}
+    - Pad: {size: [640, 640], fill_value: [114., 114., 114.]}
     - Permute: {}
   batch_size: 1
diff --git a/configs/yolox/yolox_nano_300e_coco.yml b/configs/yolox/yolox_nano_300e_coco.yml
index 79f96131790..80b8b5c51fb 100644
--- a/configs/yolox/yolox_nano_300e_coco.yml
+++ b/configs/yolox/yolox_nano_300e_coco.yml
@@ -64,8 +64,8 @@ TrainReader:
 EvalReader:
   sample_transforms:
     - Decode: {}
-    - Resize: {target_size: 416, keep_ratio: True}
-    - Pad: {size: 416, fill_value: [114., 114., 114.]}
+    - Resize: {target_size: [416, 416], keep_ratio: True, interp: 1}
+    - Pad: {size: [416, 416], fill_value: [114., 114., 114.]}
     - Permute: {}
   batch_size: 8
 
@@ -75,7 +75,7 @@ TestReader:
     image_shape: [3, 416, 416]
   sample_transforms:
     - Decode: {}
-    - Resize: {target_size: 416, keep_ratio: True}
-    - Pad: {size: 416, fill_value: [114., 114., 114.]}
+    - Resize: {target_size: [416, 416], keep_ratio: True, interp: 1}
+    - Pad: {size: [416, 416], fill_value: [114., 114., 114.]}
     - Permute: {}
   batch_size: 1
diff --git a/configs/yolox/yolox_tiny_300e_coco.yml b/configs/yolox/yolox_tiny_300e_coco.yml
index 6e32c485187..c81c172d279 100644
--- a/configs/yolox/yolox_tiny_300e_coco.yml
+++ b/configs/yolox/yolox_tiny_300e_coco.yml
@@ -52,8 +52,8 @@ TrainReader:
 EvalReader:
   sample_transforms:
     - Decode: {}
-    - Resize: {target_size: 416, keep_ratio: True}
-    - Pad: {size: 416, fill_value: [114., 114., 114.]}
+    - Resize: {target_size: [416, 416], keep_ratio: True, interp: 1}
+    - Pad: {size: [416, 416], fill_value: [114., 114., 114.]}
     - Permute: {}
   batch_size: 8
 
@@ -63,7 +63,7 @@ TestReader:
     image_shape: [3, 416, 416]
   sample_transforms:
     - Decode: {}
-    - Resize: {target_size: 416, keep_ratio: True}
-    - Pad: {size: 416, fill_value: [114., 114., 114.]}
+    - Resize: {target_size: [416, 416], keep_ratio: True, interp: 1}
+    - Pad: {size: [416, 416], fill_value: [114., 114., 114.]}
     - Permute: {}
   batch_size: 1
diff --git a/deploy/cpp/include/preprocess_op.h b/deploy/cpp/include/preprocess_op.h
index 2d24799a332..a54bc2afb8a 100644
--- a/deploy/cpp/include/preprocess_op.h
+++ b/deploy/cpp/include/preprocess_op.h
@@ -161,6 +161,20 @@ class WarpAffine : public PreprocessOp {
   int pad_ = 31;
 };
 
+// Pads an image to a fixed size; "size" may be [h, w] or one int (square).
+class Pad : public PreprocessOp {
+ public:
+  virtual void Init(const YAML::Node& item) {
+    if (item["size"].IsScalar()) size_.assign(2, item["size"].as<int>());
+    else size_ = item["size"].as<std::vector<int>>();
+    fill_value_ = item["fill_value"].as<std::vector<float>>();
+  }
+  virtual void Run(cv::Mat* im, ImageBlob* data);
+ private:
+  std::vector<int> size_;          // target {height, width}, read by Run
+  std::vector<float> fill_value_;  // "fill_value" list from the config
+};
+
 void CropImg(cv::Mat& img,
              cv::Mat& crop_img,
              std::vector<int>& area,
@@ -203,6 +217,8 @@ class Preprocessor {
       return std::make_shared<TopDownEvalAffine>();
     } else if (name == "WarpAffine") {
       return std::make_shared<WarpAffine>();
+    }else if (name == "Pad") {
+      return std::make_shared<Pad>();
     }
     std::cerr << "can not find function of OP: " << name
               << " and return: nullptr" << std::endl;
diff --git a/deploy/cpp/src/preprocess_op.cc b/deploy/cpp/src/preprocess_op.cc
index 98c700f6219..6147555be57 100644
--- a/deploy/cpp/src/preprocess_op.cc
+++ b/deploy/cpp/src/preprocess_op.cc
@@ -229,6 +229,23 @@ void WarpAffine::Run(cv::Mat* im, ImageBlob* data) {
   };
 }
 
+// Pads *im on the bottom/right to size_ ({h, w}); an exact fit is passed on.
+void Pad::Run(cv::Mat* im, ImageBlob* data) {
+  const int pad_h = size_[0] - im->rows;
+  const int pad_w = size_[1] - im->cols;
+  if (pad_h != 0 || pad_w != 0) {
+    // cv::Scalar(114) fills only channel 0, so copy each configured channel.
+    cv::Scalar fill;
+    for (int c = 0; c < 4 && c < static_cast<int>(fill_value_.size()); ++c) {
+      fill[c] = fill_value_[c];
+    }
+    cv::copyMakeBorder(*im, *im, 0, pad_h, 0, pad_w, cv::BORDER_CONSTANT, fill);
+    data->in_net_shape_ = {static_cast<float>(im->rows),
+                           static_cast<float>(im->cols)};
+  }
+  data->in_net_im_ = im->clone();
+}
+
 // Preprocessor op running order
 const std::vector<std::string> Preprocessor::RUN_ORDER = {"InitInfo",
                                                           "TopDownEvalAffine",
@@ -237,6 +254,7 @@ const std::vector<std::string> Preprocessor::RUN_ORDER = {"InitInfo",
                                                           "WarpAffine",
                                                           "NormalizeImage",
                                                           "PadStride",
+                                                          "Pad",
                                                           "Permute"};
 
 void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
diff --git a/deploy/python/preprocess.py b/deploy/python/preprocess.py
index b8cf256d508..31536477585 100644
--- a/deploy/python/preprocess.py
+++ b/deploy/python/preprocess.py
@@ -247,77 +247,30 @@ def __call__(self, im, im_info):
 
 
 class Pad(object):
-    def __init__(self,
-                 size=None,
-                 size_divisor=32,
-                 pad_mode=0,
-                 offsets=None,
-                 fill_value=(127.5, 127.5, 127.5)):
+    def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
         """
-        Pad image to a specified size or multiple of size_divisor.
+        Pad image to a specified size.
         Args:
-            size (int, Sequence): image target size, if None, pad to multiple of size_divisor, default None
-            size_divisor (int): size divisor, default 32
-            pad_mode (int): pad mode, currently only supports four modes [-1, 0, 1, 2]. if -1, use specified offsets
-                if 0, only pad to right and bottom. if 1, pad according to center. if 2, only pad left and top
-            offsets (list): [offset_x, offset_y], specify offset while padding, only supported pad_mode=-1
-            fill_value (bool): rgb value of pad area, default (127.5, 127.5, 127.5)
+            size (list[int]): image target size
+            fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
         """
         super(Pad, self).__init__()
         if isinstance(size, int):
             size = [size, size]
-
-        assert pad_mode in [
-            -1, 0, 1, 2
-        ], 'currently only supports four modes [-1, 0, 1, 2]'
-        if pad_mode == -1:
-            assert offsets, 'if pad_mode is -1, offsets should not be None'
-
         self.size = size
-        self.size_divisor = size_divisor
-        self.pad_mode = pad_mode
         self.fill_value = fill_value
-        self.offsets = offsets
-
-    def apply_image(self, image, offsets, im_size, size):
-        x, y = offsets
-        im_h, im_w = im_size
-        h, w = size
-        canvas = np.ones((h, w, 3), dtype=np.float32)
-        canvas *= np.array(self.fill_value, dtype=np.float32)
-        canvas[y:y + im_h, x:x + im_w, :] = image.astype(np.float32)
-        return canvas
 
     def __call__(self, im, im_info):
         im_h, im_w = im.shape[:2]
-        if self.size:
-            h, w = self.size
-            assert (
-                im_h <= h and im_w <= w
-            ), '(h, w) of target size should be greater than (im_h, im_w)'
-        else:
-            h = int(np.ceil(im_h / self.size_divisor) * self.size_divisor)
-            w = int(np.ceil(im_w / self.size_divisor) * self.size_divisor)
-
+        h, w = self.size
         if h == im_h and w == im_w:
             im = im.astype(np.float32)
             return im, im_info
 
-        if self.pad_mode == -1:
-            offset_x, offset_y = self.offsets
-        elif self.pad_mode == 0:
-            offset_y, offset_x = 0, 0
-        elif self.pad_mode == 1:
-            offset_y, offset_x = (h - im_h) // 2, (w - im_w) // 2
-        else:
-            offset_y, offset_x = h - im_h, w - im_w
-
-        offsets, im_size, size = [offset_x, offset_y], [im_h, im_w], [h, w]
-        im = self.apply_image(im, offsets, im_size, size)
-
-        if self.pad_mode == 0:
-            return im, im_info
-
+        canvas = np.ones((h, w, 3), dtype=np.float32)
+        canvas *= np.array(self.fill_value, dtype=np.float32)
+        canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
+        im = canvas
         return im, im_info