Added roi_align layoutinfer & cases (#615)

* Added roi_align layoutinfer & cases Type: New feature Signed-off-by: Chen <[email protected]> * Update instancenorm op spec .json Type: bug fix Signed-off-by: Chen <[email protected]> * Added roi_pool layoutinfer & fixed case bug Type: new feature Signed-off-by: Chen <[email protected]> --------- Signed-off-by: Chen <[email protected]> Co-authored-by: Chen <[email protected]>
VeriSilicon · Jul 8, 2023 · ea8046e · ea8046e
1 parent 32c5a61
commit ea8046e
Show file tree

Hide file tree

Showing 12 changed files with 311 additions and 19 deletions.
diff --git a/include/tim/vx/ops/instanceNormalization.json b/include/tim/vx/ops/instanceNormalization.json
@@ -6,6 +6,14 @@
                  "dtype": "float",
                  "Optional": "true",
                  "default": "1e-5f"
+                },
+                {"name": "input_layout",
+                    "dtype": "tim::vx::DataLayout",
+                    "Optional": "true",
+                    "default": "tim::vx::DataLayout::WHCN",
+                    "range":["tim::vx::DataLayout::ANY",
+                             "tim::vx::DataLayout::WHCN",
+                             "tim::vx::DataLayout::CWHN"]
                 }
             ]
     }

diff --git a/include/tim/vx/ops/roi_align.h b/include/tim/vx/ops/roi_align.h
@@ -51,7 +51,7 @@ class RoiAlign : public BuiltinOp {
  public:
   RoiAlign(Graph* graph, int32_t output_height, int32_t output_width,
             float height_ratio, float width_ratio, int32_t height_sample_num,
-            int32_t width_sample_num);
+            int32_t width_sample_num, DataLayout input_layout = DataLayout::WHCN);
 
   std::shared_ptr<Operation> Clone(
       std::shared_ptr<Graph>& graph) const override;

diff --git a/include/tim/vx/ops/roi_pool.h b/include/tim/vx/ops/roi_pool.h
@@ -37,17 +37,17 @@ namespace ops {
  *
  * Select and scale the feature map of each region of interest to a unified output
  * size by max-pooling.
- * 
+ *
  *    pool_type : only support max-pooling  (MAX)
- *    scale : The ratio of image to feature map (Range: 0 < scale <= 1) 
+ *    scale : The ratio of image to feature map (Range: 0 < scale <= 1)
  *    size : The size of roi pooling (height/width)
  *
  */
 
 class RoiPool : public BuiltinOp {
  public:
-  RoiPool(Graph* graph, PoolType type, float scale,
-            const std::array<uint32_t, 2>& size);
+  RoiPool(Graph* graph, PoolType type, float scale, const std::array<uint32_t, 2>& size,
+          DataLayout input_layout = DataLayout::WHCN);
 
   std::shared_ptr<Operation> Clone(
       std::shared_ptr<Graph>& graph) const override;

diff --git a/include/tim/vx/ops/roialign.json b/include/tim/vx/ops/roialign.json
@@ -19,6 +19,14 @@
                 },
                 {"name":"width_sample_num",
                     "dtype": "int32_t"
+                },
+                {"name": "input_layout",
+                    "dtype": "tim::vx::DataLayout",
+                    "Optional": "true",
+                    "default": "tim::vx::DataLayout::WHCN",
+                    "range":["tim::vx::DataLayout::ANY",
+                             "tim::vx::DataLayout::WHCN",
+                             "tim::vx::DataLayout::CWHN"]
                 }
             ]
     }

diff --git a/include/tim/vx/ops/roipool.json b/include/tim/vx/ops/roipool.json
@@ -14,6 +14,14 @@
                 },
                 {"name":"size",
                     "dtype": "std::array<uint32_t, 2>"
+                },
+                {"name": "input_layout",
+                    "dtype": "tim::vx::DataLayout",
+                    "Optional": "true",
+                    "default": "tim::vx::DataLayout::WHCN",
+                    "range":["tim::vx::DataLayout::ANY",
+                             "tim::vx::DataLayout::WHCN",
+                             "tim::vx::DataLayout::CWHN"]
                 }
             ]
     }

diff --git a/src/tim/transform/layout_inference.cc b/src/tim/transform/layout_inference.cc
@@ -69,6 +69,8 @@
 #include "ops/broadcast_layout_inference.h"
 #include "ops/unidirectional_rnn_layout_inference.h"
 #include "ops/bidirectional_rnn_layout_inference.h"
+#include "ops/roi_align_layout_inference.h"
+#include "ops/roi_pool_layout_inference.h"
 
 #include <algorithm>
 #include <deque>
@@ -260,6 +262,8 @@ std::vector<std::shared_ptr<vx::Tensor>> HandleLayoutInfer(
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_LRN2, LRN);
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_L2_NORMALIZE, L2Normalization);
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_INSTANCE_NORM, InstanceNorm);
+    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ROI_ALIGN, RoiAlign);
+    REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ROI_POOL, RoiPool);
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_ADDN, AddN);
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_PRELU, PRelu);
     REGIST_LAYOUT_INFERENCE(VSI_NN_OP_GATHER, Gather);

diff --git a/src/tim/transform/layout_inference_test.cc b/src/tim/transform/layout_inference_test.cc
@@ -351,4 +351,72 @@ TEST(Resize, bilinear_outputsize) {
     std::vector<float> output(golden.size());
     EXPECT_TRUE(infer_output->CopyDataFromTensor(output.data()));
     EXPECT_TRUE(ArraysMatch(golden, output, 1e-5f));
+}
+
+// Builds a RoiAlign op whose feature map is declared as CWHN, runs layout
+// inference on the source graph, then executes the inferred graph and compares
+// its output against the golden values below.
+TEST(RoiAlign, nhwc) {
+  auto ctx = tim::vx::Context::Create();
+  auto src_graph = ctx->CreateGraph();
+
+  tim::vx::ShapeType input_shape({1, 4, 4, 1});  //cwhn
+  tim::vx::ShapeType regions_shape({4, 4});
+  tim::vx::ShapeType batch_index_shape({4});
+  tim::vx::ShapeType output_shape({1, 2, 2, 4});
+
+  tim::vx::TensorSpec input_spec(tim::vx::DataType::FLOAT32, input_shape,
+                                 tim::vx::TensorAttribute::INPUT);
+  tim::vx::TensorSpec regions_spec(tim::vx::DataType::FLOAT32, regions_shape,
+                                   tim::vx::TensorAttribute::INPUT);
+  tim::vx::TensorSpec batch_index_spec(tim::vx::DataType::INT32,
+                                       batch_index_shape,
+                                       tim::vx::TensorAttribute::INPUT);
+  tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32, output_shape,
+                                  tim::vx::TensorAttribute::OUTPUT);
+
+  std::vector<float> input_data = {-10.0f, -1.0f, 4.0f,  -5.0f, -8.0f, -2.0f,
+                                   9.0f,   1.0f,  7.0f,  -2.0f, 3.0f,  -7.0f,
+                                   -2.0f,  10.0f, -3.0f, 5.0f};
+
+  std::vector<float> regions_data = {2.0f, 2.0f, 4.0f, 4.0f, 0.0f, 0.0f,
+                                     8.0f, 8.0f, 2.0f, 0.0f, 4.0f, 8.0f,
+                                     0.0f, 2.0f, 8.0f, 4.0f};
+
+  std::vector<int32_t> batch_index_data = {0, 0, 0, 0};
+
+  std::vector<float> golden = {
+      0.375f, 5.125f, -0.375f, 2.875f, -0.5f,    -0.3125f, 3.1875f, 1.125f,
+      0.25f,  4.25f,  4.875f,  0.625f, -0.1875f, 1.125f,   0.9375f, -2.625f};
+
+  auto input_tensor = src_graph->CreateTensor(input_spec);
+  auto regions_tensor = src_graph->CreateTensor(regions_spec, regions_data.data());
+  auto batch_index_tensor =
+      src_graph->CreateTensor(batch_index_spec, batch_index_data.data());
+  auto output_tensor = src_graph->CreateTensor(output_spec);
+
+  auto roi_align = src_graph->CreateOperation<tim::vx::ops::RoiAlign>(
+      2, 2, 2.0f, 2.0f, 4, 4, tim::vx::DataLayout::CWHN);
+  (*roi_align)
+      .BindInput(input_tensor)
+      .BindInput(regions_tensor)
+      .BindInput(batch_index_tensor)
+      .BindOutput(output_tensor);
+
+  // Do layout inference
+  auto transform = tim::transform::LayoutInference(src_graph, ctx);
+  auto infer_graph = transform.first;
+  auto graph_io_map = transform.second;
+  infer_graph->Compile();
+
+  // Look up the inferred-graph counterpart of each source-graph I/O tensor,
+  // in the same order the inputs were bound above.
+  auto infer_input = graph_io_map[src_graph->InputsTensor()[0]];
+  auto infer_regions = graph_io_map[src_graph->InputsTensor()[1]];
+  auto infer_batch_index = graph_io_map[src_graph->InputsTensor()[2]];
+  auto infer_output = graph_io_map[src_graph->OutputsTensor()[0]];
+
+  infer_input->CopyDataToTensor(input_data.data(),
+                                input_data.size() * sizeof(float));
+  infer_regions->CopyDataToTensor(regions_data.data(),
+                                  regions_data.size() * sizeof(float));
+  // The batch indices are int32_t, so the byte count uses that element type.
+  infer_batch_index->CopyDataToTensor(
+      batch_index_data.data(), batch_index_data.size() * sizeof(int32_t));
+  infer_graph->Run();
+
+  std::vector<float> output(golden.size());
+  EXPECT_TRUE(infer_output->CopyDataFromTensor(output.data()));
+  EXPECT_TRUE(ArraysMatch(golden, output, 1e-5f));
 }
diff --git a/src/tim/transform/ops/roi_align_layout_inference.h b/src/tim/transform/ops/roi_align_layout_inference.h
@@ -0,0 +1,99 @@
+/****************************************************************************
+ *
+ *    Copyright (c) 2020-2023 Vivante Corporation
+ *
+ *    Permission is hereby granted, free of charge, to any person obtaining a
+ *    copy of this software and associated documentation files (the "Software"),
+ *    to deal in the Software without restriction, including without limitation
+ *    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *    and/or sell copies of the Software, and to permit persons to whom the
+ *    Software is furnished to do so, subject to the following conditions:
+ *
+ *    The above copyright notice and this permission notice shall be included in
+ *    all copies or substantial portions of the Software.
+ *
+ *    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ *    DEALINGS IN THE SOFTWARE.
+ *
+ *****************************************************************************/
+#ifndef TIM_LAYOUT_INFER_ROI_ALIGN_LAYOUT_INFERENCE_H_
+#define TIM_LAYOUT_INFER_ROI_ALIGN_LAYOUT_INFERENCE_H_
+
+#include "tim/vx/ops/roi_align.h"
+
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "builtin_op_impl.h"
+
+namespace tim {
+namespace transform {
+
+// Layout inference handler for RoiAlign.
+//
+// OnInputs() reads the layout stored on the source op, permutes input 0 when
+// its current permute vector does not already match the one required for that
+// layout, re-creates constant inputs in the inferred graph, clones the op into
+// the inferred graph and records the required permute vector on the output.
+class RoiAlignLayoutInfer : public OpLayoutInfer {
+ public:
+  RoiAlignLayoutInfer(
+      const std::shared_ptr<vx::Operation> op,
+      std::shared_ptr<layout_inference_impl::LayoutInferContext>& context)
+      : OpLayoutInfer(op, context) {}
+
+  // next_tensors: receives the source-graph output tensor of this op once the
+  // op has been re-created in the inferred graph.
+  void OnInputs(
+      std::vector<std::shared_ptr<vx::Tensor>>& next_tensors) override {
+    vx::DataLayout layout = op_->impl()->layout_;
+    auto input_tensors = op_->impl()->InputsTensor();
+
+    // Select the permute vector required for the layout declared on the op.
+    std::shared_ptr<IPermuteVector> required_pv;
+    switch (layout) {
+      case vx::DataLayout::CWHN:
+        required_pv = std::make_shared<PermuteVector<4>>(kCWHN2WHCN);
+        break;
+      case vx::DataLayout::WHCN:
+        required_pv = MakeShared(4);
+        break;
+      default:
+        // Any other layout is logged and then treated like the WHCN case.
+        VSILOGE("The layout of input is not support.");
+        required_pv = MakeShared(4);
+        break;
+    }
+
+    // Only input 0 is permuted; the remaining inputs are bound unchanged.
+    auto input_pv = context_->GetPermuteVector(input_tensors[0]);
+    auto final_pv = input_pv->Reverse()->Add(required_pv);
+    std::shared_ptr<vx::Tensor> infer_input;
+    if (!final_pv->IsAligned()) {
+      infer_input =
+          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      context_->SetPermuteVector(input_tensors[0], required_pv);
+    } else {
+      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      context_->SetPermuteVector(input_tensors[0], input_pv);
+    }
+    context_->UpdateTensorMap(input_tensors[0], infer_input);
+
+    // Constant inputs are copied into a fresh tensor of the inferred graph
+    // using their original spec.
+    for (const auto& t_src : input_tensors) {
+      if (t_src->IsConstTensor()) {
+        std::vector<uint8_t> dataRef(t_src->GetSpec().GetByteSize());
+        t_src->CopyDataFromTensor(dataRef.data());
+        auto t_infer = context_->infer_graph_->CreateTensor(
+            t_src->GetSpec(), static_cast<const void*>(dataRef.data()));
+        context_->SetPermuteVector(t_src, MakeShared(t_src->GetShape().size()));
+        context_->UpdateTensorMap(t_src, t_infer);
+      }
+    }
+
+    auto roi_align = op_->Clone(context_->infer_graph_);
+    auto outs_infer = CreateOutputsTensor(required_pv);
+    for (const auto& i_src : input_tensors) {
+      (*roi_align).BindInput(context_->GetMapedTensor(i_src));
+    }
+    (*roi_align).BindOutput(outs_infer[0]);
+
+    auto output_tensors = op_->impl()->OutputsTensor();
+    context_->SetPermuteVector(output_tensors[0], required_pv);
+    // Add out tensor of src_graph into next_tensor
+    next_tensors.push_back(output_tensors[0]);
+  }
+};
+
+}  // namespace transform
+}  // namespace tim
+
+#endif
diff --git a/src/tim/transform/ops/roi_pool_layout_inference.h b/src/tim/transform/ops/roi_pool_layout_inference.h
@@ -0,0 +1,99 @@
+/****************************************************************************
+ *
+ *    Copyright (c) 2020-2023 Vivante Corporation
+ *
+ *    Permission is hereby granted, free of charge, to any person obtaining a
+ *    copy of this software and associated documentation files (the "Software"),
+ *    to deal in the Software without restriction, including without limitation
+ *    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *    and/or sell copies of the Software, and to permit persons to whom the
+ *    Software is furnished to do so, subject to the following conditions:
+ *
+ *    The above copyright notice and this permission notice shall be included in
+ *    all copies or substantial portions of the Software.
+ *
+ *    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ *    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ *    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ *    DEALINGS IN THE SOFTWARE.
+ *
+ *****************************************************************************/
+#ifndef TIM_LAYOUT_INFER_ROI_POOL_LAYOUT_INFERENCE_H_
+#define TIM_LAYOUT_INFER_ROI_POOL_LAYOUT_INFERENCE_H_
+
+#include "tim/vx/ops/roi_pool.h"
+
+#include "ops/op_layout_inference.h"
+#include "permute_vector.h"
+#include "builtin_op_impl.h"
+
+namespace tim {
+namespace transform {
+
+// Layout inference handler for RoiPool.
+//
+// OnInputs() reads the layout stored on the source op, permutes input 0 when
+// its current permute vector does not already match the one required for that
+// layout, re-creates constant inputs in the inferred graph, clones the op into
+// the inferred graph and records the required permute vector on the output.
+class RoiPoolLayoutInfer : public OpLayoutInfer {
+ public:
+  RoiPoolLayoutInfer(
+      const std::shared_ptr<vx::Operation> op,
+      std::shared_ptr<layout_inference_impl::LayoutInferContext>& context)
+      : OpLayoutInfer(op, context) {}
+
+  // next_tensors: receives the source-graph output tensor of this op once the
+  // op has been re-created in the inferred graph.
+  void OnInputs(
+      std::vector<std::shared_ptr<vx::Tensor>>& next_tensors) override {
+    vx::DataLayout layout = op_->impl()->layout_;
+    auto input_tensors = op_->impl()->InputsTensor();
+
+    // Select the permute vector required for the layout declared on the op.
+    std::shared_ptr<IPermuteVector> required_pv;
+    switch (layout) {
+      case vx::DataLayout::CWHN:
+        required_pv = std::make_shared<PermuteVector<4>>(kCWHN2WHCN);
+        break;
+      case vx::DataLayout::WHCN:
+        required_pv = MakeShared(4);
+        break;
+      default:
+        // Any other layout is logged and then treated like the WHCN case.
+        VSILOGE("The layout of input is not support.");
+        required_pv = MakeShared(4);
+        break;
+    }
+
+    // Only input 0 is permuted; the remaining inputs are bound unchanged.
+    auto input_pv = context_->GetPermuteVector(input_tensors[0]);
+    auto final_pv = input_pv->Reverse()->Add(required_pv);
+    std::shared_ptr<vx::Tensor> infer_input;
+    if (!final_pv->IsAligned()) {
+      infer_input =
+          InsertPermute(context_->GetMapedTensor(input_tensors[0]), final_pv);
+      context_->SetPermuteVector(input_tensors[0], required_pv);
+    } else {
+      infer_input = context_->GetMapedTensor(input_tensors[0]);
+      context_->SetPermuteVector(input_tensors[0], input_pv);
+    }
+    context_->UpdateTensorMap(input_tensors[0], infer_input);
+
+    // Constant inputs are copied into a fresh tensor of the inferred graph
+    // using their original spec.
+    for (const auto& t_src : input_tensors) {
+      if (t_src->IsConstTensor()) {
+        std::vector<uint8_t> dataRef(t_src->GetSpec().GetByteSize());
+        t_src->CopyDataFromTensor(dataRef.data());
+        auto t_infer = context_->infer_graph_->CreateTensor(
+            t_src->GetSpec(), static_cast<const void*>(dataRef.data()));
+        context_->SetPermuteVector(t_src, MakeShared(t_src->GetShape().size()));
+        context_->UpdateTensorMap(t_src, t_infer);
+      }
+    }
+
+    auto roi_pool = op_->Clone(context_->infer_graph_);
+    auto outs_infer = CreateOutputsTensor(required_pv);
+    for (const auto& i_src : input_tensors) {
+      (*roi_pool).BindInput(context_->GetMapedTensor(i_src));
+    }
+    (*roi_pool).BindOutput(outs_infer[0]);
+
+    auto output_tensors = op_->impl()->OutputsTensor();
+    context_->SetPermuteVector(output_tensors[0], required_pv);
+    // Add out tensor of src_graph into next_tensor
+    next_tensors.push_back(output_tensors[0]);
+  }
+};
+
+}  // namespace transform
+}  // namespace tim
+
+#endif
diff --git a/src/tim/vx/ops/roi_align.cc b/src/tim/vx/ops/roi_align.cc
@@ -32,8 +32,8 @@ namespace ops {
 
 RoiAlign::RoiAlign(Graph* graph, int32_t output_height, int32_t output_width,
           float height_ratio, float width_ratio, int32_t height_sample_num,
-          int32_t width_sample_num)
-    : BuiltinOp(graph, VSI_NN_OP_ROI_ALIGN),
+          int32_t width_sample_num, DataLayout input_layout)
+    : BuiltinOp(graph, VSI_NN_OP_ROI_ALIGN, 0, 0, input_layout),
       output_height_(output_height),
       output_width_(output_width),
       height_ratio_(height_ratio),
@@ -53,7 +53,8 @@ std::shared_ptr<Operation> RoiAlign::Clone(
     std::shared_ptr<Graph>& graph) const {
   return graph->CreateOperation<RoiAlign>(
       this->output_height_, this->output_width_, this->height_ratio_,
-      this->width_ratio_, this->height_sample_num_, this->width_sample_num_);
+      this->width_ratio_, this->height_sample_num_, this->width_sample_num_,
+      this->impl_->layout_);
 }
 
 }  // namespace ops