From 47b35874e21ba59c99ad079abb4f96bbfad20c5f Mon Sep 17 00:00:00 2001
From: Chen Xin
Date: Mon, 10 Apr 2023 18:38:07 +0800
Subject: [PATCH] Check md5sum for const tensor

Tensor with same md5 will be created once and shared

Type: New feature

Signed-off-by: Chen Xin
---
 CMakeLists.txt                             |  9 ++++
 src/tim/CMakeLists.txt                     |  3 +-
 src/tim/transform/layout_inference_test.cc | 50 ++++++++++++++++++++
 src/tim/vx/graph.cc                        | 54 +++++++++++++++++++++-
 src/tim/vx/graph_private.h                 |  4 ++
 5 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1e388a684..fdf7f2cb3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,7 @@ option(TIM_VX_ENABLE_PLATFORM "Enable multi devices support"
 option(TIM_VX_ENABLE_PLATFORM_LITE "Enable lite multi-device support" OFF)
 option(TIM_VX_ENABLE_GRPC "Enable gPRC support" OFF)
 option(TIM_VX_DBG_ENABLE_TENSOR_HNDL "Enable built-in tensor from handle: use malloced memory instead of VideoMemory by kernel driver" ON)
+option(TIM_VX_ENABLE_MD5_CHECK "Enable md5sum check for const tensor" ON)
 
 set(CMAKE_CXX_STANDARD 14)
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
@@ -46,6 +47,11 @@ if(${TIM_VX_ENABLE_40BIT})
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVSI_40BIT_VA_SUPPORT")
 endif()
 
+if(${TIM_VX_ENABLE_MD5_CHECK})
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DENABLE_MD5_CHECK")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_MD5_CHECK")
+endif()
+
 if(${TIM_VX_ENABLE_CUSTOM_OP})
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTIM_VX_ENABLE_CUSTOM_OP")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTIM_VX_ENABLE_CUSTOM_OP")
@@ -93,6 +99,9 @@ if(TIM_VX_ENABLE_GRPC)
     include(cmake/gRPC.cmake)
 endif()
 
+if(TIM_VX_ENABLE_MD5_CHECK)
+    find_package(OpenSSL REQUIRED)
+endif()
 add_subdirectory("src/tim")
 
 if(TIM_VX_BUILD_EXAMPLES)
diff --git a/src/tim/CMakeLists.txt b/src/tim/CMakeLists.txt
index 1172297a5..8159ad84e 100644
--- a/src/tim/CMakeLists.txt
+++ b/src/tim/CMakeLists.txt
@@ -138,7 +138,8 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}
 ${op_as_flags}")
 add_library(${TARGET_NAME} ${${TARGET_NAME}_SRCS})
 target_include_directories(${TARGET_NAME} PRIVATE ${INC_DIRS})
 target_link_libraries(${TARGET_NAME} PUBLIC
-    -Wl,--no-whole-archive ${OVXDRV_LIBRARIES} ${EXTERNAL_LIBS})
+    -Wl,--no-whole-archive ${OVXDRV_LIBRARIES} ${EXTERNAL_LIBS} ${OPENSSL_CRYPTO_LIBRARY})
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations ")
 if(${TIM_VX_USE_EXTERNAL_OVXLIB})
     #-Wl,--whole-archive should not applied to external library, but only for shared library
diff --git a/src/tim/transform/layout_inference_test.cc b/src/tim/transform/layout_inference_test.cc
index ebd659c16..a9f5c8351 100644
--- a/src/tim/transform/layout_inference_test.cc
+++ b/src/tim/transform/layout_inference_test.cc
@@ -167,6 +167,56 @@ TEST(GroupedConv2d, kernel_bigger_than_input_SAME) {
 
   infer_input->CopyDataToTensor(in_data.data(), in_data.size() * sizeof(float));
   infer_graph->Run();
+  std::vector<float> output(golden.size());
+  EXPECT_TRUE(infer_output->CopyDataFromTensor(output.data()));
+  EXPECT_EQ(golden, output);
+}
+
+TEST(FC, share_const_tensor) {
+  auto ctx = tim::vx::Context::Create();
+  auto src_graph = ctx->CreateGraph();
+
+  tim::vx::ShapeType input_shape({2, 1});
+  tim::vx::ShapeType kernel_shape({2, 2});
+  tim::vx::ShapeType bias_shape({2});
+  tim::vx::ShapeType output_shape({2, 1});
+  tim::vx::TensorSpec input_spec(tim::vx::DataType::FLOAT32, input_shape,
+                                 tim::vx::TensorAttribute::INPUT);
+  tim::vx::TensorSpec kernel_spec(tim::vx::DataType::FLOAT32, kernel_shape,
+                                  tim::vx::TensorAttribute::CONSTANT);
+  tim::vx::TensorSpec bias_spec(tim::vx::DataType::FLOAT32, bias_shape,
+                                tim::vx::TensorAttribute::CONSTANT);
+  tim::vx::TensorSpec tran_spec(tim::vx::DataType::FLOAT32, output_shape,
+                                tim::vx::TensorAttribute::TRANSIENT);
+  tim::vx::TensorSpec output_spec(tim::vx::DataType::FLOAT32, output_shape,
+                                 tim::vx::TensorAttribute::OUTPUT);
+  std::vector<float> in_data = {1,4,};
+  std::vector<float> weight =
+      {-3,3,2,1,};
+  std::vector<float> bias = {0.1, 0.4,};
+  std::vector<float> golden = {-8, 25};
+  auto input_tensor = src_graph->CreateTensor(input_spec);
+  auto weight_tensor = src_graph->CreateTensor(kernel_spec, weight.data());
+  auto bias_tensor = src_graph->CreateTensor(bias_spec, bias.data());
+  auto tran_tensor = src_graph->CreateTensor(tran_spec);
+  auto output_tensor = src_graph->CreateTensor(output_spec);
+
+  auto op1 = src_graph->CreateOperation<tim::vx::ops::FullyConnected>(0, 2);
+  (*op1).BindInputs({input_tensor, weight_tensor, bias_tensor}).BindOutputs({tran_tensor});
+
+  auto op2 = src_graph->CreateOperation<tim::vx::ops::FullyConnected>(0, 2);
+  (*op2).BindInputs({tran_tensor, weight_tensor, bias_tensor}).BindOutputs({output_tensor});
+  // Do layout inference
+  auto transform = tim::transform::LayoutInference(src_graph, ctx);
+  auto infer_graph = transform.first;
+  auto graph_io_map = transform.second;
+  infer_graph->Compile();
+
+  auto infer_input = graph_io_map[src_graph->InputsTensor()[0]];
+  auto infer_output = graph_io_map[src_graph->OutputsTensor()[0]];
+
+  infer_input->CopyDataToTensor(in_data.data(), in_data.size() * sizeof(float));
+  infer_graph->Run();
+
   std::vector<float> output(golden.size());
   EXPECT_TRUE(infer_output->CopyDataFromTensor(output.data()));
   EXPECT_EQ(golden, output);
diff --git a/src/tim/vx/graph.cc b/src/tim/vx/graph.cc
index 5c172969c..be74c0674 100644
--- a/src/tim/vx/graph.cc
+++ b/src/tim/vx/graph.cc
@@ -24,6 +24,10 @@
 #include "tim/vx/graph.h"
 
 #include <algorithm>
+#ifdef ENABLE_MD5_CHECK
+#include <openssl/md5.h>
+#endif
+
 #include "context_private.h"
 #include "graph_private.h"
 #include "op_impl.h"
@@ -55,6 +59,32 @@ GraphImpl::GraphImpl(ContextImpl* context, const CompileOption& options)
 
 GraphImpl::~GraphImpl() { vsi_nn_ReleaseGraph(&graph_); }
 
+std::map<std::string, std::shared_ptr<Tensor>>& GraphImpl::GetMd5TensorMap() {
+  return md5_tensor_;
+}
+#ifdef ENABLE_MD5_CHECK
+#define MD5_SECRET_LEN_16 (16)
+#define MD5_BYTE_STRING_LEN (4)
+const std::string GraphImpl::commonMd5Secret32(const std::string& src) {
+  MD5_CTX ctx;
+
+  std::string md5String;
+  unsigned
+      char md[MD5_SECRET_LEN_16] = {0};
+  char tmp[MD5_BYTE_STRING_LEN] = {0};
+
+  MD5_Init(&ctx);
+  MD5_Update(&ctx, src.c_str(), src.size());
+  MD5_Final(md, &ctx);
+
+  for (int i = 0; i < 16; ++i) {
+    memset(tmp, 0x00, sizeof(tmp));
+    snprintf(tmp, sizeof(tmp), "%02X", md[i]);
+    md5String += tmp;
+  }
+  return md5String;
+}
+#endif
+
 vsi_nn_graph_t* GraphImpl::graph() { return graph_; }
 
 void GraphImpl::AddInput(vsi_nn_tensor_id_t id) {
@@ -135,7 +165,29 @@ void GraphImpl::PrintGraph() const { vsi_nn_PrintGraph(this->graph_); }
 
 std::shared_ptr<Tensor> GraphImpl::CreateTensor(const TensorSpec& spec,
                                                 const void* data) {
-  auto tensor = std::make_shared<TensorImpl>(this, spec, data);
+  std::shared_ptr<Tensor> tensor;
+#ifdef ENABLE_MD5_CHECK
+  if (spec.attr_ & TensorAttribute::CONSTANT && data != NULL) {
+    std::string md5;
+    uint32_t data_size = 1;
+    for (auto it = spec.shape_.begin(); it != spec.shape_.end(); ++it) {
+      data_size *= *it;
+    }
+    if (data_size < 512) {
+      md5 = commonMd5Secret32(std::string((const char*)data, data_size));
+    } else {
+      md5 = commonMd5Secret32(std::string((const char*)data, 512)); //Take first 512 bytes
+    }
+    if (GetMd5TensorMap().find(md5) != GetMd5TensorMap().end()) {
+      tensor = GetMd5TensorMap()[md5];
+    } else {
+      tensor = std::make_shared<TensorImpl>(this, spec, data);
+      GetMd5TensorMap()[md5] = tensor;
+    }
+    return tensor;
+  }
+#endif
+  tensor = std::make_shared<TensorImpl>(this, spec, data);
   if (spec.attr_ & TensorAttribute::INPUT) {
     this->AddInput(tensor);
     this->AddInput(tensor->GetId());
diff --git a/src/tim/vx/graph_private.h b/src/tim/vx/graph_private.h
index c693fb34c..beaea1f1b 100644
--- a/src/tim/vx/graph_private.h
+++ b/src/tim/vx/graph_private.h
@@ -26,6 +26,7 @@
 #include "tim/vx/graph.h"
 
 #include <map>
+#include <string>
 #include <memory>
 #include <mutex>
 #include <vector>
@@ -44,6 +45,8 @@ class GraphImpl : public Graph {
   GraphImpl(ContextImpl* context,
             const CompileOption& options = CompileOption::DefaultOptions);
   ~GraphImpl();
+  const std::string commonMd5Secret32(const std::string& src);
+  std::map<std::string, std::shared_ptr<Tensor>>&
+      GetMd5TensorMap();
   /// Return the low-level graph object
   vsi_nn_graph_t* graph();
   void AddInput(vsi_nn_tensor_id_t id);
@@ -97,6 +100,7 @@ class GraphImpl : public Graph {
   int32_t not_consumed_output_cnt_;
   std::map<std::shared_ptr<Tensor>, std::vector<std::shared_ptr<Operation>>>
       tensor_consumers_;
   std::map<std::shared_ptr<Tensor>, std::shared_ptr<Operation>> tensor_producer_;
+  std::map<std::string, std::shared_ptr<Tensor>> md5_tensor_;
   CompileOption options_;
 
  private: