pnnx generate model_ncnn.cpp #5647

Open · wants to merge 15 commits into master
6 changes: 6 additions & 0 deletions .ci/pnnx.yml
@@ -196,7 +196,13 @@ jobs:
export OMP_NUM_THREADS=1
export MKL_NUM_THREADS=1
export MKL_ENABLE_INSTRUCTIONS=SSE4_2
export Torch_DIR=${{ci.workspace}}/pnnx-deps-torch-install/share/cmake/Torch
export ncnn_DIR=$(pwd)/build/install/lib/cmake/ncnn
echo ${{ci.workspace}}/pnnx-deps-torch-install
echo $ncnn_DIR
ls
cd tools/pnnx
cp tests/ncnn/CMakeListsForCPP.txt build/tests/ncnn/CMakeLists.txt
cd build && ctest --output-on-failure -j 16

- name: python-pnnx
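Note on the CI hunk: it exports Torch_DIR and ncnn_DIR so CMake can locate both packages, then overwrites the generated build/tests/ncnn/CMakeLists.txt with CMakeListsForCPP.txt (added below). Each test script's nested cmake/make step, run from inside ctest, then finds a project that builds its generated *_ncnn.cpp program.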
6 changes: 5 additions & 1 deletion tools/pnnx/src/main.cpp
@@ -226,6 +226,7 @@ int main(int argc, char** argv)
std::string ncnnparampath = ptbase + ".ncnn.param";
std::string ncnnbinpath = ptbase + ".ncnn.bin";
std::string ncnnpypath = ptbase + "_ncnn.py";
std::string ncnncpppath = ptbase + "_ncnn.cpp";
int fp16 = 1;
int optlevel = 2;
std::string device = "cpu";
@@ -267,6 +268,8 @@ int main(int argc, char** argv)
ncnnbinpath = std::string(value);
if (strcmp(key, "ncnnpy") == 0)
ncnnpypath = std::string(value);
if (strcmp(key, "ncnncpp") == 0)
ncnncpppath = std::string(value);
if (strcmp(key, "fp16") == 0)
fp16 = atoi(value);
if (strcmp(key, "optlevel") == 0)
@@ -292,6 +295,7 @@ int main(int argc, char** argv)
fprintf(stderr, "ncnnparam = %s\n", ncnnparampath.c_str());
fprintf(stderr, "ncnnbin = %s\n", ncnnbinpath.c_str());
fprintf(stderr, "ncnnpy = %s\n", ncnnpypath.c_str());
fprintf(stderr, "ncnncpp = %s\n", ncnncpppath.c_str());
fprintf(stderr, "fp16 = %d\n", fp16);
fprintf(stderr, "optlevel = %d\n", optlevel);
fprintf(stderr, "device = %s\n", device.c_str());
@@ -375,7 +379,7 @@ int main(int argc, char** argv)

pnnx::pass_ncnn(pnnx_graph, module_operators);

pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath, fp16);
pnnx::save_ncnn(pnnx_graph, ncnnparampath, ncnnbinpath, ncnnpypath, ncnncpppath, fp16);
}

// pnnx::Graph pnnx_graph2;
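The main.cpp change follows the existing key=value option pattern, so the C++ writer slots in beside the param/bin/py writers: the default output path is <ptbase>_ncnn.cpp and can be overridden with ncnncpp=. A hypothetical invocation (model name and shape assumed): pnnx model.pt inputshape=[1,3,224,224] ncnncpp=model_ncnn.cpp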
203 changes: 202 additions & 1 deletion tools/pnnx/src/save_ncnn.cpp
@@ -54,6 +54,50 @@ static const char* type_to_dtype_string(int type)
return "null";
}

static const char* type_to_libtorch_dtype_string(int type)
{
if (type == 1) return "Float";
if (type == 2) return "Double";
if (type == 3) return "Half";
if (type == 4) return "Int";
if (type == 5)
{
fprintf(stderr, "replace ncnn input torch.long type with torch.int\n");
return "Int";
}
if (type == 6) return "Short";
if (type == 7) return "Char";
if (type == 8) return "Byte";
if (type == 9) return "Bool";
if (type == 10) return "ComplexFloat";
if (type == 11) return "ComplexDouble";
if (type == 12) return "ComplexHalf";

fprintf(stderr, "unknown type %d.\n", type);
return "Float";
}

static size_t type_to_elemsize(int type)
{
if (type == 1) return 4;
if (type == 2) return 8;
if (type == 3) return 2;
if (type == 4) return 4;
if (type == 5)
{
fprintf(stderr, "replace ncnn input torch.long type with torch.int\n");
return 4;
}
if (type == 6) return 2;
if (type == 7) return 1;
if (type == 8) return 1;
if (type == 9) return 1;
if (type == 10) return 8;
if (type == 11) return 16;
if (type == 12) return 4;
return 4;
}

static bool string_is_positive_integer(const std::string& t)
{
for (size_t i = 0; i < t.size(); i++)
@@ -124,7 +168,7 @@ static size_t alignSize(size_t sz, int n)
return (sz + n - 1) & -n;
}

int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16)
int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, const std::string& cpppath, int fp16)
{
FILE* paramfp = fopen(parampath.c_str(), "wb");
if (!paramfp)
@@ -348,6 +392,8 @@ int save_ncnn(...)
fclose(paramfp);
fclose(binfp);

// py inference

FILE* pyfp = fopen(pypath.c_str(), "wb");
if (!pyfp)
{
@@ -462,6 +508,161 @@ int save_ncnn(...)

fclose(pyfp);

// ================
// cpp inference
// ================

FILE* cppfp = fopen(cpppath.c_str(), "wb");
if (!cppfp)
{
fprintf(stderr, "fopen %s failed\n", cpppath.c_str());
return -1;
}

// include

fprintf(cppfp, "#include <stdio.h>\n");
fprintf(cppfp, "#include <vector>\n");
fprintf(cppfp, "#include <torch/torch.h>\n");
fprintf(cppfp, "#include \"net.h\"\n\n");

// utils

fprintf(cppfp, "void copy_tensor2mat(const ncnn::Mat& m, const at::Tensor& t, size_t elemsize)\n");
fprintf(cppfp, "{\n");
fprintf(cppfp, " void* t_ptr = t.data_ptr();\n");
fprintf(cppfp, " size_t step = m.d * m.h * m.w * elemsize;\n");
fprintf(cppfp, " for (int q=0; q<m.c; q++)\n");
fprintf(cppfp, " {\n");
fprintf(cppfp, " const char* ptr = m.channel(q);\n");
fprintf(cppfp, " memcpy((void*)ptr, t_ptr, step);\n");
fprintf(cppfp, " t_ptr = (void*)((char*)t_ptr + step);\n");
fprintf(cppfp, " }\n");
fprintf(cppfp, "}\n\n");

fprintf(cppfp, "void copy_mat2tensor(const at::Tensor& t, const ncnn::Mat& m, size_t elemsize)\n");
fprintf(cppfp, "{\n");
fprintf(cppfp, " void* t_ptr = t.data_ptr();\n");
fprintf(cppfp, " size_t step = m.d * m.h * m.w * elemsize;\n");
fprintf(cppfp, " for (int q=0; q<m.c; q++)\n");
fprintf(cppfp, " {\n");
fprintf(cppfp, " const char* ptr = m.channel(q);\n");
fprintf(cppfp, " memcpy(t_ptr, (void*)ptr, step);\n");
fprintf(cppfp, " t_ptr = (void*)((char*)t_ptr + step);\n");
fprintf(cppfp, " }\n");
fprintf(cppfp, "}\n\n");

fprintf(cppfp, "at::Tensor create_tensor_from_mat(const ncnn::Mat& m)\n");
fprintf(cppfp, "{\n");
fprintf(cppfp, " at::Tensor t;\n");
fprintf(cppfp, " if (m.dims == 1) t = at::zeros({m.w});\n");
fprintf(cppfp, " if (m.dims == 2) t = at::zeros({m.h, m.w});\n");
fprintf(cppfp, " if (m.dims == 3) t = at::zeros({m.c, m.h, m.w});\n");
fprintf(cppfp, " if (m.dims == 4) t = at::zeros({m.c, m.d, m.h, m.w});\n");
fprintf(cppfp, " return t;\n");
fprintf(cppfp, "}\n\n");

// test inference

fprintf(cppfp, "int main(int argc, char** argv)\n");
fprintf(cppfp, "{\n");
fprintf(cppfp, " ncnn::Net net;\n");
fprintf(cppfp, " if (net.load_param(\"%s\"))\n", parampath.c_str());
fprintf(cppfp, " exit(-1);\n");
fprintf(cppfp, " if (net.load_model(\"%s\"))\n", binpath.c_str());
fprintf(cppfp, " exit(-1);\n\n");

fprintf(cppfp, " at::manual_seed(0);\n");

for (int input_index = 0;; input_index++)
{
std::string input_name = std::string("in") + std::to_string(input_index);
const Operand* r = g.get_operand(input_name);
if (!r)
break;

int numel = 1;
for (size_t j = 0; j < r->shape.size(); j++)
{
numel *= r->shape[j];
}
const int batch_index = r->params.at("__batch_index").i;

if (type_is_integer(r->type))
{
fprintf(cppfp, " at::Tensor %s_t = at::randint(10, {%d}, ", input_name.c_str(), numel);
fprintf(cppfp, "at::TensorOptions().dtype(at::k%s));\n", type_to_libtorch_dtype_string(r->type));
}
else
{
fprintf(cppfp, " at::Tensor %s_t = at::rand({%d}, ", input_name.c_str(), numel);
fprintf(cppfp, "at::TensorOptions().dtype(at::k%s));\n", type_to_libtorch_dtype_string(r->type));
}

fprintf(cppfp, " ncnn::Mat %s(", input_name.c_str());
for (int i = r->shape.size() - 1; i >= 0; i--)
{
// squeeze batch index
if (batch_index != 233 && i == batch_index && r->shape[i] == 1)
continue;
fprintf(cppfp, "%d, ", r->shape[i]);
}
fprintf(cppfp, "(size_t)%du);\n", type_to_elemsize(r->type));
fprintf(cppfp, " copy_tensor2mat(%s, %s_t, (size_t)%du);\n", input_name.c_str(), input_name.c_str(), type_to_elemsize(r->type));
}

fprintf(cppfp, " ncnn::Extractor ex = net.create_extractor();\n");

for (int input_index = 0;; input_index++)
{
std::string input_name = std::string("in") + std::to_string(input_index);
const Operand* r = g.get_operand(input_name);
if (!r)
break;
fprintf(cppfp, " ex.input(\"%s\", %s);\n", input_name.c_str(), input_name.c_str());
}

fprintf(cppfp, "\n");

for (int output_index = 0;; output_index++)
{
std::string output_name = std::string("out") + std::to_string(output_index);
const Operand* r = g.get_operand(output_name);
if (!r)
break;

fprintf(cppfp, " ncnn::Mat %s;\n", output_name.c_str());
fprintf(cppfp, " ex.extract(\"%s\", %s);\n", output_name.c_str(), output_name.c_str());

fprintf(cppfp, " at::Tensor %s_t = create_tensor_from_mat(%s).toType(at::k%s);\n",
output_name.c_str(), output_name.c_str(), type_to_libtorch_dtype_string(r->type));
fprintf(cppfp, " copy_mat2tensor(%s_t, %s, (size_t)%du);\n", output_name.c_str(), output_name.c_str(), type_to_elemsize(r->type));

// unsqueeze batch index
const int batch_index = r->params.at("__batch_index").i;
if (batch_index != 233)
fprintf(cppfp, " %s_t = %s_t.unsqueeze(%d);\n", output_name.c_str(), output_name.c_str(), batch_index);
}

fprintf(cppfp, " torch::save({");
for (int output_index = 0;; output_index++)
{
std::string output_name = std::string("out") + std::to_string(output_index);
const Operand* r = g.get_operand(output_name);
if (!r)
break;
if (output_index != 0)
fprintf(cppfp, ", ");
fprintf(cppfp, "%s_t", output_name.c_str());
}
fprintf(cppfp, "}, \"out.pt\");\n");

fprintf(cppfp, "\n");
fprintf(cppfp, " return 0;\n");
fprintf(cppfp, "}\n");

fclose(cppfp);

return 0;
}
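To make the emitted template concrete, this is roughly the model_ncnn.cpp that save_ncnn would write for a hypothetical model with a single float32 input in0 of shape [1, 3, 224, 224] (batch index 0, so the batch dimension is squeezed out of the ncnn::Mat) and a single float32 output out0. It is a sketch assembled by hand from the fprintf calls above, not captured output, and the param/bin file names are assumed:

#include <stdio.h>
#include <string.h>
#include <vector>
#include <torch/torch.h>
#include "net.h"

// copy_tensor2mat, copy_mat2tensor and create_tensor_from_mat emitted as above...

int main(int argc, char** argv)
{
    ncnn::Net net;
    if (net.load_param("model.ncnn.param"))
        exit(-1);
    if (net.load_model("model.ncnn.bin"))
        exit(-1);

    at::manual_seed(0);
    // 1*3*224*224 = 150528 elements; the flat random tensor is laid out channel by channel into the mat
    at::Tensor in0_t = at::rand({150528}, at::TensorOptions().dtype(at::kFloat));
    ncnn::Mat in0(224, 224, 3, (size_t)4u);
    copy_tensor2mat(in0, in0_t, (size_t)4u);

    ncnn::Extractor ex = net.create_extractor();
    ex.input("in0", in0);

    ncnn::Mat out0;
    ex.extract("out0", out0);
    at::Tensor out0_t = create_tensor_from_mat(out0).toType(at::kFloat);
    copy_mat2tensor(out0_t, out0, (size_t)4u);
    out0_t = out0_t.unsqueeze(0); // restore the squeezed batch dimension

    torch::save({out0_t}, "out.pt");

    return 0;
}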

2 changes: 1 addition & 1 deletion tools/pnnx/src/save_ncnn.h
@@ -19,7 +19,7 @@

namespace pnnx {

int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, int fp16);
int save_ncnn(const Graph& g, const std::string& parampath, const std::string& binpath, const std::string& pypath, const std::string& cpppath, int fp16);

} // namespace pnnx

13 changes: 13 additions & 0 deletions tools/pnnx/tests/ncnn/CMakeListsForCPP.txt
@@ -0,0 +1,13 @@
cmake_minimum_required(VERSION 3.5)

project(${FNAME})

# set(Torch_DIR "/home/beqjal/libtorch/share/cmake/Torch")
find_package(Torch REQUIRED)
# set(ncnn_DIR "/home/beqjal/ncnn/build/install/lib/cmake/ncnn" CACHE PATH "/home/beqjal/ncnn/build/install/lib/cmake/ncnn/ncnnConfig.cmake")
find_package(ncnn REQUIRED)

add_executable(${FNAME} ${FNAME}.cpp)
target_link_libraries(${FNAME} "${TORCH_LIBRARIES}")
target_link_libraries(${FNAME} ncnn)
set_property(TARGET ${FNAME} PROPERTY CXX_STANDARD 14)
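This CMakeLists is shared by every generated program: the test scripts configure it with cmake .. -DFNAME=test_xxx_ncnn, so ${FNAME} selects both the target name and the ${FNAME}.cpp source written by pnnx. find_package(Torch) and find_package(ncnn) resolve through the Torch_DIR and ncnn_DIR exported in the CI step above.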
9 changes: 8 additions & 1 deletion tools/pnnx/tests/ncnn/test_F_adaptive_avg_pool1d.py
@@ -46,9 +46,16 @@ def test():
import test_F_adaptive_avg_pool1d_ncnn
b = test_F_adaptive_avg_pool1d_ncnn.test_inference()

# ncnn cpp inference
os.system("mkdir -p build && cd build && cmake .. -DFNAME=test_F_adaptive_avg_pool1d_ncnn && make")
os.system("./build/test_F_adaptive_avg_pool1d_ncnn")
c = list(torch.jit.load("out.pt").parameters())
c = c[0]

b = b.reshape_as(a)
c = c.reshape_as(a)

return torch.allclose(a, b, 1e-4, 1e-4)
return torch.allclose(a, b, 1e-4, 1e-4) and torch.allclose(a, c, 1e-4, 1e-4)

if __name__ == "__main__":
if test():
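The round trip works because torch::save on a std::vector of tensors writes a TorchScript module archive whose parameters are the saved tensors in order, so torch.jit.load("out.pt").parameters() on the Python side yields them back; c[0] is the single output written by the C++ program. The same pattern repeats in the multi-output tests below, where c is zipped against the reference outputs.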
9 changes: 9 additions & 0 deletions tools/pnnx/tests/ncnn/test_F_adaptive_avg_pool2d.py
@@ -48,10 +48,19 @@ def test():
import test_F_adaptive_avg_pool2d_ncnn
b = test_F_adaptive_avg_pool2d_ncnn.test_inference()

# ncnn cpp inference
os.system("mkdir -p build && cd build && cmake .. -DFNAME=test_F_adaptive_avg_pool2d_ncnn && make")
os.system("./build/test_F_adaptive_avg_pool2d_ncnn")
c = list(torch.jit.load("out.pt").parameters())

for a0, b0 in zip(a, b):
b0 = b0.reshape_as(a0)
if not torch.allclose(a0, b0, 1e-4, 1e-4):
return False
for a0, c0 in zip(a, c):
c0 = c0.reshape_as(a0)
if not torch.allclose(a0, c0, 1e-4, 1e-4):
return False
return True

if __name__ == "__main__":
9 changes: 9 additions & 0 deletions tools/pnnx/tests/ncnn/test_F_adaptive_avg_pool3d.py
@@ -48,10 +48,19 @@ def test():
import test_F_adaptive_avg_pool3d_ncnn
b = test_F_adaptive_avg_pool3d_ncnn.test_inference()

# ncnn cpp inference
os.system("mkdir -p build && cd build && cmake .. -DFNAME=test_F_adaptive_avg_pool3d_ncnn && make")
os.system("./build/test_F_adaptive_avg_pool3d_ncnn")
c = list(torch.jit.load("out.pt").parameters())

for a0, b0 in zip(a, b):
b0 = b0.reshape_as(a0)
if not torch.allclose(a0, b0, 1e-4, 1e-4):
return False
for a0, c0 in zip(a, c):
c0 = c0.reshape_as(a0)
if not torch.allclose(a0, c0, 1e-4, 1e-4):
return False
return True

if __name__ == "__main__":
9 changes: 8 additions & 1 deletion tools/pnnx/tests/ncnn/test_F_adaptive_max_pool1d.py
@@ -46,9 +46,16 @@ def test():
import test_F_adaptive_max_pool1d_ncnn
b = test_F_adaptive_max_pool1d_ncnn.test_inference()

# ncnn cpp inference
os.system("mkdir -p build && cd build && cmake .. -DFNAME=test_F_adaptive_max_pool1d_ncnn && make")
os.system("./build/test_F_adaptive_max_pool1d_ncnn")
c = list(torch.jit.load("out.pt").parameters())
c = c[0]

b = b.reshape_as(a)
c = c.reshape_as(a)

return torch.allclose(a, b, 1e-4, 1e-4)
return torch.allclose(a, b, 1e-4, 1e-4) and torch.allclose(a, c, 1e-4, 1e-4)

if __name__ == "__main__":
if test():
9 changes: 9 additions & 0 deletions tools/pnnx/tests/ncnn/test_F_adaptive_max_pool2d.py
@@ -48,10 +48,19 @@ def test():
import test_F_adaptive_max_pool2d_ncnn
b = test_F_adaptive_max_pool2d_ncnn.test_inference()

# ncnn cpp inference
os.system("mkdir -p build && cd build && cmake .. -DFNAME=test_F_adaptive_max_pool2d_ncnn && make")
os.system("./build/test_F_adaptive_max_pool2d_ncnn")
c = list(torch.jit.load("out.pt").parameters())

for a0, b0 in zip(a, b):
b0 = b0.reshape_as(a0)
if not torch.allclose(a0, b0, 1e-4, 1e-4):
return False
for a0, c0 in zip(a, c):
c0 = c0.reshape_as(a0)
if not torch.allclose(a0, c0, 1e-4, 1e-4):
return False
return True

if __name__ == "__main__":