From 2092d665b28b217b35627d0fd815b764b58ad0bd Mon Sep 17 00:00:00 2001
From: nihuini
Date: Wed, 21 Aug 2024 19:36:21 +0800
Subject: [PATCH] some zvfh infra

---
 src/cpu.cpp         | 16 ++++++++++++++++
 src/cpu.h           |  2 ++
 src/layer/noop.cpp  |  2 +-
 src/layer/split.cpp |  2 +-
 src/mat.h           |  8 ++++----
 src/net.cpp         | 18 +++++++++---------
 tests/testutil.cpp  | 12 ++++++------
 7 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/src/cpu.cpp b/src/cpu.cpp
index f9e64a1cc75b..e18671a21d50 100644
--- a/src/cpu.cpp
+++ b/src/cpu.cpp
@@ -2564,6 +2564,22 @@ int cpu_support_riscv_zfh()
 #endif
 }
 
+int cpu_support_riscv_zvfh()
+{
+    try_initialize_global_cpu_info();
+#if defined __ANDROID__ || defined __linux__
+#if __riscv
+    // v + f does not imply zvfh, but how to discover zvfh properly?
+    // upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
+    return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
+#else
+    return 0;
+#endif
+#else
+    return 0;
+#endif
+}
+
 int cpu_riscv_vlenb()
 {
     try_initialize_global_cpu_info();
diff --git a/src/cpu.h b/src/cpu.h
index 2ae6b8c3ffe9..9530d4f7e7e3 100644
--- a/src/cpu.h
+++ b/src/cpu.h
@@ -116,6 +116,8 @@ NCNN_EXPORT int cpu_support_loongson_mmi();
 NCNN_EXPORT int cpu_support_riscv_v();
 // zfh = riscv half-precision float
 NCNN_EXPORT int cpu_support_riscv_zfh();
+// zvfh = riscv vector half-precision float
+NCNN_EXPORT int cpu_support_riscv_zvfh();
 // vlenb = riscv vector length in bytes
 NCNN_EXPORT int cpu_riscv_vlenb();
diff --git a/src/layer/noop.cpp b/src/layer/noop.cpp
index a8b42f70e83b..b14f16ea88df 100644
--- a/src/layer/noop.cpp
+++ b/src/layer/noop.cpp
@@ -21,7 +21,7 @@ Noop::Noop()
 {
     support_inplace = true;
     support_packing = true;
-    support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zfh();
+    support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zvfh();
     support_bf16_storage = true;
 }
diff --git a/src/layer/split.cpp b/src/layer/split.cpp
index 996624dfe7a4..b5b24f8b3adf 100644
--- a/src/layer/split.cpp
+++ b/src/layer/split.cpp
@@ -22,7 +22,7 @@ Split::Split()
     one_blob_only = false;
     support_inplace = false;
     support_packing = true;
-    support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zfh();
+    support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zvfh();
     support_bf16_storage = true;
 }
diff --git a/src/mat.h b/src/mat.h
index 99e4a020c118..1cbce635bbd4 100644
--- a/src/mat.h
+++ b/src/mat.h
@@ -138,9 +138,9 @@ class NCNN_EXPORT Mat
     void fill(vfloat32m1_t _v);
     void fill(vuint16m1_t _v);
     void fill(vint8m1_t _v);
-#if __riscv_zfh
+#if __riscv_zvfh
     void fill(vfloat16m1_t _v);
-#endif // __riscv_zfh
+#endif // __riscv_zvfh
 #endif // __riscv_vector
     template<typename T>
     void fill(T v);
@@ -1132,7 +1132,7 @@ NCNN_FORCEINLINE void Mat::fill(vint8m1_t _v)
         ptr += packn;
     }
 }
-#if __riscv_zfh
+#if __riscv_zvfh
 NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v)
 {
     const int packn = cpu_riscv_vlenb() / 2;
@@ -1146,7 +1146,7 @@ NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v)
         ptr += packn;
     }
 }
-#endif // __riscv_zfh
+#endif // __riscv_zvfh
 #endif // __riscv_vector
 
 template<typename T>
diff --git a/src/net.cpp b/src/net.cpp
index 3574944e726e..558213083753 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -639,15 +639,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     }
     else
 #endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && layer->support_fp16_storage)
+#if NCNN_ZVFH
+    if (opt.use_fp16_storage && cpu_support_riscv_zvfh() && layer->support_fp16_storage)
     {
         Mat bottom_blob_fp16;
         cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
         bottom_blob = bottom_blob_fp16;
     }
     else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
 #if NCNN_BF16
     if (opt.use_bf16_storage && layer->support_bf16_storage)
     {
@@ -767,15 +767,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     }
     else
 #endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && !layer->support_fp16_storage)
+#if NCNN_ZVFH
+    if (opt.use_fp16_storage && cpu_support_riscv_zvfh() && !layer->support_fp16_storage)
     {
         Mat bottom_blob_fp32;
         cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
         bottom_blob = bottom_blob_fp32;
     }
     else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
 #if NCNN_BF16
     if (opt.use_bf16_storage && !layer->support_bf16_storage)
     {
@@ -2761,8 +2761,8 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
     }
     else
 #endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (d->opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && (type == 0))
+#if NCNN_ZVFH
+    if (d->opt.use_fp16_storage && cpu_support_riscv_zvfh() && (type == 0))
     {
         if (feat.elembits() == 16)
         {
@@ -2772,7 +2772,7 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
         }
     }
     else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
 #if NCNN_BF16
     if (d->opt.use_bf16_storage && (type == 0))
     {
diff --git a/tests/testutil.cpp b/tests/testutil.cpp
index 837043cb754c..c83d9bbed769 100644
--- a/tests/testutil.cpp
+++ b/tests/testutil.cpp
@@ -342,13 +342,13 @@ static int convert_to_optimal_layout(const ncnn::Mat& a, ncnn::Mat& a4, const nc
     }
     else
 #endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+#if NCNN_ZVFH
+    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_zvfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
         ncnn::cast_float32_to_float16(a, a4, opt);
     }
     else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
 #if NCNN_BF16
     if (opt.use_bf16_storage && op->support_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
@@ -470,13 +470,13 @@ static int convert_to_vanilla_layout(const ncnn::Mat& c4, ncnn::Mat& c, const nc
     }
     else
 #endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+#if NCNN_ZVFH
+    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_zvfh() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
     {
         ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
     }
     else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
 #if NCNN_BF16
     if (opt.use_bf16_storage && op->support_bf16_storage && c4_unpacked.elembits() == 16)
     {
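
Usage sketch (not part of the patch): the new runtime check pairs with the NCNN_ZVFH build option used in the net.cpp and testutil.cpp hunks above; NCNN_ZVFH itself is assumed to be wired up by the build system elsewhere in this series. A minimal example of how downstream code could consume the new API, with include paths that assume an installed ncnn:

    #include "ncnn/cpu.h"
    #include "ncnn/net.h"

    int main()
    {
        ncnn::Net net;
        // Opt into fp16 storage only when the vector half-float path is
        // (heuristically) usable. Note that cpu_support_riscv_zvfh()
        // currently approximates zvfh with the V and F hwcaps, per the
        // comment in cpu.cpp above.
        net.opt.use_fp16_storage = ncnn::cpu_support_riscv_zvfh() != 0;
        return 0;
    }

The runtime check mirrors the compile-time __riscv_zvfh guards added to mat.h: the vfloat16m1_t intrinsics only compile when the toolchain targets zvfh, while the hwcap heuristic decides at runtime whether the fp16 storage path is taken.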