diff --git a/src/cpu.cpp b/src/cpu.cpp
index f9e64a1cc75b..e18671a21d50 100644
--- a/src/cpu.cpp
+++ b/src/cpu.cpp
@@ -2564,6 +2564,22 @@ int cpu_support_riscv_zfh()
 #endif
 }
 
+int cpu_support_riscv_zvfh()
+{
+    try_initialize_global_cpu_info();
+#if defined __ANDROID__ || defined __linux__
+#if __riscv
+    // v + f does not imply zvfh, but how to discover zvfh properly?
+    // upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
+    return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
+#else
+    return 0;
+#endif
+#else
+    return 0;
+#endif
+}
+
 int cpu_riscv_vlenb()
 {
     try_initialize_global_cpu_info();
diff --git a/src/cpu.h b/src/cpu.h
index 2ae6b8c3ffe9..9530d4f7e7e3 100644
--- a/src/cpu.h
+++ b/src/cpu.h
@@ -116,6 +116,8 @@ NCNN_EXPORT int cpu_support_loongson_mmi();
 NCNN_EXPORT int cpu_support_riscv_v();
 // zfh = riscv half-precision float
 NCNN_EXPORT int cpu_support_riscv_zfh();
+// zvfh = riscv vector half-precision float
+NCNN_EXPORT int cpu_support_riscv_zvfh();
 // vlenb = riscv vector length in bytes
 NCNN_EXPORT int cpu_riscv_vlenb();
 
diff --git a/src/layer/noop.cpp b/src/layer/noop.cpp
index a8b42f70e83b..b14f16ea88df 100644
--- a/src/layer/noop.cpp
+++ b/src/layer/noop.cpp
@@ -21,7 +21,7 @@ Noop::Noop()
 {
     support_inplace = true;
     support_packing = true;
-    support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zfh();
+    support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zvfh();
     support_bf16_storage = true;
 }
 
diff --git a/src/layer/split.cpp b/src/layer/split.cpp
index 996624dfe7a4..b5b24f8b3adf 100644
--- a/src/layer/split.cpp
+++ b/src/layer/split.cpp
@@ -22,7 +22,7 @@ Split::Split()
     one_blob_only = false;
     support_inplace = false;
     support_packing = true;
-    support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zfh();
+    support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zvfh();
     support_bf16_storage = true;
 }
 
diff --git a/src/mat.h b/src/mat.h
index 99e4a020c118..1cbce635bbd4 100644
--- a/src/mat.h
+++ b/src/mat.h
@@ -138,9 +138,9 @@ class NCNN_EXPORT Mat
     void fill(vfloat32m1_t _v);
     void fill(vuint16m1_t _v);
     void fill(vint8m1_t _v);
-#if __riscv_zfh
+#if __riscv_zvfh
     void fill(vfloat16m1_t _v);
-#endif // __riscv_zfh
+#endif // __riscv_zvfh
 #endif // __riscv_vector
     template<typename T>
     void fill(T v);
@@ -1132,7 +1132,7 @@ NCNN_FORCEINLINE void Mat::fill(vint8m1_t _v)
         ptr += packn;
     }
 }
-#if __riscv_zfh
+#if __riscv_zvfh
 NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v)
 {
     const int packn = cpu_riscv_vlenb() / 2;
@@ -1146,7 +1146,7 @@ NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v)
         ptr += packn;
     }
 }
-#endif // __riscv_zfh
+#endif // __riscv_zvfh
 #endif // __riscv_vector
 
 template<typename T>
diff --git a/src/net.cpp b/src/net.cpp
index 3574944e726e..558213083753 100644
--- a/src/net.cpp
+++ b/src/net.cpp
@@ -639,15 +639,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     }
     else
 #endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && layer->support_fp16_storage)
+#if NCNN_ZVFH
+    if (opt.use_fp16_storage && cpu_support_riscv_zvfh() && layer->support_fp16_storage)
     {
         Mat bottom_blob_fp16;
         cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
         bottom_blob = bottom_blob_fp16;
     }
     else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
 #if NCNN_BF16
     if (opt.use_bf16_storage && layer->support_bf16_storage)
     {
@@ -767,15 +767,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
     }
     else
 #endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && !layer->support_fp16_storage)
+#if NCNN_ZVFH
+    if (opt.use_fp16_storage && cpu_support_riscv_zvfh() && !layer->support_fp16_storage)
     {
         Mat bottom_blob_fp32;
         cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
         bottom_blob = bottom_blob_fp32;
     }
     else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
 #if NCNN_BF16
     if (opt.use_bf16_storage && !layer->support_bf16_storage)
     {
@@ -2761,8 +2761,8 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
     }
     else
 #endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (d->opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && (type == 0))
+#if NCNN_ZVFH
+    if (d->opt.use_fp16_storage && cpu_support_riscv_zvfh() && (type == 0))
     {
         if (feat.elembits() == 16)
         {
@@ -2772,7 +2772,7 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
         }
     }
     else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
 #if NCNN_BF16
     if (d->opt.use_bf16_storage && (type == 0))
     {
diff --git a/tests/testutil.cpp b/tests/testutil.cpp
index 837043cb754c..c83d9bbed769 100644
--- a/tests/testutil.cpp
+++ b/tests/testutil.cpp
@@ -342,13 +342,13 @@ static int convert_to_optimal_layout(const ncnn::Mat& a, ncnn::Mat& a4, const nc
     }
     else
 #endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+#if NCNN_ZVFH
+    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_zvfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
         ncnn::cast_float32_to_float16(a, a4, opt);
     }
     else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
 #if NCNN_BF16
     if (opt.use_bf16_storage && op->support_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
     {
@@ -470,13 +470,13 @@ static int convert_to_vanilla_layout(const ncnn::Mat& c4, ncnn::Mat& c, const nc
     }
     else
 #endif // NCNN_VFPV4
-#if NCNN_RVV
-    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+#if NCNN_ZVFH
+    if (opt.use_fp16_storage && ncnn::cpu_support_riscv_zvfh() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
     {
         ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
     }
     else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
 #if NCNN_BF16
     if (opt.use_bf16_storage && op->support_bf16_storage && c4_unpacked.elembits() == 16)
     {
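
Note on the detection heuristic in the cpu.cpp hunk above: Linux HWCAP only exposes single-letter ISA bits, so the patch approximates zvfh as v + f and links the upstream discussion. On recent kernels and uapi headers that ship the riscv_hwprobe(2) syscall with a RISCV_HWPROBE_EXT_ZVFH bit, the extension can be queried directly. The following is a minimal standalone sketch, not part of the patch, assuming a riscv64 Linux toolchain whose headers define those constants:

// Hypothetical probe, for illustration only. Requires kernel headers
// that define __NR_riscv_hwprobe and RISCV_HWPROBE_EXT_ZVFH; older
// headers compile the fallback branch instead.
#include <asm/hwprobe.h>  // struct riscv_hwprobe, RISCV_HWPROBE_* keys
#include <sys/syscall.h>  // __NR_riscv_hwprobe
#include <unistd.h>       // syscall()

static int probe_riscv_zvfh()
{
#if defined(__NR_riscv_hwprobe) && defined(RISCV_HWPROBE_EXT_ZVFH)
    struct riscv_hwprobe pair;
    pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; // bitmask of IMA-family extensions
    pair.value = 0;

    // one key/value pair, no cpu-set filter, no flags
    if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0)
        return 0; // syscall unsupported or failed

    return (pair.value & RISCV_HWPROBE_EXT_ZVFH) ? 1 : 0;
#else
    return 0; // headers too old to know; a caller would fall back to v + f
#endif
}

Keeping the v + f approximation as the default, as the patch does, preserves behavior on kernels and headers that predate hwprobe.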