some zvfh infra
nihui committed Aug 21, 2024
1 parent 95561b4 commit 2092d66
Showing 7 changed files with 39 additions and 21 deletions.
16 changes: 16 additions & 0 deletions src/cpu.cpp
@@ -2564,6 +2564,22 @@ int cpu_support_riscv_zfh()
#endif
}

int cpu_support_riscv_zvfh()
{
try_initialize_global_cpu_info();
#if defined __ANDROID__ || defined __linux__
#if __riscv
// v + f does not imply zvfh, but how to discover zvfh properly?
// upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
#else
return 0;
#endif
#else
return 0;
#endif
}

int cpu_riscv_vlenb()
{
try_initialize_global_cpu_info();
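The hwcap check above is only an approximation. On kernels new enough to expose the riscv_hwprobe syscall, Zvfh can be queried directly; the following is a minimal sketch (not part of this commit), assuming the kernel headers define RISCV_HWPROBE_KEY_IMA_EXT_0 and RISCV_HWPROBE_EXT_ZVFH and that the syscall is available at runtime.

#if defined __riscv && defined __linux__
#include <asm/hwprobe.h>
#include <sys/syscall.h>
#include <unistd.h>
#endif

// Returns 1 if Zvfh is reported, 0 if it is not, -1 if the probe is
// unavailable (pre-hwprobe kernel or headers), in which case the hwcap
// heuristic above remains the fallback.
static int probe_riscv_zvfh_via_hwprobe()
{
#if defined __riscv && defined __linux__ && defined RISCV_HWPROBE_EXT_ZVFH
    struct riscv_hwprobe pair;
    pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
    pair.value = 0;
    if (syscall(__NR_riscv_hwprobe, &pair, 1, 0, NULL, 0) != 0)
        return -1;
    return (pair.value & RISCV_HWPROBE_EXT_ZVFH) ? 1 : 0;
#else
    return -1;
#endif
}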
2 changes: 2 additions & 0 deletions src/cpu.h
@@ -116,6 +116,8 @@ NCNN_EXPORT int cpu_support_loongson_mmi();
NCNN_EXPORT int cpu_support_riscv_v();
// zfh = riscv half-precision float
NCNN_EXPORT int cpu_support_riscv_zfh();
// zvfh = riscv vector half-precision float
NCNN_EXPORT int cpu_support_riscv_zvfh();
// vlenb = riscv vector length in bytes
NCNN_EXPORT int cpu_riscv_vlenb();

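Callers can branch on the new query at runtime next to the existing scalar check; a trivial usage sketch:

#include "cpu.h"
#include <stdio.h>

void report_riscv_fp16_support()
{
    // zfh covers scalar half-precision, zvfh covers the vector half-precision extension
    printf("riscv zfh:  %d\n", ncnn::cpu_support_riscv_zfh());
    printf("riscv zvfh: %d\n", ncnn::cpu_support_riscv_zvfh());
}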
2 changes: 1 addition & 1 deletion src/layer/noop.cpp
@@ -21,7 +21,7 @@ Noop::Noop()
{
support_inplace = true;
support_packing = true;
-support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zfh();
+support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zvfh();
support_bf16_storage = true;
}

2 changes: 1 addition & 1 deletion src/layer/split.cpp
@@ -22,7 +22,7 @@ Split::Split()
one_blob_only = false;
support_inplace = false;
support_packing = true;
-support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zfh();
+support_fp16_storage = cpu_support_arm_asimdhp() || cpu_support_riscv_zvfh();
support_bf16_storage = true;
}

8 changes: 4 additions & 4 deletions src/mat.h
@@ -138,9 +138,9 @@ class NCNN_EXPORT Mat
void fill(vfloat32m1_t _v);
void fill(vuint16m1_t _v);
void fill(vint8m1_t _v);
-#if __riscv_zfh
+#if __riscv_zvfh
void fill(vfloat16m1_t _v);
-#endif // __riscv_zfh
+#endif // __riscv_zvfh
#endif // __riscv_vector
template<typename T>
void fill(T v);
@@ -1132,7 +1132,7 @@ NCNN_FORCEINLINE void Mat::fill(vint8m1_t _v)
ptr += packn;
}
}
-#if __riscv_zfh
+#if __riscv_zvfh
NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v)
{
const int packn = cpu_riscv_vlenb() / 2;
@@ -1146,7 +1146,7 @@ NCNN_FORCEINLINE void Mat::fill(vfloat16m1_t _v)
ptr += packn;
}
}
-#endif // __riscv_zfh
+#endif // __riscv_zvfh
#endif // __riscv_vector

template<typename T>
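For reference, the Zvfh fill overload would be exercised roughly as below; a sketch only, assuming an RVV 1.0 toolchain that provides the __riscv_vsetvl_e16m1 and __riscv_vfmv_v_f_f16m1 intrinsics.

#if __riscv_vector && __riscv_zvfh
#include <riscv_vector.h>
#include "mat.h"
#include "cpu.h"

// Fill a packn-packed fp16 Mat with a constant, mirroring the fp32 fill usage.
static void fill_fp16(ncnn::Mat& m, float value)
{
    const int packn = ncnn::cpu_riscv_vlenb() / 2;
    const size_t vl = __riscv_vsetvl_e16m1(packn);
    m.fill(__riscv_vfmv_v_f_f16m1((_Float16)value, vl));
}
#endif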
18 changes: 9 additions & 9 deletions src/net.cpp
@@ -639,15 +639,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
}
else
#endif // NCNN_VFPV4
-#if NCNN_RVV
-if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && layer->support_fp16_storage)
+#if NCNN_ZVFH
+if (opt.use_fp16_storage && cpu_support_riscv_zvfh() && layer->support_fp16_storage)
{
Mat bottom_blob_fp16;
cast_float32_to_float16(bottom_blob, bottom_blob_fp16, opt);
bottom_blob = bottom_blob_fp16;
}
else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
#if NCNN_BF16
if (opt.use_bf16_storage && layer->support_bf16_storage)
{
@@ -767,15 +767,15 @@ int NetPrivate::convert_layout(Mat& bottom_blob, const Layer* layer, const Optio
}
else
#endif // NCNN_VFPV4
-#if NCNN_RVV
-if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && !layer->support_fp16_storage)
+#if NCNN_ZVFH
+if (opt.use_fp16_storage && cpu_support_riscv_zvfh() && !layer->support_fp16_storage)
{
Mat bottom_blob_fp32;
cast_float16_to_float32(bottom_blob, bottom_blob_fp32, opt);
bottom_blob = bottom_blob_fp32;
}
else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
#if NCNN_BF16
if (opt.use_bf16_storage && !layer->support_bf16_storage)
{
@@ -2761,8 +2761,8 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
}
else
#endif // NCNN_VFPV4
-#if NCNN_RVV
-if (d->opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && (type == 0))
+#if NCNN_ZVFH
+if (d->opt.use_fp16_storage && cpu_support_riscv_zvfh() && (type == 0))
{
if (feat.elembits() == 16)
{
@@ -2772,7 +2772,7 @@ int Extractor::extract(int blob_index, Mat& feat, int type)
}
}
else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
#if NCNN_BF16
if (d->opt.use_bf16_storage && (type == 0))
{
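End to end, the fp16 storage path stays opt-in through Option; a minimal usage sketch (model file and blob names are placeholders):

#include "net.h"

int run_fp16(const ncnn::Mat& in, ncnn::Mat& out)
{
    ncnn::Net net;
    // on riscv this now takes effect only when cpu_support_riscv_zvfh() returns 1
    net.opt.use_fp16_storage = true;
    if (net.load_param("model.param") || net.load_model("model.bin"))
        return -1;
    ncnn::Extractor ex = net.create_extractor();
    ex.input("data", in);
    return ex.extract("output", out);
}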
12 changes: 6 additions & 6 deletions tests/testutil.cpp
@@ -342,13 +342,13 @@ static int convert_to_optimal_layout(const ncnn::Mat& a, ncnn::Mat& a4, const nc
}
else
#endif // NCNN_VFPV4
-#if NCNN_RVV
-if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
+#if NCNN_ZVFH
+if (opt.use_fp16_storage && ncnn::cpu_support_riscv_zvfh() && op->support_fp16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
{
ncnn::cast_float32_to_float16(a, a4, opt);
}
else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
#if NCNN_BF16
if (opt.use_bf16_storage && op->support_bf16_storage && !(flag & TEST_LAYER_DISABLE_AUTO_INPUT_CASTING))
{
@@ -470,13 +470,13 @@ static int convert_to_vanilla_layout(const ncnn::Mat& c4, ncnn::Mat& c, const nc
}
else
#endif // NCNN_VFPV4
-#if NCNN_RVV
-if (opt.use_fp16_storage && ncnn::cpu_support_riscv_v() && ncnn::cpu_support_riscv_zfh() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
+#if NCNN_ZVFH
+if (opt.use_fp16_storage && ncnn::cpu_support_riscv_zvfh() && op->support_fp16_storage && c4_unpacked.elembits() == 16)
{
ncnn::cast_float16_to_float32(c4_unpacked, c, opt);
}
else
-#endif // NCNN_RVV
+#endif // NCNN_ZVFH
#if NCNN_BF16
if (opt.use_bf16_storage && op->support_bf16_storage && c4_unpacked.elembits() == 16)
{
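The test helpers lean on the public cast routines; used directly they look like this (a small sketch, assuming the target actually supports fp16 storage):

#include "mat.h"

void roundtrip_fp16(const ncnn::Mat& src, ncnn::Mat& dst, const ncnn::Option& opt)
{
    ncnn::Mat fp16;
    ncnn::cast_float32_to_float16(src, fp16, opt); // elembits() becomes 16
    ncnn::cast_float16_to_float32(fp16, dst, opt); // back to 32-bit storage
}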
