From ceb4324cd3bce8a8baa83a7d6fcdce5c72e9110c Mon Sep 17 00:00:00 2001
From: Jorgen Lundman
Date: Tue, 7 Mar 2023 14:21:28 +0900
Subject: [PATCH] Upstream: add kfpu_begin_ctx(X) kfpu_end_ctx(X)

To allow a little more flexibility and pass the xsave state around.
On Linux and FreeBSD these macros call the original kfpu_begin() and
kfpu_end() macros.

Signed-off-by: Joergen Lundman
---
 include/os/freebsd/spl/sys/simd_x86.h    |  4 ++++
 include/os/linux/kernel/linux/simd_x86.h |  3 +++
 include/os/windows/spl/sys/simd.h        | 15 +++++++++++++++
 include/zfs_fletcher.h                   | 11 +++++++++--
 lib/libspl/include/sys/simd.h            |  2 ++
 module/zcommon/zfs_fletcher_avx512.c     |  4 ++--
 module/zcommon/zfs_fletcher_intel.c      |  4 ++--
 module/zcommon/zfs_fletcher_sse.c        |  4 ++--
 8 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/include/os/freebsd/spl/sys/simd_x86.h b/include/os/freebsd/spl/sys/simd_x86.h
index 6512d4fcba4f..36e5f26cf74f 100644
--- a/include/os/freebsd/spl/sys/simd_x86.h
+++ b/include/os/freebsd/spl/sys/simd_x86.h
@@ -45,11 +45,15 @@
 		fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);\
 }
 
+#define	kfpu_begin_ctx(CTX)	kfpu_begin()
+
 #define	kfpu_end()	{ \
 	if (__predict_false(curpcb->pcb_flags & PCB_FPUNOSAVE)) \
 		fpu_kern_leave(curthread, NULL); \
 }
 
+#define	kfpu_end_ctx(CTX)	kfpu_end()
+
 /*
  * Check if OS supports AVX and AVX2 by checking XCR0
  * Only call this function if CPUID indicates that AVX feature is
diff --git a/include/os/linux/kernel/linux/simd_x86.h b/include/os/linux/kernel/linux/simd_x86.h
index 1d77f0487a30..631e2d3ffcd5 100644
--- a/include/os/linux/kernel/linux/simd_x86.h
+++ b/include/os/linux/kernel/linux/simd_x86.h
@@ -401,6 +401,9 @@ kfpu_end(void)
 #endif /* defined(HAVE_KERNEL_FPU_INTERNAL */
 #endif /* defined(KERNEL_EXPORTS_X86_FPU) */
 
+#define	kfpu_begin_ctx(CTX)	kfpu_begin()
+#define	kfpu_end_ctx(CTX)	kfpu_end()
+
 /*
  * Linux kernel provides an interface for CPU feature testing.
  */
diff --git a/include/os/windows/spl/sys/simd.h b/include/os/windows/spl/sys/simd.h
index ed46cabd3770..e5c45ca3cc91 100644
--- a/include/os/windows/spl/sys/simd.h
+++ b/include/os/windows/spl/sys/simd.h
@@ -116,6 +116,21 @@ extern uint32_t kfpu_state;
 	if (NT_SUCCESS(saveStatus)) \
 		KeRestoreExtendedProcessorState(&SaveState);
 
+/*
+ * Variants of kfpu_begin()/kfpu_end() that keep the save status and the
+ * XSTATE_SAVE area in the caller-supplied CTX, so the state can travel
+ * with the context between the begin and end calls.
+ */
+#define	kfpu_begin_ctx(CTX) do { \
+	(CTX)->saveStatus = KeSaveExtendedProcessorState(kfpu_state, \
+	    &(CTX)->SaveState); \
+} while (0)
+
+#define	kfpu_end_ctx(CTX) do { \
+	if (NT_SUCCESS((CTX)->saveStatus)) \
+		KeRestoreExtendedProcessorState(&(CTX)->SaveState); \
+} while (0)
+
 /*
  * CPUID feature tests for user-space. Linux kernel provides an interface for
  * CPU feature testing.
diff --git a/include/zfs_fletcher.h b/include/zfs_fletcher.h
index db5b539648a6..630d99a04153 100644
--- a/include/zfs_fletcher.h
+++ b/include/zfs_fletcher.h
@@ -91,8 +91,8 @@ typedef struct zfs_fletcher_aarch64_neon {
 	uint64_t v[2] __attribute__((aligned(16)));
 } zfs_fletcher_aarch64_neon_t;
 
-
-typedef union fletcher_4_ctx {
+typedef struct fletcher_4_ctx {
+	union {
 	zio_cksum_t scalar;
 	zfs_fletcher_superscalar_t superscalar[4];
 
@@ -108,6 +108,13 @@ typedef union fletcher_4_ctx {
 #if defined(__aarch64__)
 	zfs_fletcher_aarch64_neon_t aarch64_neon[4];
 #endif
+	}; /* anonymous (C11): members still accessed as ctx->sse etc. */
+
+#if defined(_WIN32) && defined(_KERNEL) /* kfpu_begin_ctx() */
+	NTSTATUS saveStatus;
+	XSTATE_SAVE SaveState;
+#endif
+
 } fletcher_4_ctx_t;
 
 /*
diff --git a/lib/libspl/include/sys/simd.h b/lib/libspl/include/sys/simd.h
index 7b06ddf58305..e3026b0e4510 100644
--- a/lib/libspl/include/sys/simd.h
+++ b/lib/libspl/include/sys/simd.h
@@ -60,6 +60,8 @@ extern unsigned long getauxval(unsigned long type);
 #define	kfpu_end()		do {} while (0)
 #define	kfpu_init()		0
 #define	kfpu_fini()		((void) 0)
+#define	kfpu_begin_ctx(CTX)	kfpu_begin()
+#define	kfpu_end_ctx(CTX)	kfpu_end()
 
 /*
  * CPUID feature tests for user-space.
diff --git a/module/zcommon/zfs_fletcher_avx512.c b/module/zcommon/zfs_fletcher_avx512.c
index 4a3d5cb24ab5..6493c3e3da40 100644
--- a/module/zcommon/zfs_fletcher_avx512.c
+++ b/module/zcommon/zfs_fletcher_avx512.c
@@ -39,7 +39,7 @@ ZFS_NO_SANITIZE_UNDEFINED
 static void
 fletcher_4_avx512f_init(fletcher_4_ctx_t *ctx)
 {
-	kfpu_begin();
+	kfpu_begin_ctx(ctx);
 	memset(ctx->avx512, 0, 4 * sizeof (zfs_fletcher_avx512_t));
 }
 
@@ -73,7 +73,7 @@ fletcher_4_avx512f_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
 	}
 
 	ZIO_SET_CHECKSUM(zcp, A, B, C, D);
-	kfpu_end();
+	kfpu_end_ctx(ctx);
 }
 
 #define	FLETCHER_4_AVX512_RESTORE_CTX(ctx) \
diff --git a/module/zcommon/zfs_fletcher_intel.c b/module/zcommon/zfs_fletcher_intel.c
index c124d49280c1..258730aa7173 100644
--- a/module/zcommon/zfs_fletcher_intel.c
+++ b/module/zcommon/zfs_fletcher_intel.c
@@ -51,7 +51,7 @@ ZFS_NO_SANITIZE_UNDEFINED
 static void
 fletcher_4_avx2_init(fletcher_4_ctx_t *ctx)
 {
-	kfpu_begin();
+	kfpu_begin_ctx(ctx);
 	memset(ctx->avx, 0, 4 * sizeof (zfs_fletcher_avx_t));
 }
 
@@ -82,7 +82,7 @@ fletcher_4_avx2_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
 	    64 * ctx->avx[3].v[3];
 
 	ZIO_SET_CHECKSUM(zcp, A, B, C, D);
-	kfpu_end();
+	kfpu_end_ctx(ctx);
 }
 
 #define	FLETCHER_4_AVX2_RESTORE_CTX(ctx) \
diff --git a/module/zcommon/zfs_fletcher_sse.c b/module/zcommon/zfs_fletcher_sse.c
index 6c78830be994..566126b57666 100644
--- a/module/zcommon/zfs_fletcher_sse.c
+++ b/module/zcommon/zfs_fletcher_sse.c
@@ -53,7 +53,7 @@ ZFS_NO_SANITIZE_UNDEFINED
 static void
 fletcher_4_sse2_init(fletcher_4_ctx_t *ctx)
 {
-	kfpu_begin();
+	kfpu_begin_ctx(ctx);
 	memset(ctx->sse, 0, 4 * sizeof (zfs_fletcher_sse_t));
 }
 
@@ -81,7 +81,7 @@ fletcher_4_sse2_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
 	    8 * ctx->sse[2].v[1] + ctx->sse[1].v[1];
 
 	ZIO_SET_CHECKSUM(zcp, A, B, C, D);
-	kfpu_end();
+	kfpu_end_ctx(ctx);
 }
 
 #define	FLETCHER_4_SSE_RESTORE_CTX(ctx) \