From 6759d940a5692b9ddb197b6efd00907235b36d4d Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 18 Jul 2024 10:57:46 +0800 Subject: [PATCH 1/4] use ruapu detection only on windows arm --- src/cpu.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index b1afbba3f65..efbdefbd41e 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -129,8 +129,10 @@ #include #endif +#if (defined _WIN32 && (__aarch64__ || __arm__)) #define RUAPU_IMPLEMENTATION #include "ruapu.h" +#endif // topology info static int g_cpucount; @@ -596,9 +598,6 @@ static int get_cpu_support_x86_avx2() static int get_cpu_support_x86_avx_vnni() { -#if __APPLE__ - return ruapu_supports("avxvnni"); -#else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); @@ -617,13 +616,16 @@ static int get_cpu_support_x86_avx_vnni() x86_cpuid_sublevel(7, 1, cpu_info); return cpu_info[0] & (1u << 4); -#endif } static int get_cpu_support_x86_avx512() { #if __APPLE__ - return ruapu_supports("avx512f") && ruapu_supports("avx512bw") && ruapu_supports("avx512cd") && ruapu_supports("avx512dq") && ruapu_supports("avx512vl"); + return get_hw_capability("hw.optional.avx512f") + && get_hw_capability("hw.optional.avx512bw") + && get_hw_capability("hw.optional.avx512cd") + && get_hw_capability("hw.optional.avx512dq") + && get_hw_capability("hw.optional.avx512vl"); #else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); @@ -653,7 +655,7 @@ static int get_cpu_support_x86_avx512() static int get_cpu_support_x86_avx512_vnni() { #if __APPLE__ - return ruapu_supports("avx512vnni"); + return get_hw_capability("hw.optional.avx512vnni"); #else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); @@ -683,7 +685,7 @@ static int get_cpu_support_x86_avx512_vnni() static int get_cpu_support_x86_avx512_bf16() { #if __APPLE__ - return ruapu_supports("avx512bf16"); + return get_hw_capability("hw.optional.avx512bf16"); #else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); @@ -709,7 +711,7 @@ static int get_cpu_support_x86_avx512_bf16() static int get_cpu_support_x86_avx512_fp16() { #if __APPLE__ - return ruapu_supports("avx512fp16"); + return get_hw_capability("hw.optional.avx512fp16"); #else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); @@ -1867,7 +1869,7 @@ static void initialize_global_cpu_info() g_powersave = 0; initialize_cpu_thread_affinity_mask(g_cpu_affinity_mask_all, g_cpu_affinity_mask_little, g_cpu_affinity_mask_big); -#if (defined _WIN32 && (__aarch64__ || __arm__)) || __APPLE__ +#if (defined _WIN32 && (__aarch64__ || __arm__)) if (!is_being_debugged()) { ruapu_init(); From 60c43f1a95b095e279edccc1385a0d94ca3712da Mon Sep 17 00:00:00 2001 From: nihui Date: Thu, 18 Jul 2024 02:51:11 +0000 Subject: [PATCH 2/4] apply code-format changes --- src/cpu.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index efbdefbd41e..dc3831ec8dc 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -622,10 +622,10 @@ static int get_cpu_support_x86_avx512() { #if __APPLE__ return get_hw_capability("hw.optional.avx512f") - && get_hw_capability("hw.optional.avx512bw") - && get_hw_capability("hw.optional.avx512cd") - && get_hw_capability("hw.optional.avx512dq") - && get_hw_capability("hw.optional.avx512vl"); + && get_hw_capability("hw.optional.avx512bw") + && get_hw_capability("hw.optional.avx512cd") + && get_hw_capability("hw.optional.avx512dq") + && get_hw_capability("hw.optional.avx512vl"); #else unsigned int cpu_info[4] = {0}; x86_cpuid(0, cpu_info); From 619913d9688d2c8808b418b362d72c61a2fc1a6f Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 18 Jul 2024 11:11:26 +0800 Subject: [PATCH 3/4] leak mingw32 --- src/cpu.cpp | 20 ++++++++++---------- src/cpu.h | 4 ++-- src/platform.h.in | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index dc3831ec8dc..1bca84a6c70 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -46,7 +46,7 @@ #include #endif -#if defined _WIN32 && !(defined __MINGW32__) +#if defined _WIN32 #define WIN32_LEAN_AND_MEAN #include #include @@ -747,7 +747,7 @@ static int get_cpucount() count = emscripten_num_logical_cores(); else count = 1; -#elif (defined _WIN32 && !(defined __MINGW32__)) +#elif defined _WIN32 SYSTEM_INFO system_info; GetSystemInfo(&system_info); count = system_info.dwNumberOfProcessors; @@ -814,7 +814,7 @@ static int get_thread_siblings(int cpuid) static int get_physical_cpucount() { int count = 0; -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); if (glpi == NULL) @@ -1052,7 +1052,7 @@ static int get_big_cpu_data_cache_size(int level) static int get_cpu_level2_cachesize() { int size = 0; -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); if (glpi != NULL) @@ -1122,7 +1122,7 @@ static int get_cpu_level2_cachesize() static int get_cpu_level3_cachesize() { int size = 0; -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); LPFN_GLPI glpi = (LPFN_GLPI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetLogicalProcessorInformation"); if (glpi != NULL) @@ -1169,7 +1169,7 @@ static int get_cpu_level3_cachesize() return size; } -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 static ncnn::CpuSet get_smt_cpu_mask() { ncnn::CpuSet smt_cpu_mask; @@ -1263,7 +1263,7 @@ static int set_sched_affinity(const ncnn::CpuSet& thread_affinity_mask) return 0; } -#endif // (defined _WIN32 && !(defined __MINGW32__)) +#endif // defined _WIN32 #if defined __ANDROID__ || defined __linux__ static int get_max_freq_khz(int cpuid) @@ -1437,7 +1437,7 @@ static void initialize_cpu_thread_affinity_mask(ncnn::CpuSet& mask_all, ncnn::Cp mask_all.enable(i); } -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 // get max freq mhz for all cores int max_freq_mhz_min = INT_MAX; int max_freq_mhz_max = 0; @@ -1946,7 +1946,7 @@ static inline void try_initialize_global_cpu_info() namespace ncnn { -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 CpuSet::CpuSet() { disable_all(); @@ -2687,7 +2687,7 @@ const CpuSet& get_cpu_thread_affinity_mask(int powersave) int set_cpu_thread_affinity(const CpuSet& thread_affinity_mask) { try_initialize_global_cpu_info(); -#if defined __ANDROID__ || defined __linux__ || (defined _WIN32 && !(defined __MINGW32__)) +#if defined __ANDROID__ || defined __linux__ || defined _WIN32 #ifdef _OPENMP int num_threads = thread_affinity_mask.num_enabled(); diff --git a/src/cpu.h b/src/cpu.h index 7d6bfce1108..2ae6b8c3ffe 100644 --- a/src/cpu.h +++ b/src/cpu.h @@ -17,7 +17,7 @@ #include -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 #define WIN32_LEAN_AND_MEAN #include #endif @@ -40,7 +40,7 @@ class NCNN_EXPORT CpuSet int num_enabled() const; public: -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 ULONG_PTR mask; #endif #if defined __ANDROID__ || defined __linux__ diff --git a/src/platform.h.in b/src/platform.h.in index a0f17f39e31..50a9454b7da 100644 --- a/src/platform.h.in +++ b/src/platform.h.in @@ -70,7 +70,7 @@ #ifdef __cplusplus #if NCNN_THREADS -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 #define WIN32_LEAN_AND_MEAN #include #include @@ -86,7 +86,7 @@ namespace ncnn { #if NCNN_THREADS -#if (defined _WIN32 && !(defined __MINGW32__)) +#if defined _WIN32 class NCNN_EXPORT Mutex { public: @@ -141,7 +141,7 @@ public: private: DWORD key; }; -#else // (defined _WIN32 && !(defined __MINGW32__)) +#else // defined _WIN32 class NCNN_EXPORT Mutex { public: @@ -186,7 +186,7 @@ public: private: pthread_key_t key; }; -#endif // (defined _WIN32 && !(defined __MINGW32__)) +#endif // defined _WIN32 #else // NCNN_THREADS class NCNN_EXPORT Mutex { From ddc525fb3d811ab0688926a40ac4241d49630901 Mon Sep 17 00:00:00 2001 From: nihuini Date: Thu, 18 Jul 2024 11:16:59 +0800 Subject: [PATCH 4/4] mingw has no powerbase.h --- src/cpu.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cpu.cpp b/src/cpu.cpp index 1bca84a6c70..f9e64a1cc75 100644 --- a/src/cpu.cpp +++ b/src/cpu.cpp @@ -49,7 +49,6 @@ #if defined _WIN32 #define WIN32_LEAN_AND_MEAN #include -#include #endif #if defined __ANDROID__ || defined __linux__