Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

build: armv7 and android static omp support #1600

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,10 @@ macro(ct2_compile_kernels_for_isa isa flag)
list(APPEND SOURCES ${CMAKE_CURRENT_BINARY_DIR}/kernels_${isa}.cc)
endmacro()

if(CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(aarch64)"
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(aarch64)|(armv7-a)"
OR (APPLE AND CMAKE_OSX_ARCHITECTURES STREQUAL "arm64"))
add_definitions(-DCT2_ARM64_BUILD)
set(CT2_BUILD_ARCH "arm64")
add_definitions(-DCT2_ARM_BUILD)
set(CT2_BUILD_ARCH "arm")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86_64)|(amd64)|(AMD64)")
add_definitions(-DCT2_X86_BUILD)
set(CT2_BUILD_ARCH "x86_64")
Expand Down Expand Up @@ -261,14 +261,19 @@ if(ENABLE_CPU_DISPATCH)
ct2_compile_kernels_for_isa(avx2 "-mavx2 -mfma")
ct2_compile_kernels_for_isa(avx512 "-mavx512f -mavx512cd -mavx512vl -mavx512bw -mavx512dq")
endif()
elseif(CT2_BUILD_ARCH STREQUAL "arm64")
elseif(CT2_BUILD_ARCH STREQUAL "arm")
ct2_compile_kernels_for_isa(neon "-DUSE_NEON")
endif()
endif()

if(NOT OPENMP_RUNTIME STREQUAL "NONE")
if(WIN32)
add_compile_options("/openmp")
elseif(ANDROID)
# use static omp in Android
set(OpenMP_CXX_LIBRARIES -fopenmp -static-openmp)
add_compile_options("-fopenmp")
set(OpenMP_CXX_FOUND 1)
else()
find_package(OpenMP)
if(OpenMP_CXX_FOUND)
Expand Down
2 changes: 1 addition & 1 deletion src/cpu/cpu_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ namespace ctranslate2 {
}
}

#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)

namespace ctranslate2 {
namespace cpu {
Expand Down
2 changes: 1 addition & 1 deletion src/cpu/cpu_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace ctranslate2 {
bool cpu_supports_avx();
bool cpu_supports_avx2();
bool cpu_supports_avx512();
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
bool cpu_supports_neon();
#endif

Expand Down
6 changes: 3 additions & 3 deletions src/cpu/cpu_isa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace ctranslate2 {
return "AVX2";
case CpuIsa::AVX512:
return "AVX512";
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
case CpuIsa::NEON:
return "NEON";
#endif
Expand All @@ -51,7 +51,7 @@ namespace ctranslate2 {
return try_isa(env_isa, CpuIsa::AVX2, cpu_supports_avx2());
if (env_isa == "AVX")
return try_isa(env_isa, CpuIsa::AVX, cpu_supports_avx());
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
if (env_isa == "NEON")
return try_isa(env_isa, CpuIsa::NEON, cpu_supports_neon());
#endif
Expand All @@ -68,7 +68,7 @@ namespace ctranslate2 {
return CpuIsa::AVX2;
if (cpu_supports_avx())
return CpuIsa::AVX;
# elif defined(CT2_ARM64_BUILD)
# elif defined(CT2_ARM_BUILD)
if (cpu_supports_neon())
return CpuIsa::NEON;
# endif
Expand Down
4 changes: 2 additions & 2 deletions src/cpu/cpu_isa.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace ctranslate2 {
AVX,
AVX2,
AVX512,
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
NEON,
#endif
};
Expand Down Expand Up @@ -48,7 +48,7 @@ namespace ctranslate2 {
CPU_ISA_CASE(cpu::CpuIsa::AVX, SINGLE_ARG(STMTS)) \
CPU_ISA_DEFAULT(cpu::CpuIsa::GENERIC, SINGLE_ARG(STMTS)) \
}
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
# define CPU_ISA_DISPATCH(STMTS) \
switch (cpu::get_cpu_isa()) { \
CPU_ISA_CASE(cpu::CpuIsa::NEON, SINGLE_ARG(STMTS)) \
Expand Down
16 changes: 16 additions & 0 deletions src/cpu/vec_neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,19 +144,35 @@ namespace ctranslate2 {
}

// Element-wise quotient a / b of two NEON vectors.
static inline value_type div(value_type a, value_type b) {
#if !defined(__aarch64__)
  // The vdivq_f32 intrinsic is only used when targeting AArch64; on other
  // ARM targets fall back to the compiler's element-wise vector operator.
  return a / b;
#else
  return vdivq_f32(a, b);
#endif
}

// Element-wise multiply-accumulate: returns a * b + c.
static inline value_type mul_add(value_type a, value_type b, value_type c) {
#if !defined(__aarch64__)
  // Kept as a single expression built from the compiler's element-wise vector
  // operators; the vfmaq_f32 intrinsic is only used when targeting AArch64.
  return a * b + c;
#else
  // Note the argument order: the accumulator comes first.
  return vfmaq_f32(c, a, b);
#endif
}

// Horizontal sum: adds the four float lanes of `a` into a single scalar.
static inline float reduce_add(value_type a) {
#if !defined(__aarch64__)
  // The across-vector vaddvq_f32 intrinsic is only used when targeting
  // AArch64; elsewhere, read each lane and accumulate from lane 0 to lane 3.
  const float first_pair = a[0] + a[1];
  return first_pair + a[2] + a[3];
#else
  return vaddvq_f32(a);
#endif
}

// Horizontal maximum: returns the largest of the four float lanes of `a`.
//
// On AArch64 this is a single across-vector intrinsic. On other ARM targets
// the reduction is done with the pairwise-max intrinsic instead of a scalar
// std::max fold, so that:
//   - the reduction stays in NEON registers and does not depend on the
//     compiler-specific vector subscript extension or on <algorithm>;
//   - a NaN in any lane is handled the same way as on the AArch64 path
//     (a left-to-right std::max fold only keeps a NaN that sits in lane 0).
static inline float reduce_max(value_type a) {
#ifdef __aarch64__
  return vmaxvq_f32(a);
#else
  // Step 1: [max(a0, a1), max(a2, a3)].
  float32x2_t pair_max = vpmax_f32(vget_low_f32(a), vget_high_f32(a));
  // Step 2: both lanes now hold the maximum over all four inputs.
  pair_max = vpmax_f32(pair_max, pair_max);
  return vget_lane_f32(pair_max, 0);
#endif
}

static inline value_type round(value_type v) {
Expand Down
2 changes: 1 addition & 1 deletion src/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ namespace ctranslate2 {
cpu::cpu_supports_avx(),
cpu::cpu_supports_avx2(),
cpu::cpu_supports_avx512());
#elif defined(CT2_ARM64_BUILD)
#elif defined(CT2_ARM_BUILD)
spdlog::info("CPU: {} (NEON={})",
cpu::cpu_vendor(),
cpu::cpu_supports_neon());
Expand Down
Loading