Use a wrapper header (sse_wrap.h) for SSE2 intrinsics instead of patching sources.

Summary of the change set:
  * Dockerfile-aarch64: stop rewriting __m / _mm_ identifiers in aligner_* and
    sse_util* with sed at image build time.
  * Makefile: drop -DWITH_AARCH64; take BUILD_HOST from $HOSTNAME, falling back
    to the output of hostname; take BUILD_TIME from "date -u -r NEWS" rather
    than the current time.
  * aligner_sw.h, sse_util.h: include "sse_wrap.h" in place of the
    WITH_AARCH64-conditional include.
  * sse_wrap.h (new): on __aarch64__ include simde/x86/sse2.h and map the
    listed _mm_* names and __m128i onto their simde equivalents; otherwise
    include emmintrin.h.

Review notes:
  * This patch was recovered from a copy whose newlines and indentation had
    been collapsed. Line structure is reconstructed so that every hunk matches
    the counts in its header. Leading whitespace on context and changed lines
    is a best guess; verify against the tree or apply with whitespace-
    insensitive matching.
  * Makefile, BUILD_HOST: the dollar sign is doubled. With a single dollar
    sign, make expands ${HOSTNAME:-...} itself as a reference to an undefined
    make variable, yielding an empty string, so the shell never sees the
    default-value expansion. The Makefile index line above that hunk still
    describes the blob before this correction.
  * Text inside angle brackets had been stripped. The emmintrin.h include
    targets and the URL in the GPL notice are restored. Three context lines
    reading just "#include" (two in aligner_sw.h, one in sse_util.h) still
    lack their targets; TODO: restore them from the tree before applying.

diff --git a/Dockerfile-aarch64 b/Dockerfile-aarch64
index 22e4e9fc1..cba03fe7b 100644
--- a/Dockerfile-aarch64
+++ b/Dockerfile-aarch64
@@ -18,12 +18,4 @@ RUN add-apt-repository -y universe && \
     apt-get install -yq \
         libtbb-dev
 
-# Patch to run with SIMD Everywhere (simde) for aarch64 case.
-# https://github.com/nemequ/simde
-# https://gitlab.com/arm-hpc/packages/wikis/packages/bowtie2
-RUN sed -i 's/__m/simde__m/g' aligner_*
-RUN sed -i 's/__m/simde__m/g' sse_util*
-RUN sed -i 's/_mm_/simde_mm_/g' aligner_*
-RUN sed -i 's/_mm_/simde_mm_/g' sse_util*
-
 CMD bash
diff --git a/Makefile b/Makefile
index b5ed9900b..d56c07744 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,7 @@ CPP ?= $(GCC_PREFIX)/g++$(GCC_SUFFIX)
 CXX ?= $(CPP)
 CXXFLAGS += -std=c++98
 ifeq (aarch64,$(shell uname -m))
-  CXXFLAGS += -fopenmp-simd -DWITH_AARCH64
+  CXXFLAGS += -fopenmp-simd
   CPPFLAGS += -Ithird_party/simde
 endif
 
@@ -326,8 +326,8 @@ both-sanitized: bowtie2-align-s-sanitized bowtie2-build-s-sanitized bowtie2-alig
 
 DEFS := -fno-strict-aliasing \
         -DBOWTIE2_VERSION="\"`cat VERSION`\"" \
-        -DBUILD_HOST="\"`hostname`\"" \
-        -DBUILD_TIME="\"`date`\"" \
+        -DBUILD_HOST="\"$${HOSTNAME:-`hostname`}\"" \
+        -DBUILD_TIME="\"`date -u -r NEWS`\"" \
         -DCOMPILER_VERSION="\"`$(CXX) -v 2>&1 | tail -1`\"" \
         $(FILE_FLAGS) \
         $(PREF_DEF) \
diff --git a/aligner_sw.h b/aligner_sw.h
index 9c298217c..78440cbca 100644
--- a/aligner_sw.h
+++ b/aligner_sw.h
@@ -70,11 +70,7 @@
 #include
 #include
 #include "threading.h"
-#ifdef WITH_AARCH64
-#include "simde/x86/sse2.h"
-#else
-#include <emmintrin.h>
-#endif
+#include "sse_wrap.h"
 #include "aligner_sw_common.h"
 #include "aligner_sw_nuc.h"
 #include "ds.h"
diff --git a/sse_util.h b/sse_util.h
index c199855f3..b799ca0d0 100644
--- a/sse_util.h
+++ b/sse_util.h
@@ -24,11 +24,7 @@
 #include "ds.h"
 #include "limit.h"
 #include
-#ifdef WITH_AARCH64
-#include "simde/x86/sse2.h"
-#else
-#include <emmintrin.h>
-#endif
+#include "sse_wrap.h"
 
 class EList_m128i {
 public:
diff --git a/sse_wrap.h b/sse_wrap.h
new file mode 100644
index 000000000..94ed431e8
--- /dev/null
+++ b/sse_wrap.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2011, Ben Langmead
+ *
+ * This file is part of Bowtie 2.
+ *
+ * Bowtie 2 is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Bowtie 2 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Bowtie 2. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * sse_wrap.h
+ *
+ * Routines to wrap Streaming SIMD Extensions (SSE) emmintrin.h
+ * for an Intel x86 CPU and SIMD Everywhere (simde) for other CPUs.
+ */
+
+#ifndef SSE_WRAP_H_
+#define SSE_WRAP_H_
+
+#ifdef __aarch64__
+#include "simde/x86/sse2.h"
+#else
+#include <emmintrin.h>
+#endif
+
+#ifdef __aarch64__
+typedef simde__m128i __m128i;
+#define _mm_adds_epi16(x, y) simde_mm_adds_epi16(x, y)
+#define _mm_adds_epu8(x, y) simde_mm_adds_epu8(x, y)
+#define _mm_cmpeq_epi16(x, y) simde_mm_cmpeq_epi16(x, y)
+#define _mm_cmpeq_epi8(x, y) simde_mm_cmpeq_epi8(x, y)
+#define _mm_cmpgt_epi16(x, y) simde_mm_cmpgt_epi16(x, y)
+#define _mm_cmpgt_epi8(x, y) simde_mm_cmpgt_epi8(x, y)
+#define _mm_cmplt_epi16(x, y) simde_mm_cmplt_epi16(x, y)
+#define _mm_cmplt_epu8(x, y) simde_mm_cmplt_epu8(x, y)
+#define _mm_extract_epi16(x, y) simde_mm_extract_epi16(x, y)
+#define _mm_insert_epi16(x, y, z) simde_mm_insert_epi16(x, y, z)
+#define _mm_load_si128(x) simde_mm_load_si128(x)
+#define _mm_max_epi16(x, y) simde_mm_max_epi16(x, y)
+#define _mm_max_epu8(x, y) simde_mm_max_epu8(x, y)
+#define _mm_movemask_epi8(x) simde_mm_movemask_epi8(x)
+#define _mm_or_si128(x, y) simde_mm_or_si128(x, y)
+#define _mm_setzero_si128() simde_mm_setzero_si128()
+#define _mm_shuffle_epi32(x, y) simde_mm_shuffle_epi32(x, y)
+#define _mm_shufflelo_epi16(x, y) simde_mm_shufflelo_epi16(x, y)
+#define _mm_slli_epi16(x, y) simde_mm_slli_epi16(x, y)
+#define _mm_slli_si128(x, y) simde_mm_slli_si128(x, y)
+#define _mm_srli_epi16(x, y) simde_mm_srli_epi16(x, y)
+#define _mm_srli_epu8(x, y) simde_mm_srli_epu8(x, y)
+#define _mm_srli_si128(x, y) simde_mm_srli_si128(x, y)
+#define _mm_store_si128(x, y) simde_mm_store_si128(x, y)
+#define _mm_subs_epi16(x, y) simde_mm_subs_epi16(x, y)
+#define _mm_subs_epu8(x, y) simde_mm_subs_epu8(x, y)
+#define _mm_xor_si128(x, y) simde_mm_xor_si128(x, y)
+#endif
+
+#endif /* SSE_WRAP_H_ */