Skip to content

Commit

Permalink
AES-NI x64 support
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasjones committed May 21, 2014
1 parent 7a3e569 commit 4f5ae20
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 9 deletions.
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ if ARCH_x86
minerd_SOURCES += sha2-x86.S scrypt-x86.S
endif
if ARCH_x86_64
minerd_SOURCES += sha2-x64.S scrypt-x64.S
minerd_SOURCES += sha2-x64.S scrypt-x64.S aesb-x64.S
endif
if ARCH_ARM
minerd_SOURCES += sha2-arm.S scrypt-arm.S
Expand Down
59 changes: 59 additions & 0 deletions aesb-x64.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include "cpuminer-config.h"

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

/*
 * fast_aesb_single_round(in, out, expandedKey)
 *
 * Loads 16 bytes from `in` (the state) and 16 bytes from `expandedKey`
 * (the round key), applies a single AESENC round, and stores the 16-byte
 * result to `out`.  Every memory access uses MOVDQU, so none of the three
 * pointers has to be 16-byte aligned.
 *
 * Uses xmm1 and xmm2 as scratch.  The return register is never written,
 * so callers must not rely on a return value.
 */
.text
.p2align 6
/* The symbol is exported both with and without a leading underscore. */
.globl fast_aesb_single_round
.globl _fast_aesb_single_round
fast_aesb_single_round:
_fast_aesb_single_round:
#if defined(_WIN64) || defined(__CYGWIN__)
/* Microsoft x64 calling convention: in = rcx, out = rdx, expandedKey = r8 */
movdqu (%rcx), %xmm1
movdqu (%r8), %xmm2
aesenc %xmm2, %xmm1
movdqu %xmm1, (%rdx)
#else
/* System V AMD64 calling convention: in = rdi, out = rsi, expandedKey = rdx */
movdqu (%rdi), %xmm1
movdqu (%rdx), %xmm2
aesenc %xmm2, %xmm1
movdqu %xmm1, (%rsi)
#endif
ret

/*
 * fast_aesb_pseudo_round_mut(val, expandedKey)
 *
 * Loads the 16-byte block at `val`, runs it through ten consecutive AESENC
 * rounds using the ten 16-byte round keys stored back to back starting at
 * `expandedKey`, and writes the result back to `val` in place.
 *
 * Each round key is fetched with MOVDQU into xmm2 before AESENC.  The
 * legacy-SSE form of AESENC with a memory operand faults (#GP) unless the
 * address is 16-byte aligned, and the C prototype only promises a uint8_t
 * pointer, so the key is loaded explicitly to accept any alignment.
 *
 * Uses r9, r10, xmm1 and xmm2 as scratch and advances the register holding
 * `expandedKey`.  The return register is never written, so callers must
 * not rely on a return value.
 */
.text
.p2align 6
.globl fast_aesb_pseudo_round_mut
.globl _fast_aesb_pseudo_round_mut
fast_aesb_pseudo_round_mut:
_fast_aesb_pseudo_round_mut:
#if defined(_WIN64) || defined(__CYGWIN__)
/* Microsoft x64 calling convention: val = rcx, expandedKey = rdx */
mov $0, %r9             /* r9  = round counter */
mov $10, %r10           /* r10 = number of rounds */
movdqu (%rcx), %xmm1    /* xmm1 = state */

.LOOP:
movdqu (%rdx), %xmm2    /* alignment-agnostic round-key load */
aesenc %xmm2, %xmm1
add $0x10, %rdx         /* step to the next 16-byte round key */
inc %r9
cmp %r10, %r9
jl .LOOP

movdqu %xmm1, (%rcx)
#else
/* System V AMD64 calling convention: val = rdi, expandedKey = rsi */
mov $0, %r9             /* r9  = round counter */
mov $10, %r10           /* r10 = number of rounds */
movdqu (%rdi), %xmm1    /* xmm1 = state */

.LOOP:
movdqu (%rsi), %xmm2    /* alignment-agnostic round-key load */
aesenc %xmm2, %xmm1
add $0x10, %rsi         /* step to the next 16-byte round key */
inc %r9
cmp %r10, %r9
jl .LOOP

movdqu %xmm1, (%rdi)
#endif
ret
106 changes: 98 additions & 8 deletions cryptonight.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,23 @@ static void do_skein_hash(const void* input, size_t len, char* output) {
assert(likely(SKEIN_SUCCESS == r));
}

/*
 * AES round helpers.
 *
 * The fast_* variants are the AES-NI assembly routines from aesb-x64.S:
 *   - fast_aesb_single_round: one AESENC round of the 16-byte block at `in`
 *     with the 16-byte round key at `expandedKey`, result stored to `out`.
 *   - fast_aesb_pseudo_round_mut: ten AESENC rounds applied in place to the
 *     16-byte block at `val`, consuming ten consecutive 16-byte round keys
 *     starting at `expandedKey`.
 * The assembly never sets a return value, so the `int` result of the fast_*
 * functions is indeterminate and must be ignored.
 *
 * aesb_single_round / aesb_pseudo_round_mut are defined outside this view;
 * presumably the portable counterparts with the same contract (TODO confirm).
 */
extern int fast_aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
extern int aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);
extern int fast_aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);

/**
 * Execute the x86 CPUID instruction for one leaf.
 *
 * @param code   CPUID leaf number, loaded into EAX.
 * @param where  Receives the resulting EAX, EBX, ECX and EDX, in that order.
 * @return       The EAX result (same value as where[0]) converted to int.
 *
 * ECX is explicitly zeroed before the instruction: some leaves read ECX as
 * a sub-leaf index, and leaving it uninitialised would make their results
 * depend on whatever the register happened to contain.  The `__asm__`
 * spelling is used so the function also compiles in strict ISO C modes,
 * where plain `asm` is not a keyword.
 */
static inline int cpuid(int code, uint32_t where[4]) {
    __asm__ __volatile__("cpuid"
                         : "=a"(where[0]), "=b"(where[1]),
                           "=c"(where[2]), "=d"(where[3])
                         : "a"(code), "c"(0));
    return (int)where[0];
}

/**
 * Report whether the CPU advertises the AES-NI instruction set.
 *
 * Queries CPUID leaf 1 and tests bit 25 of the returned ECX value.
 *
 * @return true when the bit is set, false otherwise.
 */
static bool has_aes_ni(void)
{
    uint32_t cpu_info[4];

    cpuid(1, cpu_info);
    /* Compare against zero explicitly so the result is a clean 0/1 even if
     * `bool` is ever something narrower than the masked 32-bit value. */
    return (cpu_info[2] & (1u << 25)) != 0;
}

static void (* const extra_hashes[4])(const void *, size_t, char *) = {
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
Expand Down Expand Up @@ -182,8 +197,72 @@ void cryptonight_hash(void* output, const void* input, size_t len) {
cryptonight_hash_ctx(output, input, len, alloca(sizeof(struct cryptonight_ctx)));
}

/**
 * Compute the hash of `input` using the AES-NI assembly round helpers
 * (fast_aesb_pseudo_round_mut / fast_aesb_single_round).
 *
 * Steps, in order:
 *   1. hash_process() absorbs `input` into ctx->state.hs.
 *   2. ctx->long_state (MEMORY bytes) is filled INIT_SIZE_BYTE at a time by
 *      repeatedly running the eight AES blocks of ctx->text through
 *      fast_aesb_pseudo_round_mut, keyed from ctx->state.hs.b.
 *   3. ITER / 4 passes of the four-step AES / multiply mixing loop read and
 *      rewrite ctx->long_state at addresses derived from ctx->a/b/c.
 *   4. ctx->long_state is folded back into ctx->text (XOR, then AES rounds
 *      keyed from ctx->state.hs.b + 32).
 *   5. hash_permutation() is applied and one of the four extra_hashes,
 *      selected by the low two bits of ctx->state.hs.b[0], writes `output`.
 *
 * @param output  Destination buffer, written by the selected extra hash.
 * @param input   Data to hash.
 * @param len     Length of `input` in bytes.
 * @param ctx     Caller-provided scratch context; its contents are overwritten.
 *
 * NOTE(review): the result of oaes_alloc() is used without a NULL check --
 * confirm whether allocation failure needs handling here.
 */
void cryptonight_hash_ctx_aes_ni(void* output, const void* input, size_t len, struct cryptonight_ctx* ctx) {
    size_t i, j;

    hash_process(&ctx->state.hs, (const uint8_t*) input, len);
    ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
    memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);

    /* Fill the scratchpad. */
    oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
    for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
/* RND is redefined further down, so drop any earlier definition first. */
#undef RND
#define RND(p) fast_aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * (p)], ctx->aes_ctx->key->exp_data)
        RND(0);
        RND(1);
        RND(2);
        RND(3);
        RND(4);
        RND(5);
        RND(6);
        RND(7);
        memcpy(&ctx->long_state[i], ctx->text, INIT_SIZE_BYTE);
    }

    xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
    xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);

    for (i = 0; likely(i < ITER / 4); ++i) {
        /* Dependency chain: address -> read value ------+
         * written value <-+ hard function (AES or MUL) <+
         * next address  <-+
         */
        /* Iteration 1 */
        j = e2i(ctx->a) * AES_BLOCK_SIZE;
        fast_aesb_single_round(&ctx->long_state[j], ctx->c, ctx->a);
        xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j]);
        /* Iteration 2 */
        mul_sum_xor_dst(ctx->c, ctx->a, &ctx->long_state[e2i(ctx->c) * AES_BLOCK_SIZE]);
        /* Iteration 3 */
        j = e2i(ctx->a) * AES_BLOCK_SIZE;
        fast_aesb_single_round(&ctx->long_state[j], ctx->b, ctx->a);
        xor_blocks_dst(ctx->b, ctx->c, &ctx->long_state[j]);
        /* Iteration 4 */
        mul_sum_xor_dst(ctx->b, ctx->a, &ctx->long_state[e2i(ctx->b) * AES_BLOCK_SIZE]);
    }

    /* Fold the scratchpad back into the text blocks. */
    memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
    oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
    for (i = 0; likely(i < MEMORY); i += INIT_SIZE_BYTE) {
/* Two statements per block, wrapped so RND(p); behaves as one statement. */
#undef RND
#define RND(p) do { \
        xor_blocks(&ctx->text[(p) * AES_BLOCK_SIZE], &ctx->long_state[i + (p) * AES_BLOCK_SIZE]); \
        fast_aesb_pseudo_round_mut(&ctx->text[(p) * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data); \
    } while (0)
        RND(0);
        RND(1);
        RND(2);
        RND(3);
        RND(4);
        RND(5);
        RND(6);
        RND(7);
    }
    memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
    hash_permutation(&ctx->state.hs);
    /*memcpy(hash, &state, 32);*/
    extra_hashes[ctx->state.hs.b[0] & 3](&ctx->state, 200, output);
    oaes_free((OAES_CTX **) &ctx->aes_ctx);
}

int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done) {
bool aes_ni = has_aes_ni();
uint32_t *nonceptr = (uint32_t*) (((char*)pdata) + 39);
uint32_t n = *nonceptr - 1;
const uint32_t first_nonce = n + 1;
Expand All @@ -192,14 +271,25 @@ int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget,

struct cryptonight_ctx *ctx = alloca(sizeof(struct cryptonight_ctx));

do {
*nonceptr = ++n;
cryptonight_hash_ctx(hash, pdata, 76, ctx);
if (unlikely(hash[7] < ptarget[7])) {
*hashes_done = n - first_nonce + 1;
return true;
}
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
/* Choose the hashing routine once for the whole scan: aes_ni holds the
 * result of has_aes_ni() taken at function entry.  The two loops below are
 * identical except for the hash function they call. */
if (aes_ni) {
do {
/* Advance the nonce and store it through nonceptr before hashing. */
*nonceptr = ++n;
cryptonight_hash_ctx_aes_ni(hash, pdata, 76, ctx);
/* Hit when hash[7] is below ptarget[7]: report how many nonces were
 * tried and return immediately. */
if (unlikely(hash[7] < ptarget[7])) {
*hashes_done = n - first_nonce + 1;
return true;
}
/* Keep scanning until max_nonce is passed or a restart is flagged for
 * this thread. */
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
} else {
do {
/* Same scan loop, using the non-AES-NI hash routine. */
*nonceptr = ++n;
cryptonight_hash_ctx(hash, pdata, 76, ctx);
if (unlikely(hash[7] < ptarget[7])) {
*hashes_done = n - first_nonce + 1;
return true;
}
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
}
*hashes_done = n - first_nonce + 1;
return 0;
}

0 comments on commit 4f5ae20

Please sign in to comment.