Skip to content

Commit

Permalink
CryptoNight optimizations
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasjones committed May 19, 2014
1 parent 6ecfa90 commit d64eb48
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 43 deletions.
38 changes: 16 additions & 22 deletions crypto/aesb.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,33 +142,27 @@ d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);

/*
 * aesb_single_round - apply one forward AES round (fwd_rnd) to a block.
 *
 * in          - input block, read as four 32-bit words
 * out         - output block, written as four 32-bit words
 * expandedKey - round-key material; only the words at its start (kp) are used
 *
 * NOTE(review): this span appears to carry both the removed and the added
 * lines of the commit diff (the +/- markers are not present). The first
 * variant stages the data through local arrays b0/b1 with state_in/state_out;
 * the second casts in/out to uint32_t pointers and runs the round on them
 * directly. Confirm which lines are live before relying on either.
 */
void aesb_single_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey)
{
uint32_t b0[4], b1[4];
/* Round key viewed as 32-bit words. */
const uint32_t *kp = (uint32_t *) expandedKey;
/* Staged variant: load in -> b0, one round b0 -> b1, store b1 -> out. */
state_in(b0, in);

round(fwd_rnd, b1, b0, kp);

state_out(out, b1);
/*
 * Direct variant: operate on the caller's buffers without copying.
 * The cast on `in` discards its const qualifier.
 * NOTE(review): accessing uint8_t buffers through uint32_t pointers presumes
 * the buffers are suitably aligned for 32-bit access - confirm at call sites.
 */
uint32_t *i = (uint32_t*) in;
uint32_t *o = (uint32_t*) out;
round(fwd_rnd, o, i, kp);
}

/*
 * aesb_pseudo_round / aesb_pseudo_round_mut - run ten consecutive forward
 * AES rounds (fwd_rnd) over one block, using round keys kp + 0 * N_COLS
 * through kp + 9 * N_COLS taken from expandedKey.
 *
 * NOTE(review): two signatures and two round sequences are stacked here
 * because the span appears to interleave the removed and added lines of the
 * commit diff (the +/- markers are not present):
 *   - aesb_pseudo_round(in, out, expandedKey) loads `in` into b0, ping-pongs
 *     between b0 and b1, and stores the final b0 into `out`;
 *   - aesb_pseudo_round_mut(val, expandedKey) ping-pongs between `val` itself
 *     and the scratch array b1, so the result is left in `val` in place.
 * Confirm which lines are live before relying on either.
 */
void aesb_pseudo_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey)
void aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey)
{
uint32_t b0[4], b1[4];
uint32_t b1[4];
/*
 * View the caller's block as 32-bit words.
 * NOTE(review): presumes `val` is suitably aligned for uint32_t access -
 * confirm at call sites.
 */
uint32_t *v = (uint32_t*) val;
/* Expanded key viewed as 32-bit words; round k uses kp + k * N_COLS. */
const uint32_t *kp = (uint32_t *) expandedKey;
state_in(b0, in);

/* Copying variant: rounds alternate b0 -> b1 and b1 -> b0, ending in b0. */
round(fwd_rnd, b1, b0, kp);
round(fwd_rnd, b0, b1, kp + 1 * N_COLS);
round(fwd_rnd, b1, b0, kp + 2 * N_COLS);
round(fwd_rnd, b0, b1, kp + 3 * N_COLS);
round(fwd_rnd, b1, b0, kp + 4 * N_COLS);
round(fwd_rnd, b0, b1, kp + 5 * N_COLS);
round(fwd_rnd, b1, b0, kp + 6 * N_COLS);
round(fwd_rnd, b0, b1, kp + 7 * N_COLS);
round(fwd_rnd, b1, b0, kp + 8 * N_COLS);
round(fwd_rnd, b0, b1, kp + 9 * N_COLS);

state_out(out, b0);
/* In-place variant: rounds alternate v -> b1 and b1 -> v, ending in v. */
round(fwd_rnd, b1, v, kp);
round(fwd_rnd, v, b1, kp + 1 * N_COLS);
round(fwd_rnd, b1, v, kp + 2 * N_COLS);
round(fwd_rnd, v, b1, kp + 3 * N_COLS);
round(fwd_rnd, b1, v, kp + 4 * N_COLS);
round(fwd_rnd, v, b1, kp + 5 * N_COLS);
round(fwd_rnd, b1, v, kp + 6 * N_COLS);
round(fwd_rnd, v, b1, kp + 7 * N_COLS);
round(fwd_rnd, b1, v, kp + 8 * N_COLS);
round(fwd_rnd, v, b1, kp + 9 * N_COLS);
}


Expand Down
43 changes: 22 additions & 21 deletions cryptonight.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ static void do_skein_hash(const void* input, size_t len, char* output) {
}

/*
 * AES round helpers implemented in crypto/aesb.c.
 *
 * These prototypes mirror the definitions there: every helper returns void
 * and takes a non-const expanded-key pointer. Declaring them as returning
 * int with a const key pointer makes the declaration incompatible with the
 * definition, which is undefined behavior once the function is called from
 * this translation unit.
 *
 *   in / val     - 16-byte block to transform (val is updated in place)
 *   out          - destination block
 *   expandedKey  - expanded AES key schedule
 */
extern void aesb_single_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey);
extern void aesb_pseudo_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey);
extern void aesb_pseudo_round_mut(uint8_t *val, uint8_t *expandedKey);

static void (* const extra_hashes[4])(const void *, size_t, char *) = {
do_blake_hash, do_groestl_hash, do_jh_hash, do_skein_hash
Expand Down Expand Up @@ -119,34 +119,35 @@ struct cryptonight_ctx {
uint8_t long_state[MEMORY];
union cn_slow_hash_state state;
uint8_t text[INIT_SIZE_BYTE];
uint8_t a[AES_BLOCK_SIZE];
uint8_t b[AES_BLOCK_SIZE];
uint8_t c[AES_BLOCK_SIZE];
uint8_t aes_key[AES_KEY_SIZE];
uint8_t a[AES_BLOCK_SIZE] __attribute__((aligned(64)));
uint8_t b[AES_BLOCK_SIZE] __attribute__((aligned(64)));
uint8_t c[AES_BLOCK_SIZE] __attribute__((aligned(64)));
oaes_ctx* aes_ctx;
};

void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cryptonight_ctx* ctx) {
hash_process(&ctx->state.hs, (const uint8_t*) input, len);
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE);
ctx->aes_ctx = (oaes_ctx*) oaes_alloc();
size_t i, j;
memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);

oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE);
for (i = 0; i < MEMORY / INIT_SIZE_BYTE; i++) {
for (j = 0; j < INIT_SIZE_BLK; j++) {
aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j], &ctx->text[AES_BLOCK_SIZE * j], ctx->aes_ctx->key->exp_data);
}
oaes_key_import_data(ctx->aes_ctx, ctx->state.hs.b, AES_KEY_SIZE);
for (i = 0; likely(i < MEMORY / INIT_SIZE_BYTE); ++i) {
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 0], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 1], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 2], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 3], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 4], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 5], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 6], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[AES_BLOCK_SIZE * 7], ctx->aes_ctx->key->exp_data);
memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE);
}

for (i = 0; i < 16; i++) {
ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i];
ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i];
}
xor_blocks_dst(&ctx->state.k[0], &ctx->state.k[32], ctx->a);
xor_blocks_dst(&ctx->state.k[16], &ctx->state.k[48], ctx->b);

for (i = 0; i < ITER / 2; i++) {
for (i = 0; likely(i < ITER / 2); ++i) {
/* Dependency chain: address -> read value ------+
* written value <-+ hard function (AES or MUL) <+
* next address <-+
Expand All @@ -162,11 +163,11 @@ void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cr

memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE);
oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE);
for (i = 0; i < MEMORY / INIT_SIZE_BYTE; i++) {
for (j = 0; j < INIT_SIZE_BLK; j++) {
for (i = 0; likely(i < MEMORY / INIT_SIZE_BYTE); ++i) {
for (j = 0; likely(j < INIT_SIZE_BLK); ++j) {
xor_blocks(&ctx->text[j * AES_BLOCK_SIZE],
&ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE], &ctx->text[j * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
aesb_pseudo_round_mut(&ctx->text[j * AES_BLOCK_SIZE], ctx->aes_ctx->key->exp_data);
}
}
memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE);
Expand Down Expand Up @@ -197,7 +198,7 @@ int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
*hashes_done = n - first_nonce + 1;
return true;
}
} while (likely((n <= max_nonce) && !work_restart[thr_id].restart));
} while (likely((n <= max_nonce && !work_restart[thr_id].restart)));
*hashes_done = n - first_nonce + 1;
return 0;
}

0 comments on commit d64eb48

Please sign in to comment.