Skip to content

Commit

Permalink
merge vtnerd's tweaks to monerov1 pow changes
Browse files Browse the repository at this point in the history
  • Loading branch information
NaN-git committed Mar 12, 2018
1 parent 6349287 commit 0015387
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 29 deletions.
2 changes: 2 additions & 0 deletions algorithm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,8 @@ static cl_int queue_cryptonight_kernel(_clState *clState, dev_blk_ctx *blk, __ma
kernel = clState->extra_kernels;
CL_SET_ARG(clState->Scratchpads);
CL_SET_ARG(clState->States);
if (variant > 0)
CL_SET_ARG(*(cl_uint*)(clState->cldata + 35));

num = 0;
CL_NEXTKERNEL_SET_ARG(clState->Scratchpads);
Expand Down
28 changes: 13 additions & 15 deletions algorithm/cryptonight.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,23 @@
#include "algorithm/cn-aes-tbls.h"

#define VARIANT1_1(p) \
do if (Variant > 0) \
{ \
const uint8_t tmp = ((const uint8_t*)(p))[11]; \
do if (Variant > 0) { \
const uint32_t tmp = (p); \
static const uint32_t table = 0x75310; \
const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \
((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \
const uint8_t index = ((tmp >> 26) & 12) | ((tmp >> 23) & 2); \
(p) ^= ((table >> index) & 0x30) << 24; \
} while(0)

#define VARIANT1_2(p) \
do \
{ \
((uint32_t*)(p))[2] ^= nonce; \
do if (Variant > 0) { \
(p) ^= tweak1_2; \
} while(0)

#define VARIANT1_INIT() \
if (Variant > 0 && Length < 43) \
{ \
if (Variant > 0 && Length < 43) { \
quit(1, "Cryptonight variants need at least 43 bytes of data"); \
} \
const uint32_t nonce = Variant > 0 ? *(uint32_t*)(Input + 39) : 0
const uint64_t tweak1_2 = Variant > 0 ? *(uint64_t*)(Input + 35) ^ CNCtx.State[24] : 0


static const uint64_t keccakf_rndc[24] =
Expand Down Expand Up @@ -213,9 +210,9 @@ void cryptonight(uint8_t *Output, uint8_t *Input, uint32_t Length, int Variant)
uint64_t text[16], a[2], b[2];
uint32_t ExpandedKey1[64], ExpandedKey2[64];

VARIANT1_INIT();

CNKeccak(CNCtx.State, Input, Length);

VARIANT1_INIT();

for(int i = 0; i < 4; ++i) ((uint64_t *)ExpandedKey1)[i] = CNCtx.State[i];
for(int i = 0; i < 4; ++i) ((uint64_t *)ExpandedKey2)[i] = CNCtx.State[i + 4];
Expand Down Expand Up @@ -250,8 +247,8 @@ void cryptonight(uint8_t *Output, uint8_t *Input, uint32_t Length, int Variant)
b[0] ^= c[0];
b[1] ^= c[1];

VARIANT1_1(b[1]);
memcpy(CNCtx.Scratchpad + ((a[0] & 0x1FFFF0) >> 3), b, 16);
VARIANT1_1(CNCtx.Scratchpad + ((a[0] & 0x1FFFF0) >> 3));

memcpy(b, CNCtx.Scratchpad + ((c[0] & 0x1FFFF0) >> 3), 16);

Expand All @@ -260,8 +257,9 @@ void cryptonight(uint8_t *Output, uint8_t *Input, uint32_t Length, int Variant)
a[1] += mul128(c[0], b[0], &hi);
a[0] += hi;

VARIANT1_2(a[1]);
memcpy(CNCtx.Scratchpad + ((c[0] & 0x1FFFF0) >> 3), a, 16);
VARIANT1_2(CNCtx.Scratchpad + ((c[0] & 0x1FFFF0) >> 3));
VARIANT1_2(a[1]);

a[0] ^= b[0];
a[1] ^= b[1];
Expand Down
2 changes: 1 addition & 1 deletion algorithm/cryptonight.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ typedef struct _CryptonightCtx
uint64_t Scratchpad[1 << 18];
} CryptonightCtx;

inline int monero_variant(struct work *work) {
static inline int monero_variant(struct work *work) {
return (work->is_monero && work->data[0] >= 7) ? work->data[0] - 6 : 0;
}

Expand Down
31 changes: 18 additions & 13 deletions kernel/cryptonight.cl
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
#include "blake256.cl"
#include "groestl256.cl"

#define VARIANT0_PARAMS
#define VARIANT0_1(p)
#define VARIANT0_2(p)
#define VARIANT0_INIT()

#define VARIANT1_PARAMS , const uint tweak1
#define VARIANT1_1(p) \
do \
{ \
Expand All @@ -20,13 +22,13 @@
} while(0)

#define VARIANT1_2(p) \
do \
{ \
(p).s2 ^= get_global_id(0); \
} while(0)

#define VARIANT1_INIT()
(p) ^= tweak1_2

#define VARIANT1_INIT() \
uint2 tweak1_2; \
tweak1_2.s0 = tweak1; \
tweak1_2.s1 = get_global_id(0); \
tweak1_2 ^= as_uint2(states[24]);

static const __constant ulong keccakf_rndc[24] =
{
Expand Down Expand Up @@ -263,7 +265,7 @@ __kernel void search(__global ulong *input, uint InputLen, __global uint4 *Scrat
keccakf1600_2(State);

mem_fence(CLK_GLOBAL_MEM_FENCE);

#pragma unroll
for(int i = 0; i < 25; ++i) states[i] = State[i];

Expand All @@ -289,16 +291,16 @@ __kernel void search(__global ulong *input, uint InputLen, __global uint4 *Scrat

#define SEARCH1(VAR) \
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) \
__kernel void search1_var##VAR(__global uint4 *Scratchpad, __global ulong *states) \
__kernel void search1_var##VAR(__global uint4 *Scratchpad, __global ulong *states VARIANT##VAR##_PARAMS) \
{ \
uint4 a, b; \
__local uint AES0[256], AES1[256], AES2[256], AES3[256]; \
\
VARIANT##VAR##_INIT(); \
\
Scratchpad += ((get_global_id(0) - get_global_offset(0))); \
states += (25 * (get_global_id(0) - get_global_offset(0))); \
\
VARIANT##VAR##_INIT(); \
\
for(int i = get_local_id(0); i < 256; i += WORKSIZE) \
{ \
const uint tmp = AES0_C[i]; \
Expand All @@ -325,17 +327,20 @@ __kernel void search1_var##VAR(__global uint4 *Scratchpad, __global ulong *state
c = Scratchpad[IDX((as_ulong(a.s01) & 0x1FFFF0) >> 4)]; \
c = AES_Round(AES0, AES1, AES2, AES3, c, a); \
\
Scratchpad[IDX((as_ulong(a.s01) & 0x1FFFF0) >> 4)] = b_x ^ c; \
VARIANT##VAR##_1(Scratchpad[IDX((as_ulong(a.s01) & 0x1FFFF0) >> 4)]); \
b_x ^= c; \
VARIANT##VAR##_1(b_x); \
\
Scratchpad[IDX((as_ulong(a.s01) & 0x1FFFF0) >> 4)] = b_x; \
\
uint4 tmp; \
tmp = Scratchpad[IDX((as_ulong(c.s01) & 0x1FFFF0) >> 4)]; \
\
a.s23 = as_uint2(as_ulong(a.s23) + as_ulong(c.s01) * as_ulong(tmp.s01)); \
a.s01 = as_uint2(as_ulong(a.s01) + mul_hi(as_ulong(c.s01), as_ulong(tmp.s01))); \
\
VARIANT##VAR##_2(a.s23); \
Scratchpad[IDX((as_ulong(c.s01) & 0x1FFFF0) >> 4)] = a; \
VARIANT##VAR##_2(Scratchpad[IDX((as_ulong(c.s01) & 0x1FFFF0) >> 4)]); \
VARIANT##VAR##_2(a.s23); \
\
a ^= tmp; \
\
Expand Down

0 comments on commit 0015387

Please sign in to comment.