diff --git a/OpenCL/inc_hash_whirlpool.cl b/OpenCL/inc_hash_whirlpool.cl index 2cd08dd91..e4735e917 100644 --- a/OpenCL/inc_hash_whirlpool.cl +++ b/OpenCL/inc_hash_whirlpool.cl @@ -560,7 +560,78 @@ CONSTANT_VK u64a RC[16] = // input buf needs to be in algorithm native byte order (md5 = LE, sha256 = BE, etc) // input buf needs to be 64 byte aligned when using whirlpool_update() -DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +#define F1(i,v,m) \ +{ \ + const u8 Lp0 = v8h_from_v64_S ((v)[((i) + 8) & 7]); \ + const u8 Lp1 = v8g_from_v64_S ((v)[((i) + 7) & 7]); \ + const u8 Lp2 = v8f_from_v64_S ((v)[((i) + 6) & 7]); \ + const u8 Lp3 = v8e_from_v64_S ((v)[((i) + 5) & 7]); \ + const u8 Lp4 = v8d_from_v64_S ((v)[((i) + 4) & 7]); \ + const u8 Lp5 = v8c_from_v64_S ((v)[((i) + 3) & 7]); \ + const u8 Lp6 = v8b_from_v64_S ((v)[((i) + 2) & 7]); \ + const u8 Lp7 = v8a_from_v64_S ((v)[((i) + 1) & 7]); \ + \ + const u64 X0 = BOX64_S ((m), 0, Lp0); \ + const u64 X1 = BOX64_S ((m), 1, Lp1); \ + const u64 X2 = BOX64_S ((m), 2, Lp2); \ + const u64 X3 = BOX64_S ((m), 3, Lp3); \ + const u64 X4 = BOX64_S ((m), 4, Lp4); \ + const u64 X5 = BOX64_S ((m), 5, Lp5); \ + const u64 X6 = BOX64_S ((m), 6, Lp6); \ + const u64 X7 = BOX64_S ((m), 7, Lp7); \ + \ + L[(i)] = X0 \ + ^ X1 \ + ^ X2 \ + ^ X3 \ + ^ X4 \ + ^ X5 \ + ^ X6 \ + ^ X7; \ +} + +#define F0(rc) \ +{ \ + u64 L[8]; \ + \ + F1 (0, K, s_MT); \ + F1 (1, K, s_MT); \ + F1 (2, K, s_MT); \ + F1 (3, K, s_MT); \ + F1 (4, K, s_MT); \ + F1 (5, K, s_MT); \ + F1 (6, K, s_MT); \ + F1 (7, K, s_MT); \ + \ + K[0] = L[0] ^ (rc); \ + K[1] = L[1]; \ + K[2] = L[2]; \ + K[3] = L[3]; \ + K[4] = L[4]; \ + K[5] = L[5]; \ + K[6] = L[6]; \ + K[7] = L[7]; \ + \ + F1 (0, state, s_MT); \ + F1 (1, state, s_MT); \ + F1 (2, state, s_MT); \ + F1 (3, state, s_MT); \ + F1 (4, state, s_MT); \ + F1 (5, state, s_MT); \ + F1 (6, state, s_MT); \ + F1 (7, state, s_MT); \ + \ + state[0] = L[0] ^ K[0]; \ + state[1] = L[1] ^ K[1]; \ + state[2] = L[2] ^ K[2]; \ + state[3] = L[3] ^ K[3]; \ + state[4] = L[4] ^ K[4]; \ + state[5] = L[5] ^ K[5]; \ + state[6] = L[6] ^ K[6]; \ + state[7] = L[7] ^ K[7]; \ +} + +DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, SHM_TYPE u64 (*s_MT)[256]) { u64 D[8]; @@ -606,96 +677,16 @@ DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, state[6] = K[6] ^ W[6]; state[7] = K[7] ^ W[7]; - for (u32 r = 0; r < R; r++) - { - u64 L[8]; - - #ifdef _unroll - #pragma unroll - #endif - for (int i = 0; i < 8; i++) - { - const u8 Lp0 = v8h_from_v64_S (K[(i + 8) & 7]); - const u8 Lp1 = v8g_from_v64_S (K[(i + 7) & 7]); - const u8 Lp2 = v8f_from_v64_S (K[(i + 6) & 7]); - const u8 Lp3 = v8e_from_v64_S (K[(i + 5) & 7]); - const u8 Lp4 = v8d_from_v64_S (K[(i + 4) & 7]); - const u8 Lp5 = v8c_from_v64_S (K[(i + 3) & 7]); - const u8 Lp6 = v8b_from_v64_S (K[(i + 2) & 7]); - const u8 Lp7 = v8a_from_v64_S (K[(i + 1) & 7]); - - const u64 X0 = BOX64_S (s_MT, 0, Lp0); - const u64 X1 = BOX64_S (s_MT, 1, Lp1); - const u64 X2 = BOX64_S (s_MT, 2, Lp2); - const u64 X3 = BOX64_S (s_MT, 3, Lp3); - const u64 X4 = BOX64_S (s_MT, 4, Lp4); - const u64 X5 = BOX64_S (s_MT, 5, Lp5); - const u64 X6 = BOX64_S (s_MT, 6, Lp6); - const u64 X7 = BOX64_S (s_MT, 7, Lp7); - - L[i] = X0 - ^ X1 - ^ X2 - ^ X3 - ^ X4 - ^ X5 - ^ X6 - ^ X7; - } - - const u64 rc = s_RC[r]; - - K[0] = L[0] ^ rc; - K[1] = L[1]; - K[2] = L[2]; - K[3] = L[3]; - K[4] = L[4]; - K[5] = L[5]; - K[6] = L[6]; - K[7] = L[7]; - - #ifdef _unroll - #pragma unroll - #endif - for (int i = 0; i < 8; i++) - { - const u8 Lp0 = v8h_from_v64_S (state[(i + 8) & 7]); - const u8 Lp1 = v8g_from_v64_S (state[(i + 7) & 7]); - const u8 Lp2 = v8f_from_v64_S (state[(i + 6) & 7]); - const u8 Lp3 = v8e_from_v64_S (state[(i + 5) & 7]); - const u8 Lp4 = v8d_from_v64_S (state[(i + 4) & 7]); - const u8 Lp5 = v8c_from_v64_S (state[(i + 3) & 7]); - const u8 Lp6 = v8b_from_v64_S (state[(i + 2) & 7]); - const u8 Lp7 = v8a_from_v64_S (state[(i + 1) & 7]); - - const u64 X0 = BOX64_S (s_MT, 0, Lp0); - const u64 X1 = BOX64_S (s_MT, 1, Lp1); - const u64 X2 = BOX64_S (s_MT, 2, Lp2); - const u64 X3 = BOX64_S (s_MT, 3, Lp3); - const u64 X4 = BOX64_S (s_MT, 4, Lp4); - const u64 X5 = BOX64_S (s_MT, 5, Lp5); - const u64 X6 = BOX64_S (s_MT, 6, Lp6); - const u64 X7 = BOX64_S (s_MT, 7, Lp7); - - L[i] = X0 - ^ X1 - ^ X2 - ^ X3 - ^ X4 - ^ X5 - ^ X6 - ^ X7; - } - - state[0] = L[0] ^ K[0]; - state[1] = L[1] ^ K[1]; - state[2] = L[2] ^ K[2]; - state[3] = L[3] ^ K[3]; - state[4] = L[4] ^ K[4]; - state[5] = L[5] ^ K[5]; - state[6] = L[6] ^ K[6]; - state[7] = L[7] ^ K[7]; - } + F0 (RC[0]); + F0 (RC[1]); + F0 (RC[2]); + F0 (RC[3]); + F0 (RC[4]); + F0 (RC[5]); + F0 (RC[6]); + F0 (RC[7]); + F0 (RC[8]); + F0 (RC[9]); W[0] ^= D[0] ^ state[0]; W[1] ^= D[1] ^ state[1]; @@ -724,7 +715,7 @@ DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, digest[15] = l32_from_64_S (W[7]); } -DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u64 (*s_MT)[256]) { ctx->h[ 0] = 0; ctx->h[ 1] = 0; @@ -763,7 +754,6 @@ DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u64 (*s_MT)[256], S ctx->len = 0; ctx->s_MT = s_MT; - ctx->s_RC = s_RC; } DECLSPEC void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) @@ -793,7 +783,7 @@ DECLSPEC void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 *w0, u32 *w1, u32 * if (len == 64) { - whirlpool_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT, ctx->s_RC); + whirlpool_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT); ctx->w0[0] = 0; ctx->w0[1] = 0; @@ -862,7 +852,7 @@ DECLSPEC void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 *w0, u32 *w1, u32 * ctx->w3[2] |= w3[2]; ctx->w3[3] |= w3[3]; - whirlpool_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT, ctx->s_RC); + whirlpool_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT); ctx->w0[0] = c0[0]; ctx->w0[1] = c0[1]; @@ -1404,7 +1394,7 @@ DECLSPEC void whirlpool_final (whirlpool_ctx_t *ctx) if (pos >= 32) { - whirlpool_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT, ctx->s_RC); + whirlpool_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT); ctx->w0[0] = 0; ctx->w0[1] = 0; @@ -1427,12 +1417,12 @@ DECLSPEC void whirlpool_final (whirlpool_ctx_t *ctx) ctx->w3[2] = 0; ctx->w3[3] = ctx->len * 8; - whirlpool_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT, ctx->s_RC); + whirlpool_transform (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT); } // whirlpool_hmac -DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u64 (*s_MT)[256]) { u32 t0[4]; u32 t1[4]; @@ -1458,7 +1448,7 @@ DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, t3[2] = w3[2] ^ 0x36363636; t3[3] = w3[3] ^ 0x36363636; - whirlpool_init (&ctx->ipad, s_MT, s_RC); + whirlpool_init (&ctx->ipad, s_MT); whirlpool_update_64 (&ctx->ipad, t0, t1, t2, t3, 64); @@ -1481,12 +1471,12 @@ DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, t3[2] = w3[2] ^ 0x5c5c5c5c; t3[3] = w3[3] ^ 0x5c5c5c5c; - whirlpool_init (&ctx->opad, s_MT, s_RC); + whirlpool_init (&ctx->opad, s_MT); whirlpool_update_64 (&ctx->opad, t0, t1, t2, t3, 64); } -DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256]) { u32 w0[4]; u32 w1[4]; @@ -1497,7 +1487,7 @@ DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, cons { whirlpool_ctx_t tmp; - whirlpool_init (&tmp, s_MT, s_RC); + whirlpool_init (&tmp, s_MT); whirlpool_update (&tmp, w, len); @@ -1540,10 +1530,10 @@ DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, cons w3[3] = w[15]; } - whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_MT); } -DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256]) { u32 w0[4]; u32 w1[4]; @@ -1554,7 +1544,7 @@ DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, { whirlpool_ctx_t tmp; - whirlpool_init (&tmp, s_MT, s_RC); + whirlpool_init (&tmp, s_MT); whirlpool_update_swap (&tmp, w, len); @@ -1597,10 +1587,10 @@ DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, w3[3] = hc_swap32_S (w[15]); } - whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_MT); } -DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256]) { u32 w0[4]; u32 w1[4]; @@ -1611,7 +1601,7 @@ DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS c { whirlpool_ctx_t tmp; - whirlpool_init (&tmp, s_MT, s_RC); + whirlpool_init (&tmp, s_MT); whirlpool_update_global (&tmp, w, len); @@ -1654,10 +1644,10 @@ DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS c w3[3] = w[15]; } - whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_MT); } -DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256]) { u32 w0[4]; u32 w1[4]; @@ -1668,7 +1658,7 @@ DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL { whirlpool_ctx_t tmp; - whirlpool_init (&tmp, s_MT, s_RC); + whirlpool_init (&tmp, s_MT); whirlpool_update_global_swap (&tmp, w, len); @@ -1711,7 +1701,7 @@ DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL w3[3] = hc_swap32_S (w[15]); } - whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_64 (ctx, w0, w1, w2, w3, s_MT); } DECLSPEC void whirlpool_hmac_update_64 (whirlpool_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len) @@ -1782,7 +1772,7 @@ DECLSPEC void whirlpool_hmac_final (whirlpool_hmac_ctx_t *ctx) ctx->opad.len += 64; - whirlpool_transform (ctx->opad.w0, ctx->opad.w1, ctx->opad.w2, ctx->opad.w3, ctx->opad.h, ctx->opad.s_MT, ctx->opad.s_RC); + whirlpool_transform (ctx->opad.w0, ctx->opad.w1, ctx->opad.w2, ctx->opad.w3, ctx->opad.h, ctx->opad.s_MT); ctx->opad.w0[0] = 0; ctx->opad.w0[1] = 0; @@ -1806,7 +1796,78 @@ DECLSPEC void whirlpool_hmac_final (whirlpool_hmac_ctx_t *ctx) // while input buf can be a vector datatype, the length of the different elements can not -DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +#define F1x(i,v,m) \ +{ \ + const u8x Lp0 = v8h_from_v64 ((v)[((i) + 8) & 7]); \ + const u8x Lp1 = v8g_from_v64 ((v)[((i) + 7) & 7]); \ + const u8x Lp2 = v8f_from_v64 ((v)[((i) + 6) & 7]); \ + const u8x Lp3 = v8e_from_v64 ((v)[((i) + 5) & 7]); \ + const u8x Lp4 = v8d_from_v64 ((v)[((i) + 4) & 7]); \ + const u8x Lp5 = v8c_from_v64 ((v)[((i) + 3) & 7]); \ + const u8x Lp6 = v8b_from_v64 ((v)[((i) + 2) & 7]); \ + const u8x Lp7 = v8a_from_v64 ((v)[((i) + 1) & 7]); \ + \ + const u64x X0 = BOX64 ((m), 0, Lp0); \ + const u64x X1 = BOX64 ((m), 1, Lp1); \ + const u64x X2 = BOX64 ((m), 2, Lp2); \ + const u64x X3 = BOX64 ((m), 3, Lp3); \ + const u64x X4 = BOX64 ((m), 4, Lp4); \ + const u64x X5 = BOX64 ((m), 5, Lp5); \ + const u64x X6 = BOX64 ((m), 6, Lp6); \ + const u64x X7 = BOX64 ((m), 7, Lp7); \ + \ + L[(i)] = X0 \ + ^ X1 \ + ^ X2 \ + ^ X3 \ + ^ X4 \ + ^ X5 \ + ^ X6 \ + ^ X7; \ +} + +#define F0x(rc) \ +{ \ + u64x L[8]; \ + \ + F1x (0, K, s_MT); \ + F1x (1, K, s_MT); \ + F1x (2, K, s_MT); \ + F1x (3, K, s_MT); \ + F1x (4, K, s_MT); \ + F1x (5, K, s_MT); \ + F1x (6, K, s_MT); \ + F1x (7, K, s_MT); \ + \ + K[0] = L[0] ^ (rc); \ + K[1] = L[1]; \ + K[2] = L[2]; \ + K[3] = L[3]; \ + K[4] = L[4]; \ + K[5] = L[5]; \ + K[6] = L[6]; \ + K[7] = L[7]; \ + \ + F1x (0, state, s_MT); \ + F1x (1, state, s_MT); \ + F1x (2, state, s_MT); \ + F1x (3, state, s_MT); \ + F1x (4, state, s_MT); \ + F1x (5, state, s_MT); \ + F1x (6, state, s_MT); \ + F1x (7, state, s_MT); \ + \ + state[0] = L[0] ^ K[0]; \ + state[1] = L[1] ^ K[1]; \ + state[2] = L[2] ^ K[2]; \ + state[3] = L[3] ^ K[3]; \ + state[4] = L[4] ^ K[4]; \ + state[5] = L[5] ^ K[5]; \ + state[6] = L[6] ^ K[6]; \ + state[7] = L[7] ^ K[7]; \ +} + +DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE u64 (*s_MT)[256]) { u64x D[8]; @@ -1852,96 +1913,16 @@ DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const state[6] = K[6] ^ W[6]; state[7] = K[7] ^ W[7]; - for (u32 r = 0; r < R; r++) - { - u64x L[8]; - - #ifdef _unroll - #pragma unroll - #endif - for (int i = 0; i < 8; i++) - { - const u8x Lp0 = v8h_from_v64 (K[(i + 8) & 7]); - const u8x Lp1 = v8g_from_v64 (K[(i + 7) & 7]); - const u8x Lp2 = v8f_from_v64 (K[(i + 6) & 7]); - const u8x Lp3 = v8e_from_v64 (K[(i + 5) & 7]); - const u8x Lp4 = v8d_from_v64 (K[(i + 4) & 7]); - const u8x Lp5 = v8c_from_v64 (K[(i + 3) & 7]); - const u8x Lp6 = v8b_from_v64 (K[(i + 2) & 7]); - const u8x Lp7 = v8a_from_v64 (K[(i + 1) & 7]); - - const u64x X0 = BOX64 (s_MT, 0, Lp0); - const u64x X1 = BOX64 (s_MT, 1, Lp1); - const u64x X2 = BOX64 (s_MT, 2, Lp2); - const u64x X3 = BOX64 (s_MT, 3, Lp3); - const u64x X4 = BOX64 (s_MT, 4, Lp4); - const u64x X5 = BOX64 (s_MT, 5, Lp5); - const u64x X6 = BOX64 (s_MT, 6, Lp6); - const u64x X7 = BOX64 (s_MT, 7, Lp7); - - L[i] = X0 - ^ X1 - ^ X2 - ^ X3 - ^ X4 - ^ X5 - ^ X6 - ^ X7; - } - - const u64 rc = s_RC[r]; - - K[0] = L[0] ^ rc; - K[1] = L[1]; - K[2] = L[2]; - K[3] = L[3]; - K[4] = L[4]; - K[5] = L[5]; - K[6] = L[6]; - K[7] = L[7]; - - #ifdef _unroll - #pragma unroll - #endif - for (int i = 0; i < 8; i++) - { - const u8x Lp0 = v8h_from_v64 (state[(i + 8) & 7]); - const u8x Lp1 = v8g_from_v64 (state[(i + 7) & 7]); - const u8x Lp2 = v8f_from_v64 (state[(i + 6) & 7]); - const u8x Lp3 = v8e_from_v64 (state[(i + 5) & 7]); - const u8x Lp4 = v8d_from_v64 (state[(i + 4) & 7]); - const u8x Lp5 = v8c_from_v64 (state[(i + 3) & 7]); - const u8x Lp6 = v8b_from_v64 (state[(i + 2) & 7]); - const u8x Lp7 = v8a_from_v64 (state[(i + 1) & 7]); - - const u64x X0 = BOX64 (s_MT, 0, Lp0); - const u64x X1 = BOX64 (s_MT, 1, Lp1); - const u64x X2 = BOX64 (s_MT, 2, Lp2); - const u64x X3 = BOX64 (s_MT, 3, Lp3); - const u64x X4 = BOX64 (s_MT, 4, Lp4); - const u64x X5 = BOX64 (s_MT, 5, Lp5); - const u64x X6 = BOX64 (s_MT, 6, Lp6); - const u64x X7 = BOX64 (s_MT, 7, Lp7); - - L[i] = X0 - ^ X1 - ^ X2 - ^ X3 - ^ X4 - ^ X5 - ^ X6 - ^ X7; - } - - state[0] = L[0] ^ K[0]; - state[1] = L[1] ^ K[1]; - state[2] = L[2] ^ K[2]; - state[3] = L[3] ^ K[3]; - state[4] = L[4] ^ K[4]; - state[5] = L[5] ^ K[5]; - state[6] = L[6] ^ K[6]; - state[7] = L[7] ^ K[7]; - } + F0x (RC[0]); + F0x (RC[1]); + F0x (RC[2]); + F0x (RC[3]); + F0x (RC[4]); + F0x (RC[5]); + F0x (RC[6]); + F0x (RC[7]); + F0x (RC[8]); + F0x (RC[9]); W[0] ^= D[0] ^ state[0]; W[1] ^= D[1] ^ state[1]; @@ -1970,7 +1951,7 @@ DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const digest[15] = l32_from_64 (W[7]); } -DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u64 (*s_MT)[256]) { ctx->h[ 0] = 0; ctx->h[ 1] = 0; @@ -2009,7 +1990,6 @@ DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u64 ( ctx->len = 0; ctx->s_MT = s_MT; - ctx->s_RC = s_RC; } DECLSPEC void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, whirlpool_ctx_t *ctx0) @@ -2051,7 +2031,6 @@ DECLSPEC void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, wh ctx->len = ctx0->len; ctx->s_MT = ctx0->s_MT; - ctx->s_RC = ctx0->s_RC; } DECLSPEC void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) @@ -2081,7 +2060,7 @@ DECLSPEC void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x *w0, if (len == 64) { - whirlpool_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT, ctx->s_RC); + whirlpool_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT); ctx->w0[0] = 0; ctx->w0[1] = 0; @@ -2150,7 +2129,7 @@ DECLSPEC void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x *w0, ctx->w3[2] |= w3[2]; ctx->w3[3] |= w3[3]; - whirlpool_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT, ctx->s_RC); + whirlpool_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT); ctx->w0[0] = c0[0]; ctx->w0[1] = c0[1]; @@ -2436,7 +2415,7 @@ DECLSPEC void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx) if (pos >= 32) { - whirlpool_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT, ctx->s_RC); + whirlpool_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT); ctx->w0[0] = 0; ctx->w0[1] = 0; @@ -2459,12 +2438,12 @@ DECLSPEC void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx) ctx->w3[2] = 0; ctx->w3[3] = ctx->len * 8; - whirlpool_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT, ctx->s_RC); + whirlpool_transform_vector (ctx->w0, ctx->w1, ctx->w2, ctx->w3, ctx->h, ctx->s_MT); } // HMAC + Vector -DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u64 (*s_MT)[256]) { u32x t0[4]; u32x t1[4]; @@ -2490,7 +2469,7 @@ DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, c t3[2] = w3[2] ^ 0x36363636; t3[3] = w3[3] ^ 0x36363636; - whirlpool_init_vector (&ctx->ipad, s_MT, s_RC); + whirlpool_init_vector (&ctx->ipad, s_MT); whirlpool_update_vector_64 (&ctx->ipad, t0, t1, t2, t3, 64); @@ -2513,12 +2492,12 @@ DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, c t3[2] = w3[2] ^ 0x5c5c5c5c; t3[3] = w3[3] ^ 0x5c5c5c5c; - whirlpool_init_vector (&ctx->opad, s_MT, s_RC); + whirlpool_init_vector (&ctx->opad, s_MT); whirlpool_update_vector_64 (&ctx->opad, t0, t1, t2, t3, 64); } -DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u64 (*s_MT)[256]) { u32x w0[4]; u32x w1[4]; @@ -2529,7 +2508,7 @@ DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, cons { whirlpool_ctx_vector_t tmp; - whirlpool_init_vector (&tmp, s_MT, s_RC); + whirlpool_init_vector (&tmp, s_MT); whirlpool_update_vector (&tmp, w, len); @@ -2572,7 +2551,7 @@ DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, cons w3[3] = w[15]; } - whirlpool_hmac_init_vector_64 (ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_vector_64 (ctx, w0, w1, w2, w3, s_MT); } DECLSPEC void whirlpool_hmac_update_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len) @@ -2608,7 +2587,7 @@ DECLSPEC void whirlpool_hmac_final_vector (whirlpool_hmac_ctx_vector_t *ctx) ctx->opad.len += 64; - whirlpool_transform_vector (ctx->opad.w0, ctx->opad.w1, ctx->opad.w2, ctx->opad.w3, ctx->opad.h, ctx->opad.s_MT, ctx->opad.s_RC); + whirlpool_transform_vector (ctx->opad.w0, ctx->opad.w1, ctx->opad.w2, ctx->opad.w3, ctx->opad.h, ctx->opad.s_MT); ctx->opad.w0[0] = 0; ctx->opad.w0[1] = 0; @@ -2633,3 +2612,7 @@ DECLSPEC void whirlpool_hmac_final_vector (whirlpool_hmac_ctx_vector_t *ctx) #undef R #undef BOX #undef BOX_S +#undef F0 +#undef F0x +#undef F1 +#undef F1x diff --git a/OpenCL/inc_hash_whirlpool.h b/OpenCL/inc_hash_whirlpool.h index f7c762da5..2375725a9 100644 --- a/OpenCL/inc_hash_whirlpool.h +++ b/OpenCL/inc_hash_whirlpool.h @@ -40,7 +40,6 @@ typedef struct whirlpool_ctx int len; SHM_TYPE u64 (*s_MT)[256]; - SHM_TYPE u64 *s_RC; } whirlpool_ctx_t; @@ -63,7 +62,6 @@ typedef struct whirlpool_ctx_vector int len; SHM_TYPE u64 (*s_MT)[256]; - SHM_TYPE u64 *s_RC; } whirlpool_ctx_vector_t; @@ -74,8 +72,8 @@ typedef struct whirlpool_hmac_ctx_vector } whirlpool_hmac_ctx_vector_t; -DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); -DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); +DECLSPEC void whirlpool_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *digest, SHM_TYPE u64 (*s_MT)[256]); +DECLSPEC void whirlpool_init (whirlpool_ctx_t *ctx, SHM_TYPE u64 (*s_MT)[256]); DECLSPEC void whirlpool_update_64 (whirlpool_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len); DECLSPEC void whirlpool_update (whirlpool_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void whirlpool_update_swap (whirlpool_ctx_t *ctx, const u32 *w, const int len); @@ -86,11 +84,11 @@ DECLSPEC void whirlpool_update_global_swap (whirlpool_ctx_t *ctx, GLOBAL_AS cons DECLSPEC void whirlpool_update_global_utf16le (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void whirlpool_update_global_utf16le_swap (whirlpool_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void whirlpool_final (whirlpool_ctx_t *ctx); -DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); -DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); -DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); -DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); -DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); +DECLSPEC void whirlpool_hmac_init_64 (whirlpool_hmac_ctx_t *ctx, const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, SHM_TYPE u64 (*s_MT)[256]); +DECLSPEC void whirlpool_hmac_init (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256]); +DECLSPEC void whirlpool_hmac_init_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256]); +DECLSPEC void whirlpool_hmac_init_global (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256]); +DECLSPEC void whirlpool_hmac_init_global_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len, SHM_TYPE u64 (*s_MT)[256]); DECLSPEC void whirlpool_hmac_update_64 (whirlpool_hmac_ctx_t *ctx, u32 *w0, u32 *w1, u32 *w2, u32 *w3, const int len); DECLSPEC void whirlpool_hmac_update (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len); DECLSPEC void whirlpool_hmac_update_swap (whirlpool_hmac_ctx_t *ctx, const u32 *w, const int len); @@ -101,8 +99,8 @@ DECLSPEC void whirlpool_hmac_update_global_swap (whirlpool_hmac_ctx_t *ctx, GLOB DECLSPEC void whirlpool_hmac_update_global_utf16le (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void whirlpool_hmac_update_global_utf16le_swap (whirlpool_hmac_ctx_t *ctx, GLOBAL_AS const u32 *w, const int len); DECLSPEC void whirlpool_hmac_final (whirlpool_hmac_ctx_t *ctx); -DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); -DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); +DECLSPEC void whirlpool_transform_vector (const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, u32x *digest, SHM_TYPE u64 (*s_MT)[256]); +DECLSPEC void whirlpool_init_vector (whirlpool_ctx_vector_t *ctx, SHM_TYPE u64 (*s_MT)[256]); DECLSPEC void whirlpool_init_vector_from_scalar (whirlpool_ctx_vector_t *ctx, whirlpool_ctx_t *ctx0); DECLSPEC void whirlpool_update_vector_64 (whirlpool_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len); DECLSPEC void whirlpool_update_vector (whirlpool_ctx_vector_t *ctx, const u32x *w, const int len); @@ -110,8 +108,8 @@ DECLSPEC void whirlpool_update_vector_swap (whirlpool_ctx_vector_t *ctx, const u DECLSPEC void whirlpool_update_vector_utf16le (whirlpool_ctx_vector_t *ctx, const u32x *w, const int len); DECLSPEC void whirlpool_update_vector_utf16le_swap (whirlpool_ctx_vector_t *ctx, const u32x *w, const int len); DECLSPEC void whirlpool_final_vector (whirlpool_ctx_vector_t *ctx); -DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); -DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC); +DECLSPEC void whirlpool_hmac_init_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w0, const u32x *w1, const u32x *w2, const u32x *w3, SHM_TYPE u64 (*s_MT)[256]); +DECLSPEC void whirlpool_hmac_init_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len, SHM_TYPE u64 (*s_MT)[256]); DECLSPEC void whirlpool_hmac_update_vector_64 (whirlpool_hmac_ctx_vector_t *ctx, u32x *w0, u32x *w1, u32x *w2, u32x *w3, const int len); DECLSPEC void whirlpool_hmac_update_vector (whirlpool_hmac_ctx_vector_t *ctx, const u32x *w, const int len); DECLSPEC void whirlpool_hmac_final_vector (whirlpool_hmac_ctx_vector_t *ctx); diff --git a/OpenCL/m06100_a0-optimized.cl b/OpenCL/m06100_a0-optimized.cl index 373875eed..88cfd2a7d 100644 --- a/OpenCL/m06100_a0-optimized.cl +++ b/OpenCL/m06100_a0-optimized.cl @@ -16,9 +16,9 @@ #include "inc_hash_whirlpool.cl" #endif -DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u64 (*s_MT)[256]) { - whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_MT, s_RC); + whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_MT); } KERNEL_FQ void m06100_m04 (KERN_ATTR_RULES ()) @@ -38,7 +38,6 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_RULES ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -52,17 +51,11 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_RULES ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -143,7 +136,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_RULES ()) dgst[14] = 0; dgst[15] = 0; - whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC); + whirlpool_transform_transport_vector (w, dgst, s_MT); COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]); } @@ -174,7 +167,6 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_RULES ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -188,17 +180,11 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_RULES ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -291,7 +277,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_RULES ()) dgst[14] = 0; dgst[15] = 0; - whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC); + whirlpool_transform_transport_vector (w, dgst, s_MT); COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]); } diff --git a/OpenCL/m06100_a0-pure.cl b/OpenCL/m06100_a0-pure.cl index 7c7e89c65..769ba26e7 100644 --- a/OpenCL/m06100_a0-pure.cl +++ b/OpenCL/m06100_a0-pure.cl @@ -33,7 +33,6 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_RULES ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -47,17 +46,11 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_RULES ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -81,7 +74,7 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_RULES ()) whirlpool_ctx_t ctx; - whirlpool_init (&ctx, s_MT, s_RC); + whirlpool_init (&ctx, s_MT); whirlpool_update_swap (&ctx, tmp.i, tmp.pw_len); @@ -113,7 +106,6 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_RULES ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -127,17 +119,11 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_RULES ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -173,7 +159,7 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_RULES ()) whirlpool_ctx_t ctx; - whirlpool_init (&ctx, s_MT, s_RC); + whirlpool_init (&ctx, s_MT); whirlpool_update_swap (&ctx, tmp.i, tmp.pw_len); diff --git a/OpenCL/m06100_a1-optimized.cl b/OpenCL/m06100_a1-optimized.cl index a91f4b0c8..87d62f4ff 100644 --- a/OpenCL/m06100_a1-optimized.cl +++ b/OpenCL/m06100_a1-optimized.cl @@ -14,9 +14,9 @@ #include "inc_hash_whirlpool.cl" #endif -DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u64 (*s_MT)[256]) { - whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_MT, s_RC); + whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_MT); } KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ()) @@ -36,7 +36,6 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -50,17 +49,11 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -199,7 +192,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ()) dgst[14] = 0; dgst[15] = 0; - whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC); + whirlpool_transform_transport_vector (w, dgst, s_MT); COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]); } @@ -230,7 +223,6 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -244,17 +236,11 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -405,7 +391,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ()) dgst[14] = 0; dgst[15] = 0; - whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC); + whirlpool_transform_transport_vector (w, dgst, s_MT); COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]); } diff --git a/OpenCL/m06100_a1-pure.cl b/OpenCL/m06100_a1-pure.cl index 9ea4f2102..7d89131a6 100644 --- a/OpenCL/m06100_a1-pure.cl +++ b/OpenCL/m06100_a1-pure.cl @@ -31,7 +31,6 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_BASIC ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -45,17 +44,11 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_BASIC ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -67,7 +60,7 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_BASIC ()) whirlpool_ctx_t ctx0; - whirlpool_init (&ctx0, s_MT, s_RC); + whirlpool_init (&ctx0, s_MT); whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); @@ -109,7 +102,6 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_BASIC ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -123,17 +115,11 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_BASIC ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -157,7 +143,7 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_BASIC ()) whirlpool_ctx_t ctx0; - whirlpool_init (&ctx0, s_MT, s_RC); + whirlpool_init (&ctx0, s_MT); whirlpool_update_global_swap (&ctx0, pws[gid].i, pws[gid].pw_len); diff --git a/OpenCL/m06100_a3-optimized.cl b/OpenCL/m06100_a3-optimized.cl index 944f3cbbd..02f700eab 100644 --- a/OpenCL/m06100_a3-optimized.cl +++ b/OpenCL/m06100_a3-optimized.cl @@ -14,12 +14,12 @@ #include "inc_hash_whirlpool.cl" #endif -DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void whirlpool_transform_transport_vector (const u32x *w, u32x *digest, SHM_TYPE u64 (*s_MT)[256]) { - whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_MT, s_RC); + whirlpool_transform_vector (w + 0, w + 4, w + 8, w + 12, digest, s_MT); } -DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), SHM_TYPE u64 (*s_MT)[256]) { /** * modifier @@ -82,13 +82,13 @@ DECLSPEC void m06100m (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER dgst[14] = 0; dgst[15] = 0; - whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC); + whirlpool_transform_transport_vector (w, dgst, s_MT); COMPARE_M_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]); } } -DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KERN_ATTR_BASIC (), SHM_TYPE u64 (*s_MT)[256]) { /** * modifier @@ -163,7 +163,7 @@ DECLSPEC void m06100s (u32 *w0, u32 *w1, u32 *w2, u32 *w3, const u32 pw_len, KER dgst[14] = 0; dgst[15] = 0; - whirlpool_transform_transport_vector (w, dgst, s_MT, s_RC); + whirlpool_transform_transport_vector (w, dgst, s_MT); COMPARE_S_SIMD (dgst[0], dgst[1], dgst[2], dgst[3]); } @@ -186,7 +186,6 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -200,17 +199,11 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -254,7 +247,7 @@ KERNEL_FQ void m06100_m04 (KERN_ATTR_BASIC ()) * main */ - m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT, s_RC); + m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT); } KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ()) @@ -274,7 +267,6 @@ KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -288,17 +280,11 @@ KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -342,7 +328,7 @@ KERNEL_FQ void m06100_m08 (KERN_ATTR_BASIC ()) * main */ - m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT, s_RC); + m06100m (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT); } KERNEL_FQ void m06100_m16 (KERN_ATTR_BASIC ()) @@ -366,7 +352,6 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -380,17 +365,11 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -434,7 +413,7 @@ KERNEL_FQ void m06100_s04 (KERN_ATTR_BASIC ()) * main */ - m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT, s_RC); + m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT); } KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ()) @@ -454,7 +433,6 @@ KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -468,17 +446,11 @@ KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -522,7 +494,7 @@ KERNEL_FQ void m06100_s08 (KERN_ATTR_BASIC ()) * main */ - m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT, s_RC); + m06100s (w0, w1, w2, w3, pw_len, pws, rules_buf, combs_buf, bfs_buf, tmps, hooks, bitmaps_buf_s1_a, bitmaps_buf_s1_b, bitmaps_buf_s1_c, bitmaps_buf_s1_d, bitmaps_buf_s2_a, bitmaps_buf_s2_b, bitmaps_buf_s2_c, bitmaps_buf_s2_d, plains_buf, digests_buf, hashes_shown, salt_bufs, esalt_bufs, d_return_buf, d_extra0_buf, d_extra1_buf, d_extra2_buf, d_extra3_buf, bitmap_mask, bitmap_shift1, bitmap_shift2, salt_pos, loop_pos, loop_cnt, il_cnt, digests_cnt, digests_offset, combs_mode, gid_max, s_MT); } KERNEL_FQ void m06100_s16 (KERN_ATTR_BASIC ()) diff --git a/OpenCL/m06100_a3-pure.cl b/OpenCL/m06100_a3-pure.cl index 65009e433..83ee08949 100644 --- a/OpenCL/m06100_a3-pure.cl +++ b/OpenCL/m06100_a3-pure.cl @@ -31,7 +31,6 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_VECTOR ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -45,17 +44,11 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_VECTOR ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -90,7 +83,7 @@ KERNEL_FQ void m06100_mxx (KERN_ATTR_VECTOR ()) whirlpool_ctx_vector_t ctx; - whirlpool_init_vector (&ctx, s_MT, s_RC); + whirlpool_init_vector (&ctx, s_MT); whirlpool_update_vector (&ctx, w, pw_len); @@ -122,7 +115,6 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_VECTOR ()) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -136,17 +128,11 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_VECTOR ()) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -193,7 +179,7 @@ KERNEL_FQ void m06100_sxx (KERN_ATTR_VECTOR ()) whirlpool_ctx_vector_t ctx; - whirlpool_init_vector (&ctx, s_MT, s_RC); + whirlpool_init_vector (&ctx, s_MT); whirlpool_update_vector (&ctx, w, pw_len); diff --git a/OpenCL/m06231-pure.cl b/OpenCL/m06231-pure.cl index 1310c92f4..ab0c0b99c 100644 --- a/OpenCL/m06231-pure.cl +++ b/OpenCL/m06231-pure.cl @@ -45,7 +45,7 @@ typedef struct tc_tmp } tc_tmp_t; -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -64,7 +64,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = ipad[14]; digest[15] = ipad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -83,7 +83,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = digest[ 0]; w0[1] = digest[ 1]; @@ -119,7 +119,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = opad[14]; digest[15] = opad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -138,7 +138,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); } KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) @@ -169,7 +169,6 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -183,17 +182,11 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -265,7 +258,7 @@ KERNEL_FQ void m06231_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -377,7 +370,6 @@ KERNEL_FQ void m06231_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -391,17 +383,11 @@ KERNEL_FQ void m06231_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -507,7 +493,7 @@ KERNEL_FQ void m06231_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) w3[2] = dgst[14]; w3[3] = dgst[15]; - hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC); + hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT); out[ 0] ^= dgst[ 0]; out[ 1] ^= dgst[ 1]; @@ -620,41 +606,6 @@ KERNEL_FQ void m06231_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) #endif - /** - * Whirlpool shared - */ - - #ifdef REAL_SHM - - LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; - - for (u32 i = lid; i < 256; i += lsz) - { - s_MT[0][i] = MT[0][i]; - s_MT[1][i] = MT[1][i]; - s_MT[2][i] = MT[2][i]; - s_MT[3][i] = MT[3][i]; - s_MT[4][i] = MT[4][i]; - s_MT[5][i] = MT[5][i]; - s_MT[6][i] = MT[6][i]; - s_MT[7][i] = MT[7][i]; - } - - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - - SYNC_THREADS (); - - #else - - CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; - - #endif - if (gid >= gid_max) return; u32 ukey1[8]; diff --git a/OpenCL/m06232-pure.cl b/OpenCL/m06232-pure.cl index 2af88c987..883fa5cf0 100644 --- a/OpenCL/m06232-pure.cl +++ b/OpenCL/m06232-pure.cl @@ -45,7 +45,7 @@ typedef struct tc_tmp } tc_tmp_t; -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -64,7 +64,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = ipad[14]; digest[15] = ipad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -83,7 +83,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = digest[ 0]; w0[1] = digest[ 1]; @@ -119,7 +119,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = opad[14]; digest[15] = opad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -138,7 +138,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); } KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) @@ -169,7 +169,6 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -183,17 +182,11 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -265,7 +258,7 @@ KERNEL_FQ void m06232_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -377,7 +370,6 @@ KERNEL_FQ void m06232_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -391,17 +383,11 @@ KERNEL_FQ void m06232_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -507,7 +493,7 @@ KERNEL_FQ void m06232_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) w3[2] = dgst[14]; w3[3] = dgst[15]; - hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC); + hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT); out[ 0] ^= dgst[ 0]; out[ 1] ^= dgst[ 1]; @@ -620,41 +606,6 @@ KERNEL_FQ void m06232_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) #endif - /** - * Whirlpool shared - */ - - #ifdef REAL_SHM - - LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; - - for (u32 i = lid; i < 256; i += lsz) - { - s_MT[0][i] = MT[0][i]; - s_MT[1][i] = MT[1][i]; - s_MT[2][i] = MT[2][i]; - s_MT[3][i] = MT[3][i]; - s_MT[4][i] = MT[4][i]; - s_MT[5][i] = MT[5][i]; - s_MT[6][i] = MT[6][i]; - s_MT[7][i] = MT[7][i]; - } - - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - - SYNC_THREADS (); - - #else - - CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; - - #endif - if (gid >= gid_max) return; u32 ukey1[8]; diff --git a/OpenCL/m06233-pure.cl b/OpenCL/m06233-pure.cl index c6d2b1c6d..c95944450 100644 --- a/OpenCL/m06233-pure.cl +++ b/OpenCL/m06233-pure.cl @@ -45,7 +45,7 @@ typedef struct tc_tmp } tc_tmp_t; -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -64,7 +64,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = ipad[14]; digest[15] = ipad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -83,7 +83,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = digest[ 0]; w0[1] = digest[ 1]; @@ -119,7 +119,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = opad[14]; digest[15] = opad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -138,7 +138,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); } KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) @@ -169,7 +169,6 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -183,17 +182,11 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -265,7 +258,7 @@ KERNEL_FQ void m06233_init (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -377,7 +370,6 @@ KERNEL_FQ void m06233_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -391,17 +383,11 @@ KERNEL_FQ void m06233_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -507,7 +493,7 @@ KERNEL_FQ void m06233_loop (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) w3[2] = dgst[14]; w3[3] = dgst[15]; - hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC); + hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT); out[ 0] ^= dgst[ 0]; out[ 1] ^= dgst[ 1]; @@ -620,41 +606,6 @@ KERNEL_FQ void m06233_comp (KERN_ATTR_TMPS_ESALT (tc_tmp_t, tc_t)) #endif - /** - * Whirlpool shared - */ - - #ifdef REAL_SHM - - LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; - - for (u32 i = lid; i < 256; i += lsz) - { - s_MT[0][i] = MT[0][i]; - s_MT[1][i] = MT[1][i]; - s_MT[2][i] = MT[2][i]; - s_MT[3][i] = MT[3][i]; - s_MT[4][i] = MT[4][i]; - s_MT[5][i] = MT[5][i]; - s_MT[6][i] = MT[6][i]; - s_MT[7][i] = MT[7][i]; - } - - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - - SYNC_THREADS (); - - #else - - CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; - - #endif - if (gid >= gid_max) return; u32 ukey1[8]; diff --git a/OpenCL/m13731-pure.cl b/OpenCL/m13731-pure.cl index 2f685c700..971e72cb3 100644 --- a/OpenCL/m13731-pure.cl +++ b/OpenCL/m13731-pure.cl @@ -86,7 +86,7 @@ DECLSPEC int check_header_0512 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32 return -1; } -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -105,7 +105,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = ipad[14]; digest[15] = ipad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -124,7 +124,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = digest[ 0]; w0[1] = digest[ 1]; @@ -160,7 +160,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = opad[14]; digest[15] = opad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -179,7 +179,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); } KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) @@ -210,7 +210,6 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -224,17 +223,11 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -306,7 +299,7 @@ KERNEL_FQ void m13731_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -469,7 +462,6 @@ KERNEL_FQ void m13731_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -483,17 +475,11 @@ KERNEL_FQ void m13731_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -630,7 +616,7 @@ KERNEL_FQ void m13731_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) w3[2] = dgst[14]; w3[3] = dgst[15]; - hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC); + hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT); out[ 0] ^= dgst[ 0]; out[ 1] ^= dgst[ 1]; @@ -776,7 +762,6 @@ KERNEL_FQ void m13731_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -790,17 +775,11 @@ KERNEL_FQ void m13731_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif diff --git a/OpenCL/m13732-pure.cl b/OpenCL/m13732-pure.cl index 786b98594..68b4f5283 100644 --- a/OpenCL/m13732-pure.cl +++ b/OpenCL/m13732-pure.cl @@ -137,7 +137,7 @@ DECLSPEC int check_header_1024 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32 return -1; } -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -156,7 +156,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = ipad[14]; digest[15] = ipad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -175,7 +175,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = digest[ 0]; w0[1] = digest[ 1]; @@ -211,7 +211,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = opad[14]; digest[15] = opad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -230,7 +230,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); } KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) @@ -261,7 +261,6 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -275,17 +274,11 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -357,7 +350,7 @@ KERNEL_FQ void m13732_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -520,7 +513,6 @@ KERNEL_FQ void m13732_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -534,17 +526,11 @@ KERNEL_FQ void m13732_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -681,7 +667,7 @@ KERNEL_FQ void m13732_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) w3[2] = dgst[14]; w3[3] = dgst[15]; - hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC); + hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT); out[ 0] ^= dgst[ 0]; out[ 1] ^= dgst[ 1]; @@ -828,7 +814,6 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -842,17 +827,11 @@ KERNEL_FQ void m13732_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif diff --git a/OpenCL/m13733-pure.cl b/OpenCL/m13733-pure.cl index 77c1b0131..1136733bc 100644 --- a/OpenCL/m13733-pure.cl +++ b/OpenCL/m13733-pure.cl @@ -202,7 +202,7 @@ DECLSPEC int check_header_1536 (GLOBAL_AS const vc_t *esalt_bufs, GLOBAL_AS u32 return -1; } -DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256], SHM_TYPE u64 *s_RC) +DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x *ipad, u32x *opad, u32x *digest, SHM_TYPE u64 (*s_MT)[256]) { digest[ 0] = ipad[ 0]; digest[ 1] = ipad[ 1]; @@ -221,7 +221,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = ipad[14]; digest[15] = ipad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -240,7 +240,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = digest[ 0]; w0[1] = digest[ 1]; @@ -276,7 +276,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x digest[14] = opad[14]; digest[15] = opad[15]; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); w0[0] = 0x80000000; w0[1] = 0; @@ -295,7 +295,7 @@ DECLSPEC void hmac_whirlpool_run_V (u32x *w0, u32x *w1, u32x *w2, u32x *w3, u32x w3[2] = 0; w3[3] = (64 + 64) * 8; - whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT, s_RC); + whirlpool_transform_vector (w0, w1, w2, w3, digest, s_MT); } KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) @@ -326,7 +326,6 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -340,17 +339,11 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -422,7 +415,7 @@ KERNEL_FQ void m13733_init (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) whirlpool_hmac_ctx_t whirlpool_hmac_ctx; - whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT, s_RC); + whirlpool_hmac_init_64 (&whirlpool_hmac_ctx, w0, w1, w2, w3, s_MT); tmps[gid].ipad[ 0] = whirlpool_hmac_ctx.ipad.h[ 0]; tmps[gid].ipad[ 1] = whirlpool_hmac_ctx.ipad.h[ 1]; @@ -585,7 +578,6 @@ KERNEL_FQ void m13733_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -599,17 +591,11 @@ KERNEL_FQ void m13733_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif @@ -746,7 +732,7 @@ KERNEL_FQ void m13733_loop (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) w3[2] = dgst[14]; w3[3] = dgst[15]; - hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT, s_RC); + hmac_whirlpool_run_V (w0, w1, w2, w3, ipad, opad, dgst, s_MT); out[ 0] ^= dgst[ 0]; out[ 1] ^= dgst[ 1]; @@ -894,7 +880,6 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) #ifdef REAL_SHM LOCAL_VK u64 s_MT[8][256]; - LOCAL_VK u64 s_RC[16]; for (u32 i = lid; i < 256; i += lsz) { @@ -908,17 +893,11 @@ KERNEL_FQ void m13733_comp (KERN_ATTR_TMPS_ESALT (vc_tmp_t, vc_t)) s_MT[7][i] = MT[7][i]; } - for (u32 i = lid; i < 16; i += lsz) - { - s_RC[i] = RC[i]; - } - SYNC_THREADS (); #else CONSTANT_AS u64a (*s_MT)[256] = MT; - CONSTANT_AS u64a *s_RC = RC; #endif