From 8dfd1bf066a675e6783c9c87fcf8058c6d0eb02b Mon Sep 17 00:00:00 2001 From: DoZ10 Date: Mon, 15 May 2017 18:34:34 -0400 Subject: [PATCH] Final. Implemented offset parameter to reach next keystream in kernels. Tested all kernels with scalar and vector modes --- OpenCL/m15400_a0.cl | 75 +++++++++++++++++++++++++--- OpenCL/m15400_a1.cl | 114 ++++++++++++++++++++++++++++++++++++++---- OpenCL/m15400_a3.cl | 118 +++++++++++++++++++++++++++++++++++++++----- src/interface.c | 2 +- tools/test.pl | 4 +- 5 files changed, 281 insertions(+), 32 deletions(-) diff --git a/OpenCL/m15400_a0.cl b/OpenCL/m15400_a0.cl index ead3984f3..c030a9245 100644 --- a/OpenCL/m15400_a0.cl +++ b/OpenCL/m15400_a0.cl @@ -33,6 +33,10 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[2], const u32 offset, const u32 iv[2], const u32 plain[4], u32x digest[4]) { + /** + * Key expansion + */ + u32x ctx[16]; ctx[ 0] = CHACHA_CONST_00; @@ -52,7 +56,11 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ ctx[14] = iv[1]; ctx[15] = iv[0]; - u32x x[16]; + /** + * Generate 64 byte keystream + */ + + u32x x[32]; x[ 0] = ctx[ 0]; x[ 1] = ctx[ 1]; @@ -103,12 +111,69 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[14] += ctx[14]; x[15] += ctx[15]; + if (offset > 36) + { + /** + * Generate a second 64 byte keystream + */ + + ctx[12]++; + + if (all(ctx[12] == 0)) ctx[13]++; + + x[16] = ctx[ 0]; + x[17] = ctx[ 1]; + x[18] = ctx[ 2]; + x[19] = ctx[ 3]; + x[20] = ctx[ 4]; + x[21] = ctx[ 5]; + x[22] = ctx[ 6]; + x[23] = ctx[ 7]; + x[24] = ctx[ 8]; + x[25] = ctx[ 9]; + x[26] = ctx[10]; + x[27] = ctx[11]; + x[28] = ctx[12]; + x[29] = ctx[13]; + x[30] = ctx[14]; + x[31] = ctx[15]; + + for (u8 i = 0; i < 10; ++i) + { + /* Column round */ + QR(16, 20, 24, 28); + QR(17, 21, 25, 29); + QR(18, 22, 26, 30); + QR(19, 23, 27, 31); + + /* Diagonal round */ + QR(16, 21, 26, 31); + QR(17, 22, 27, 28); + QR(18, 23, 24, 29); + QR(19, 20, 25, 30); + } + + x[16] += ctx[ 0]; + x[17] += ctx[ 1]; + x[18] += ctx[ 2]; + x[19] += ctx[ 3]; + x[20] += ctx[ 4]; + x[21] += ctx[ 5]; + x[22] += ctx[ 6]; + x[23] += ctx[ 7]; + x[24] += ctx[ 8]; + x[25] += ctx[ 9]; + x[26] += ctx[10]; + x[27] += ctx[11]; + x[28] += ctx[12]; + x[29] += ctx[13]; + x[30] += ctx[14]; + x[31] += ctx[15]; + } u32 index = offset / 4; u32 remain = offset % 4; - //printf("index: %d, offset: %d, remain: %d\n", index, offset, remain); - digest[0] = plain[1]; digest[1] = plain[0]; @@ -130,8 +195,6 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ digest[1] ^= x[index + 0]; digest[0] ^= x[index + 1]; } - - //printf("digest[0]: %08x, x[0]: %08x, digest[1]: %08x, x[1]: %08x\n", digest[0], x[0], digest[1], x[1]); } __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const comb_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const chacha20_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -291,8 +354,6 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule const u32x r2 = digest[2]; const u32x r3 = digest[3]; - // printf("r0: %08x, search[0]: %08x, r1: %08x, search[1]: %08x, r2: %08x, search[2]: %08x, r3: %08x, search[3]: %08x\n", r0, search[0], r1, search[1], r2, search[2], r3, search[3]); - COMPARE_S_SIMD(r0, r1, r2, r3); } } diff --git a/OpenCL/m15400_a1.cl b/OpenCL/m15400_a1.cl index b82881c60..9fd51913c 100644 --- a/OpenCL/m15400_a1.cl +++ b/OpenCL/m15400_a1.cl @@ -31,8 +31,12 @@ x[b] = rotl32(x[b] ^ x[c], 7); \ } while (0); -void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[2], const u32 iv[2], const u32 plain[4], u32x digest[4]) +void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[2], const u32 offset, const u32 iv[2], const u32 plain[4], u32x digest[4]) { + /** + * Key expansion + */ + u32x ctx[16]; ctx[ 0] = CHACHA_CONST_00; @@ -52,7 +56,11 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ ctx[14] = iv[1]; ctx[15] = iv[0]; - u32x x[16]; + /** + * Generate 64 byte keystream + */ + + u32x x[32]; x[ 0] = ctx[ 0]; x[ 1] = ctx[ 1]; @@ -71,8 +79,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[14] = ctx[14]; x[15] = ctx[15]; - for (u8 i = 0; i < 10; ++i) { - + for (u8 i = 0; i < 10; ++i) + { /* Column round */ QR(0, 4, 8, 12); QR(1, 5, 9, 13); @@ -103,10 +111,90 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[14] += ctx[14]; x[15] += ctx[15]; - digest[1] = plain[0] ^ x[0]; - digest[0] = plain[1] ^ x[1]; - digest[3] = plain[2] ^ x[2]; - digest[2] = plain[3] ^ x[3]; + if (offset > 36) + { + /** + * Generate a second 64 byte keystream + */ + + ctx[12]++; + + if (all(ctx[12] == 0)) ctx[13]++; + + x[16] = ctx[ 0]; + x[17] = ctx[ 1]; + x[18] = ctx[ 2]; + x[19] = ctx[ 3]; + x[20] = ctx[ 4]; + x[21] = ctx[ 5]; + x[22] = ctx[ 6]; + x[23] = ctx[ 7]; + x[24] = ctx[ 8]; + x[25] = ctx[ 9]; + x[26] = ctx[10]; + x[27] = ctx[11]; + x[28] = ctx[12]; + x[29] = ctx[13]; + x[30] = ctx[14]; + x[31] = ctx[15]; + + for (u8 i = 0; i < 10; ++i) + { + /* Column round */ + QR(16, 20, 24, 28); + QR(17, 21, 25, 29); + QR(18, 22, 26, 30); + QR(19, 23, 27, 31); + + /* Diagonal round */ + QR(16, 21, 26, 31); + QR(17, 22, 27, 28); + QR(18, 23, 24, 29); + QR(19, 20, 25, 30); + } + + x[16] += ctx[ 0]; + x[17] += ctx[ 1]; + x[18] += ctx[ 2]; + x[19] += ctx[ 3]; + x[20] += ctx[ 4]; + x[21] += ctx[ 5]; + x[22] += ctx[ 6]; + x[23] += ctx[ 7]; + x[24] += ctx[ 8]; + x[25] += ctx[ 9]; + x[26] += ctx[10]; + x[27] += ctx[11]; + x[28] += ctx[12]; + x[29] += ctx[13]; + x[30] += ctx[14]; + x[31] += ctx[15]; + } + + u32 index = offset / 4; + u32 remain = offset % 4; + + digest[0] = plain[1]; + digest[1] = plain[0]; + + if (remain > 0) + { + u32x tmp[3]; + tmp[0] = x[index + 0]; + tmp[1] = x[index + 1]; + tmp[2] = x[index + 2]; + + digest[1] ^= tmp[0] >> (remain * 8); + digest[1] ^= tmp[1] << (32 - remain * 8); + + digest[0] ^= tmp[1] >> (remain * 8); + digest[0] ^= tmp[2] << (32 - remain * 8); + } + else + { + digest[1] ^= x[index + 0]; + digest[0] ^= x[index + 1]; + } } __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const comb_t *combs_buf, __global const bf_t *bfs_buf, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const chacha20_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -139,10 +227,13 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32 iv[2] = { 0 }; u32 plain[2] = { 0 }; u32 position[2] = { 0 }; + u32 offset = 0; position[0] = esalt_bufs->position[0]; position[1] = esalt_bufs->position[1]; + offset = esalt_bufs->offset; + iv[0] = esalt_bufs->iv[0]; iv[1] = esalt_bufs->iv[1]; @@ -224,7 +315,7 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32x digest[4] = { 0 }; - chacha20_transform (w0, w1, position, iv, plain, digest); + chacha20_transform (w0, w1, position, offset, iv, plain, digest); const u32x r0 = digest[0]; const u32x r1 = digest[1]; @@ -276,10 +367,13 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule u32 iv[2] = { 0 }; u32 plain[2] = { 0 }; u32 position[2] = { 0 }; + u32 offset = 0; position[0] = esalt_bufs->position[0]; position[1] = esalt_bufs->position[1]; + offset = esalt_bufs->offset; + iv[0] = esalt_bufs->iv[0]; iv[1] = esalt_bufs->iv[1]; @@ -373,7 +467,7 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule u32x digest[4] = { 0 }; - chacha20_transform (w0, w1, position, iv, plain, digest); + chacha20_transform (w0, w1, position, offset, iv, plain, digest); const u32x r0 = digest[0]; const u32x r1 = digest[1]; diff --git a/OpenCL/m15400_a3.cl b/OpenCL/m15400_a3.cl index 7131c4c12..3896a8c23 100644 --- a/OpenCL/m15400_a3.cl +++ b/OpenCL/m15400_a3.cl @@ -29,8 +29,12 @@ x[b] = rotl32(x[b] ^ x[c], 7); \ } while (0); -void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[2], const u32 iv[2], const u32 plain[4], u32x digest[4]) +void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[2], const u32 offset, const u32 iv[2], const u32 plain[4], u32x digest[4]) { + /** + * Key expansion + */ + u32x ctx[16]; ctx[ 0] = CHACHA_CONST_00; @@ -50,7 +54,11 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ ctx[14] = iv[1]; ctx[15] = iv[0]; - u32x x[16]; + /** + * Generate 64 byte keystream + */ + + u32x x[32]; x[ 0] = ctx[ 0]; x[ 1] = ctx[ 1]; @@ -69,8 +77,8 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[14] = ctx[14]; x[15] = ctx[15]; - for (u8 i = 0; i < 10; ++i) { - + for (u8 i = 0; i < 10; ++i) + { /* Column round */ QR(0, 4, 8, 12); QR(1, 5, 9, 13); @@ -101,10 +109,90 @@ void chacha20_transform (const u32x w0[4], const u32x w1[4], const u32 position[ x[14] += ctx[14]; x[15] += ctx[15]; - digest[1] = plain[0] ^ x[0]; - digest[0] = plain[1] ^ x[1]; - digest[3] = plain[2] ^ x[2]; - digest[2] = plain[3] ^ x[3]; + if (offset > 36) + { + /** + * Generate a second 64 byte keystream + */ + + ctx[12]++; + + if (all(ctx[12] == 0)) ctx[13]++; + + x[16] = ctx[ 0]; + x[17] = ctx[ 1]; + x[18] = ctx[ 2]; + x[19] = ctx[ 3]; + x[20] = ctx[ 4]; + x[21] = ctx[ 5]; + x[22] = ctx[ 6]; + x[23] = ctx[ 7]; + x[24] = ctx[ 8]; + x[25] = ctx[ 9]; + x[26] = ctx[10]; + x[27] = ctx[11]; + x[28] = ctx[12]; + x[29] = ctx[13]; + x[30] = ctx[14]; + x[31] = ctx[15]; + + for (u8 i = 0; i < 10; ++i) + { + /* Column round */ + QR(16, 20, 24, 28); + QR(17, 21, 25, 29); + QR(18, 22, 26, 30); + QR(19, 23, 27, 31); + + /* Diagonal round */ + QR(16, 21, 26, 31); + QR(17, 22, 27, 28); + QR(18, 23, 24, 29); + QR(19, 20, 25, 30); + } + + x[16] += ctx[ 0]; + x[17] += ctx[ 1]; + x[18] += ctx[ 2]; + x[19] += ctx[ 3]; + x[20] += ctx[ 4]; + x[21] += ctx[ 5]; + x[22] += ctx[ 6]; + x[23] += ctx[ 7]; + x[24] += ctx[ 8]; + x[25] += ctx[ 9]; + x[26] += ctx[10]; + x[27] += ctx[11]; + x[28] += ctx[12]; + x[29] += ctx[13]; + x[30] += ctx[14]; + x[31] += ctx[15]; + } + + u32 index = offset / 4; + u32 remain = offset % 4; + + digest[0] = plain[1]; + digest[1] = plain[0]; + + if (remain > 0) + { + u32x tmp[3]; + tmp[0] = x[index + 0]; + tmp[1] = x[index + 1]; + tmp[2] = x[index + 2]; + + digest[1] ^= tmp[0] >> (remain * 8); + digest[1] ^= tmp[1] << (32 - remain * 8); + + digest[0] ^= tmp[1] >> (remain * 8); + digest[0] ^= tmp[2] << (32 - remain * 8); + } + else + { + digest[1] ^= x[index + 0]; + digest[0] ^= x[index + 1]; + } } __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rules_buf, __global const comb_t *combs_buf, __global const u32x *words_buf_r, __global void *tmps, __global void *hooks, __global const u32 *bitmaps_buf_s1_a, __global const u32 *bitmaps_buf_s1_b, __global const u32 *bitmaps_buf_s1_c, __global const u32 *bitmaps_buf_s1_d, __global const u32 *bitmaps_buf_s2_a, __global const u32 *bitmaps_buf_s2_b, __global const u32 *bitmaps_buf_s2_c, __global const u32 *bitmaps_buf_s2_d, __global plain_t *plains_buf, __global const digest_t *digests_buf, __global u32 *hashes_shown, __global const salt_t *salt_bufs, __global const chacha20_t *esalt_bufs, __global u32 *d_return_buf, __global u32 *d_scryptV0_buf, __global u32 *d_scryptV1_buf, __global u32 *d_scryptV2_buf, __global u32 *d_scryptV3_buf, const u32 bitmap_mask, const u32 bitmap_shift1, const u32 bitmap_shift2, const u32 salt_pos, const u32 loop_pos, const u32 loop_cnt, const u32 il_cnt, const u32 digests_cnt, const u32 digests_offset, const u32 combs_mode, const u32 gid_max) @@ -123,10 +211,13 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32 iv[2] = { 0 }; u32 plain[2] = { 0 }; u32 position[2] = { 0 }; - + u32 offset = 0; + position[0] = esalt_bufs->position[0]; position[1] = esalt_bufs->position[1]; + offset = esalt_bufs->offset; + iv[0] = esalt_bufs->iv[0]; iv[1] = esalt_bufs->iv[1]; @@ -170,7 +261,7 @@ __kernel void m15400_m04 (__global pw_t *pws, __global const kernel_rule_t *rule u32x digest[4] = { 0 }; - chacha20_transform (w0, w1, position, iv, plain, digest); + chacha20_transform (w0, w1, position, offset, iv, plain, digest); const u32x r0 = digest[0]; const u32x r1 = digest[1]; @@ -205,10 +296,13 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule u32 iv[2] = { 0 }; u32 plain[2] = { 0 }; u32 position[2] = { 0 }; - + u32 offset = 0; + position[0] = esalt_bufs->position[0]; position[1] = esalt_bufs->position[1]; + offset = esalt_bufs->offset; + iv[0] = esalt_bufs->iv[0]; iv[1] = esalt_bufs->iv[1]; @@ -264,7 +358,7 @@ __kernel void m15400_s04 (__global pw_t *pws, __global const kernel_rule_t *rule u32x digest[4] = { 0 }; - chacha20_transform (w0, w1, position, iv, plain, digest); + chacha20_transform (w0, w1, position, offset, iv, plain, digest); const u32x r0 = digest[0]; const u32x r1 = digest[1]; diff --git a/src/interface.c b/src/interface.c index 00536c152..cfc0f67ab 100644 --- a/src/interface.c +++ b/src/interface.c @@ -5330,7 +5330,7 @@ int chacha20_parse_hash (u8 *input_buf, u32 input_len, hash_t *hash_buf, MAYBE_U if (offset_marker == NULL) return (PARSER_SEPARATOR_UNMATCHED); int offset = atoi ((char*) offset_marker); - if (offset > 36) return (PARSER_SALT_VALUE); + if (offset > 63) return (PARSER_SALT_VALUE); u8 *iv_marker = (u8 *) strchr ((const char *) offset_marker, '*') + 1; if (iv_marker == NULL) return (PARSER_SEPARATOR_UNMATCHED); diff --git a/tools/test.pl b/tools/test.pl index 60988d121..41c23701a 100755 --- a/tools/test.pl +++ b/tools/test.pl @@ -8158,14 +8158,14 @@ END_CODE { my $eight_byte_iv = pack("H*", "0000000000000000"); my $eight_byte_counter = pack("H*", "0100000000000000"); # little endian 64 bits - my $offset = int(rand(36)); + my $offset = int(rand(63)); my $pad_len = 32 - length $word_buf; my $key = $word_buf . "\0" x $pad_len; my $cipher = Crypt::OpenSSH::ChachaPoly->new($key); $cipher->ivsetup($eight_byte_iv, $eight_byte_counter); - my $enc = $cipher->encrypt("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); + my $enc = $cipher->encrypt("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"); my $enc_offset = substr($enc, $offset, 8); $hash_buf = $enc_offset;