diff --git a/OpenCL/inc_cipher_aes-gcm.cl b/OpenCL/inc_cipher_aes-gcm.cl index d08bc40f6..efc05bd09 100644 --- a/OpenCL/inc_cipher_aes-gcm.cl +++ b/OpenCL/inc_cipher_aes-gcm.cl @@ -10,6 +10,7 @@ #include "inc_cipher_aes.h" #include "inc_cipher_aes-gcm.h" +#ifndef AES_GCM_ALT1 DECLSPEC void AES_GCM_shift_right_block(uchar *block) { u32 val; @@ -36,6 +37,7 @@ DECLSPEC void AES_GCM_shift_right_block(uchar *block) val >>= 1; p[0].x = hc_swap32_S (val); } +#endif // AES_GCM_ALT1 DECLSPEC void AES_GCM_inc32 (u32 *block) { @@ -52,14 +54,21 @@ DECLSPEC void AES_GCM_xor_block (u32 *dst, const u32 *src) DECLSPEC void AES_GCM_gf_mult (const uchar16 *x, const uchar16 *y, uchar16 *z) { - u32 i, j, k; + u32 i, j; z[0] = 0; + uchar16 v = y[0].s32107654ba98fedc; u8 x_char[16] = { x[0].s3, x[0].s2, x[0].s1, x[0].s0, x[0].s7, x[0].s6, x[0].s5, x[0].s4, x[0].sb, x[0].sa, x[0].s9, x[0].s8, x[0].sf, x[0].se, x[0].sd, x[0].sc }; + #ifndef AES_GCM_ALT1 u8 *v_char = (u8 *) &v; + #endif + + u32 *i_char = (u32 *) &v; + + u8 t = 0; for (i = 0; i < 16; i++) { @@ -70,15 +79,35 @@ DECLSPEC void AES_GCM_gf_mult (const uchar16 *x, const uchar16 *y, uchar16 *z) z[0] ^= v; } - if (v.sf & 0x01) + t = v.sf & 0x01; + + #ifndef AES_GCM_ALT1 + + AES_GCM_shift_right_block(v_char); + + #else + + i_char[0] = hc_swap32_S (i_char[0]); + i_char[1] = hc_swap32_S (i_char[1]); + i_char[2] = hc_swap32_S (i_char[2]); + i_char[3] = hc_swap32_S (i_char[3]); + + i_char[3] = (i_char[3] >> 1) | (i_char[2] << 31); + i_char[2] = (i_char[2] >> 1) | (i_char[1] << 31); + i_char[1] = (i_char[1] >> 1) | (i_char[0] << 31); + i_char[0] >>= 1; + + i_char[0] = hc_swap32_S (i_char[0]); + i_char[1] = hc_swap32_S (i_char[1]); + i_char[2] = hc_swap32_S (i_char[2]); + i_char[3] = hc_swap32_S (i_char[3]); + + #endif // AES_GCM_ALT1 + + if (t) { - AES_GCM_shift_right_block(v_char); v.s0 ^= 0xe1; } - else - { - AES_GCM_shift_right_block(v_char); - } } } } @@ -87,7 +116,7 @@ DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, u32 in_len, u32 * { u32 m = in_len / 16; - u32 *xpos = in; + const u32 *xpos = in; u32 tmp[4] = { 0 }; @@ -97,7 +126,7 @@ DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, u32 in_len, u32 * xpos += 4; - AES_GCM_gf_mult (out, subkey, tmp); + AES_GCM_gf_mult ((uchar16 *) out, (uchar16 *) subkey, (uchar16 *) tmp); tmp[0] = hc_swap32_S (tmp[0]); tmp[1] = hc_swap32_S (tmp[1]); @@ -126,7 +155,7 @@ DECLSPEC void AES_GCM_ghash (const u32 *subkey, const u32 *in, u32 in_len, u32 * AES_GCM_xor_block (out, tmp); - AES_GCM_gf_mult (out, subkey, tmp); + AES_GCM_gf_mult ((uchar16 *) out, (uchar16 *) subkey, (uchar16 *) tmp); out[0] = tmp[0]; out[1] = tmp[1]; diff --git a/OpenCL/inc_cipher_aes-gcm.h b/OpenCL/inc_cipher_aes-gcm.h index 97049a702..33e43ed12 100644 --- a/OpenCL/inc_cipher_aes-gcm.h +++ b/OpenCL/inc_cipher_aes-gcm.h @@ -6,7 +6,10 @@ #ifndef _INC_CIPHER_AES_GCM_H #define _INC_CIPHER_AES_GCM_H +#ifndef AES_GCM_ALT1 DECLSPEC void AES_GCM_shift_right_block(uchar *block); +#endif + DECLSPEC void AES_GCM_inc32 (u32 *block); DECLSPEC void AES_GCM_xor_block (u32 *dst, const u32 *src); DECLSPEC void AES_GCM_gf_mult (const uchar16 *x, const uchar16 *y, uchar16 *z); diff --git a/OpenCL/m27000-optimized.cl b/OpenCL/m27000-optimized.cl index f05d456ac..53cde203f 100644 --- a/OpenCL/m27000-optimized.cl +++ b/OpenCL/m27000-optimized.cl @@ -4,6 +4,7 @@ */ #define NEW_SIMD_CODE +#define AES_GCM_ALT1 #ifdef KERNEL_STATIC #include "inc_vendor.h" @@ -281,15 +282,15 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh if (gid >= gid_max) return; - const u64 lid = get_local_id (0); - const u64 lsz = get_local_size (0); - /** * aes shared */ #ifdef REAL_SHM + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + LOCAL_VK u32 s_te0[256]; LOCAL_VK u32 s_te1[256]; LOCAL_VK u32 s_te2[256]; @@ -332,9 +333,8 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh ukey[6] = tmps[gid].out[6]; ukey[7] = tmps[gid].out[7]; - u32 key_len = 32 * 8; - u32 key[60] = { 0 }; + u32 subKey[4] = { 0 }; AES256_set_encrypt_key (key, ukey, s_te0, s_te1, s_te2, s_te3); @@ -350,8 +350,6 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh esalt_bufs[DIGESTS_OFFSET].iv_buf[3] }; - const u32 iv_len = esalt_bufs[DIGESTS_OFFSET].iv_len; - u32 J0[4] = { iv[0], iv[1], @@ -389,7 +387,7 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh S[2] ^= enc[2]; S[3] ^= enc[3]; - AES_GCM_gf_mult (S, subKey, t); + AES_GCM_gf_mult ((uchar16 *) S, (uchar16 *) subKey, (uchar16 *) t); t[0] = hc_swap32_S (t[0]); t[1] = hc_swap32_S (t[1]); @@ -401,7 +399,7 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh S[2] = t[2] ^ enc[6]; S[3] = t[3] ^ enc[7]; - AES_GCM_gf_mult (S, subKey, t); + AES_GCM_gf_mult ((uchar16 *) S, (uchar16 *) subKey, (uchar16 *) t); t[0] = hc_swap32_S (t[0]); t[1] = hc_swap32_S (t[1]); @@ -413,7 +411,7 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh S[2] = t[2] ^ enc[10]; S[3] = t[3] ^ enc[11]; - AES_GCM_gf_mult (S, subKey, t); + AES_GCM_gf_mult ((uchar16 *) S, (uchar16 *) subKey, (uchar16 *) t); t[0] = hc_swap32_S (t[0]); t[1] = hc_swap32_S (t[1]); @@ -435,7 +433,7 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh S[2] ^= t[2]; S[3] ^= t[3]; - AES_GCM_gf_mult (S, subKey, t); + AES_GCM_gf_mult ((uchar16 *) S, (uchar16 *) subKey, (uchar16 *) t); S[0] = hc_swap32_S (t[0]); S[1] = hc_swap32_S (t[1]); @@ -452,7 +450,7 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh S[2] ^= len_buf[2]; S[3] ^= len_buf[3]; - AES_GCM_gf_mult (S, subKey, t); + AES_GCM_gf_mult ((uchar16 *) S, (uchar16 *) subKey, (uchar16 *) t); S[0] = hc_swap32_S (t[0]); S[1] = hc_swap32_S (t[1]); diff --git a/OpenCL/m27000-pure.cl b/OpenCL/m27000-pure.cl index 23a377985..30151a0dc 100644 --- a/OpenCL/m27000-pure.cl +++ b/OpenCL/m27000-pure.cl @@ -4,6 +4,7 @@ */ #define NEW_SIMD_CODE +#define AES_GCM_ALT1 #ifdef KERNEL_STATIC #include "inc_vendor.h" @@ -281,15 +282,15 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh if (gid >= gid_max) return; - const u64 lid = get_local_id (0); - const u64 lsz = get_local_size (0); - /** * aes shared */ #ifdef REAL_SHM + const u64 lid = get_local_id (0); + const u64 lsz = get_local_size (0); + LOCAL_VK u32 s_te0[256]; LOCAL_VK u32 s_te1[256]; LOCAL_VK u32 s_te2[256]; @@ -386,7 +387,7 @@ KERNEL_FQ void m27000_comp (KERN_ATTR_TMPS_ESALT (pbkdf2_sha256_tmp_t, pbkdf2_sh u32 S[4] = { 0 }; u32 S_len = 16; - u32 aad_buf = 0; + u32 aad_buf[4] = { 0 }; u32 aad_len = 0; AES_GCM_GHASH (subKey, aad_buf, aad_len, enc, enc_len, S);