Memory Management: Check in OpenCL that CL_DEVICE_HOST_UNIFIED_MEMORY is true and if so, then half of the available memory
NEO driver: Activate the NEO driver (Intel GPU) after it passed several tests in hashcat using the latest NEO driver version
This commit is contained in:
parent
9254603960
commit
76e388ea47
@ -65,6 +65,8 @@
|
|||||||
- HIP Kernels: Dependency on hip/hip runtime.h has been removed to enable easier integration of the HIP backend under Windows
|
- HIP Kernels: Dependency on hip/hip runtime.h has been removed to enable easier integration of the HIP backend under Windows
|
||||||
- Kernel cache: Add kernel threads for hash calculation, which will later be used in the file name of the kernel cache
|
- Kernel cache: Add kernel threads for hash calculation, which will later be used in the file name of the kernel cache
|
||||||
- Memory Management: Refactored the code responsible for limiting kernel accel with the goal to avoid low host memory situations
|
- Memory Management: Refactored the code responsible for limiting kernel accel with the goal to avoid low host memory situations
|
||||||
|
- Memory Management: Check in OpenCL that CL_DEVICE_HOST_UNIFIED_MEMORY is true and if so, then half of the available memory
|
||||||
|
- NEO driver: Activate the NEO driver (Intel GPU) after it passed several tests in hashcat using the latest NEO driver version
|
||||||
- OpenCL Runtime: Workaround for Intel OpenCL runtime: segmentation fault when compiling hc_enc_next() / hc_enc_next_global()
|
- OpenCL Runtime: Workaround for Intel OpenCL runtime: segmentation fault when compiling hc_enc_next() / hc_enc_next_global()
|
||||||
- RC4 Kernels: Use improved native thread derivation for RC4-based hash modes 7500, 13100, 18200, 25400
|
- RC4 Kernels: Use improved native thread derivation for RC4-based hash modes 7500, 13100, 18200, 25400
|
||||||
- Shared Memory: Calculate the dynamic memory size of the kernel based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
|
- Shared Memory: Calculate the dynamic memory size of the kernel based on CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN
|
||||||
|
|||||||
@ -1111,6 +1111,7 @@ typedef struct hc_device_param
|
|||||||
u64 device_maxmem_alloc;
|
u64 device_maxmem_alloc;
|
||||||
u64 device_global_mem;
|
u64 device_global_mem;
|
||||||
u64 device_available_mem;
|
u64 device_available_mem;
|
||||||
|
int device_host_unified_memory;
|
||||||
u32 device_maxclock_frequency;
|
u32 device_maxclock_frequency;
|
||||||
size_t device_maxworkgroup_size;
|
size_t device_maxworkgroup_size;
|
||||||
u64 device_local_mem_size;
|
u64 device_local_mem_size;
|
||||||
|
|||||||
@ -8618,6 +8618,18 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
device_param->device_processors = device_processors;
|
device_param->device_processors = device_processors;
|
||||||
|
|
||||||
|
// device_host_unified_memory
|
||||||
|
|
||||||
|
cl_bool device_host_unified_memory = false;
|
||||||
|
|
||||||
|
if (hc_clGetDeviceInfo (hashcat_ctx, device_param->opencl_device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof (device_host_unified_memory), &device_host_unified_memory, NULL) == -1)
|
||||||
|
{
|
||||||
|
device_param->skipped = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
device_param->device_host_unified_memory = (device_host_unified_memory == CL_TRUE) ? 1 : 0;
|
||||||
|
|
||||||
// device_global_mem
|
// device_global_mem
|
||||||
|
|
||||||
cl_ulong device_global_mem = 0;
|
cl_ulong device_global_mem = 0;
|
||||||
@ -8644,6 +8656,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
device_param->device_maxmem_alloc = device_maxmem_alloc;
|
device_param->device_maxmem_alloc = device_maxmem_alloc;
|
||||||
|
|
||||||
|
if (device_param->device_host_unified_memory == 1)
|
||||||
|
{
|
||||||
|
// so, we actually have only half the memory because we need the same buffers on host side
|
||||||
|
|
||||||
|
device_param->device_maxmem_alloc /= 2;
|
||||||
|
}
|
||||||
|
|
||||||
// note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes
|
// note we'll limit to 2gb, otherwise this causes all kinds of weird errors because of possible integer overflows in opencl runtimes
|
||||||
// testwise disabling that
|
// testwise disabling that
|
||||||
//device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff);
|
//device_param->device_maxmem_alloc = MIN (device_maxmem_alloc, 0x7fffffff);
|
||||||
@ -8914,14 +8933,11 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
lowercase ((u8 *) opencl_device_version_lower, strlen (opencl_device_version_lower));
|
lowercase ((u8 *) opencl_device_version_lower, strlen (opencl_device_version_lower));
|
||||||
|
|
||||||
if ((strstr (opencl_device_version_lower, "neo "))
|
if ((strstr (opencl_device_version_lower, "beignet "))
|
||||||
|| (strstr (opencl_device_version_lower, " neo"))
|
|
||||||
|| (strstr (opencl_device_version_lower, "beignet "))
|
|
||||||
|| (strstr (opencl_device_version_lower, " beignet"))
|
|| (strstr (opencl_device_version_lower, " beignet"))
|
||||||
|| (strstr (opencl_device_version_lower, "mesa "))
|
|| (strstr (opencl_device_version_lower, "mesa "))
|
||||||
|| (strstr (opencl_device_version_lower, " mesa")))
|
|| (strstr (opencl_device_version_lower, " mesa")))
|
||||||
{
|
{
|
||||||
// NEO: https://github.com/hashcat/hashcat/issues/2342
|
|
||||||
// BEIGNET: https://github.com/hashcat/hashcat/issues/2243
|
// BEIGNET: https://github.com/hashcat/hashcat/issues/2243
|
||||||
// MESA: https://github.com/hashcat/hashcat/issues/2269
|
// MESA: https://github.com/hashcat/hashcat/issues/2269
|
||||||
|
|
||||||
@ -9694,11 +9710,7 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
|
|
||||||
device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
|
device_param->device_available_mem = device_param->device_global_mem - MAX_ALLOC_CHECKS_SIZE;
|
||||||
|
|
||||||
#if defined (_WIN)
|
if (device_param->opencl_device_type & CL_DEVICE_TYPE_GPU)
|
||||||
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && (device_param->opencl_platform_vendor_id == VENDOR_ID_NV))
|
|
||||||
#else
|
|
||||||
if ((device_param->opencl_device_type & CL_DEVICE_TYPE_GPU) && ((device_param->opencl_platform_vendor_id == VENDOR_ID_NV) || (device_param->opencl_platform_vendor_id == VENDOR_ID_AMD)))
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
// OK, so the problem here is the following:
|
// OK, so the problem here is the following:
|
||||||
// There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
|
// There's just CL_DEVICE_GLOBAL_MEM_SIZE to ask OpenCL about the total memory on the device,
|
||||||
@ -9771,6 +9783,13 @@ int backend_ctx_devices_init (hashcat_ctx_t *hashcat_ctx, const int comptime)
|
|||||||
hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
|
hc_clReleaseCommandQueue (hashcat_ctx, command_queue);
|
||||||
|
|
||||||
hc_clReleaseContext (hashcat_ctx, context);
|
hc_clReleaseContext (hashcat_ctx, context);
|
||||||
|
|
||||||
|
if (device_param->device_host_unified_memory == 1)
|
||||||
|
{
|
||||||
|
// so, we actually have only half the memory because we need the same buffers on host side
|
||||||
|
|
||||||
|
device_param->device_available_mem /= 2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -14614,7 +14633,7 @@ int backend_session_begin (hashcat_ctx_t *hashcat_ctx)
|
|||||||
|
|
||||||
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * kernel_accel_max;
|
u64 EXTRA_SPACE = (1024ULL * 1024ULL) * kernel_accel_max;
|
||||||
|
|
||||||
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 64ULL * 1024ULL * 1024ULL));
|
EXTRA_SPACE = MAX (EXTRA_SPACE, ( 256ULL * 1024ULL * 1024ULL));
|
||||||
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
|
EXTRA_SPACE = MIN (EXTRA_SPACE, (1024ULL * 1024ULL * 1024ULL));
|
||||||
|
|
||||||
if ((size_pws + EXTRA_SPACE) > device_param->device_maxmem_alloc) memory_limit_hit = 1;
|
if ((size_pws + EXTRA_SPACE) > device_param->device_maxmem_alloc) memory_limit_hit = 1;
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user