Skip to content

Commit

Permalink
NT-OpenCL: Early partial transfer of keybuffer
Browse files Browse the repository at this point in the history
This technique is already well tested in other formats.
About a 10% boost on a 2080 Ti, against 5300 hashes with a pure wordlist and no mask.

Also adds an entry in doc/NEWS.  Closes openwall#5245.
  • Loading branch information
magnumripper committed Mar 16, 2023
1 parent 2c7a4c5 commit 89864cd
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 6 deletions.
5 changes: 5 additions & 0 deletions doc/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,11 @@ Major changes from 1.9.0-jumbo-1 (May 2019) in this bleeding-edge version:

- Added support for cracking SNTP-MS "timeroast". [magnum; 2023]

- Major overhaul of NT-opencl: Performance boost up to 50% depending on GPU.
[magnum; 2023]

- Add NT-long-opencl (password length of up to 125 bytes). [magnum; 2023]


Major changes from 1.8.0-jumbo-1 (December 2014) to 1.9.0-jumbo-1 (May 2019):

Expand Down
30 changes: 24 additions & 6 deletions src/opencl_nt_fmt_plug.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,9 @@ static cl_mem buffer_keys, buffer_idx, buffer_int_keys, buffer_int_key_loc;
static cl_uint *saved_plain, *saved_idx, *saved_int_key_loc;
static int static_gpu_locations[MASK_FMT_INT_PLHDR];

static unsigned int key_idx = 0;
static size_t key_idx;
static size_t key_offset, idx_offset;

static struct fmt_main *self;

#define STEP 0
Expand Down Expand Up @@ -505,6 +507,8 @@ static int get_hash_6(int index) { return bt_hash_table_64[ocl_hc_hash_ids[3 + 3
/*
 * Reset the key bookkeeping: the running index into the saved key words
 * and both early-transfer offsets (keys and indices) go back to zero.
 */
static void clear_keys(void)
{
	key_offset = idx_offset = 0;
	key_idx = 0;
}

static void set_key(char *_key, int index)
Expand Down Expand Up @@ -535,6 +539,20 @@ static void set_key(char *_key, int index)
}
if (len)
saved_plain[key_idx++] = *key & (0xffffffffU >> (32 - (len << 3)));

/* Early partial transfer to GPU every 2 MB */
if (4 * key_idx - key_offset > (2 << 20)) {
HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_keys, CL_FALSE, key_offset, 4 * key_idx - key_offset, saved_plain + key_offset / 4, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_keys.");
HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_idx, CL_FALSE, idx_offset, 4 * (index + 1) - idx_offset, saved_idx + idx_offset / 4, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_idx.");

if (!mask_gpu_is_static)
HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc, CL_FALSE, idx_offset, 4 * (index + 1) - idx_offset, saved_int_key_loc + (idx_offset / 4), 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_int_key_loc.");

HANDLE_CLERROR(clFlush(queue[gpu_id]), "failed in clFlush");

key_offset = 4 * key_idx;
idx_offset = 4 * (index + 1);
}
}

static char *get_key(int index)
Expand Down Expand Up @@ -593,13 +611,13 @@ static int crypt_all(int *pcount, struct db_salt *salt)
//fprintf(stderr, "%s(%d) lws "Zu" gws "Zu" idx %u int_cand %d\n", __FUNCTION__, count, local_work_size, gws, key_idx, mask_int_cand.num_int_cand);

// copy keys to the device
if (key_idx)
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_keys, CL_FALSE, 0, 4 * key_idx, saved_plain, 0, NULL, multi_profilingEvent[0]), "failed in clEnqueueWriteBuffer buffer_keys.");

BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_idx, CL_FALSE, 0, 4 * gws, saved_idx, 0, NULL, multi_profilingEvent[1]), "failed in clEnqueueWriteBuffer buffer_idx.");
if (key_idx) {
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_keys, CL_FALSE, key_offset, 4 * key_idx - key_offset, saved_plain + key_offset / 4, 0, NULL, multi_profilingEvent[0]), "failed in clEnqueueWriteBuffer buffer_keys.");
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_idx, CL_FALSE, idx_offset, 4 * gws - idx_offset, saved_idx + idx_offset / 4, 0, NULL, multi_profilingEvent[1]), "failed in clEnqueueWriteBuffer buffer_idx.");
}

if (!mask_gpu_is_static)
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc, CL_FALSE, 0, 4 * gws, saved_int_key_loc, 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_int_key_loc.");
BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], buffer_int_key_loc, CL_FALSE, idx_offset, 4 * gws - idx_offset, saved_int_key_loc + (idx_offset / 4), 0, NULL, NULL), "failed in clEnqueueWriteBuffer buffer_int_key_loc.");

return ocl_hc_64_extract_info(salt, set_kernel_args, set_kernel_args_kpc, init_kernel, gws, lws, pcount);
}
Expand Down

0 comments on commit 89864cd

Please sign in to comment.