Skip to content
This repository was archived by the owner on Jan 9, 2021. It is now read-only.

Commit 94c8be0

Browse files
Wolf, bitbandi
Wolf
authored and committed
Added host code required to utilize a custom Lyra2REv2 AMD binary.
1 parent bc46bfc commit 94c8be0

File tree

2 files changed

+25
-128
lines changed

2 files changed

+25
-128
lines changed

algorithm.c

+25 -24
Original file line number | Diff line number | Diff line change
@@ -880,48 +880,49 @@ static cl_int queue_lyra2rev2_kernel(struct __clState *clState, struct _dev_blk_
880880
unsigned int num;
881881
cl_int status = 0;
882882
cl_ulong le_target;
883+
uint32_t buf[11];
883884

884-
// le_target = *(cl_uint *)(blk->work->device_target + 28);
885885
le_target = *(cl_ulong *)(blk->work->device_target + 24);
886-
flip80(clState->cldata, blk->work->data);
887-
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 80, clState->cldata, 0, NULL, NULL);
888886

889887
// blake - search
890888
kernel = &clState->kernel;
891889
num = 0;
892-
// CL_SET_ARG(clState->CLbuffer0);
893-
CL_SET_ARG(clState->buffer1);
894-
CL_SET_ARG(blk->work->blk.ctx_a);
895-
CL_SET_ARG(blk->work->blk.ctx_b);
896-
CL_SET_ARG(blk->work->blk.ctx_c);
897-
CL_SET_ARG(blk->work->blk.ctx_d);
898-
CL_SET_ARG(blk->work->blk.ctx_e);
899-
CL_SET_ARG(blk->work->blk.ctx_f);
900-
CL_SET_ARG(blk->work->blk.ctx_g);
901-
CL_SET_ARG(blk->work->blk.ctx_h);
902-
CL_SET_ARG(blk->work->blk.cty_a);
903-
CL_SET_ARG(blk->work->blk.cty_b);
904-
CL_SET_ARG(blk->work->blk.cty_c);
890+
891+
buf[0] = blk->work->blk.ctx_a;
892+
buf[1] = blk->work->blk.ctx_b;
893+
buf[2] = blk->work->blk.ctx_c;
894+
buf[3] = blk->work->blk.ctx_d;
895+
buf[4] = blk->work->blk.ctx_e;
896+
buf[5] = blk->work->blk.ctx_f;
897+
buf[6] = blk->work->blk.ctx_g;
898+
buf[7] = blk->work->blk.ctx_h;
899+
buf[8] = blk->work->blk.cty_a;
900+
buf[9] = blk->work->blk.cty_b;
901+
buf[10] = blk->work->blk.cty_c;
902+
903+
status = clEnqueueWriteBuffer(clState->commandQueue, clState->CLbuffer0, true, 0, 44, buf, 0, NULL, NULL);
904+
905+
CL_SET_ARG(clState->CLbuffer0);
906+
CL_SET_ARG(clState->padbuffer8);
905907

906908
// keccak - search1
907909
kernel = clState->extra_kernels;
908-
CL_SET_ARG_0(clState->buffer1);
910+
CL_SET_ARG_0(clState->padbuffer8);
909911
// cubehash - search2
910912
num = 0;
911-
CL_NEXTKERNEL_SET_ARG_0(clState->buffer1);
913+
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
912914
// lyra - search3
913915
num = 0;
914-
CL_NEXTKERNEL_SET_ARG_N(0, clState->buffer1);
915-
CL_SET_ARG_N(1, clState->padbuffer8);
916+
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
916917
// skein -search4
917918
num = 0;
918-
CL_NEXTKERNEL_SET_ARG_0(clState->buffer1);
919+
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
919920
// cubehash - search5
920921
num = 0;
921-
CL_NEXTKERNEL_SET_ARG_0(clState->buffer1);
922+
CL_NEXTKERNEL_SET_ARG_0(clState->padbuffer8);
922923
// bmw - search6
923924
num = 0;
924-
CL_NEXTKERNEL_SET_ARG(clState->buffer1);
925+
CL_NEXTKERNEL_SET_ARG(clState->padbuffer8);
925926
CL_SET_ARG(clState->outputBuffer);
926927
CL_SET_ARG(le_target);
927928

@@ -1258,7 +1259,7 @@ static algorithm_settings_t algos[] = {
12581259
{ "fresh", ALGO_FRESH, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 4 * 16 * 4194304, 0, fresh_regenhash, NULL, NULL, queue_fresh_kernel, gen_hash, NULL },
12591260

12601261
{ "lyra2re", ALGO_LYRA2RE, "", 1, 128, 128, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 4, 2 * 8 * 4194304, 0, lyra2re_regenhash, blake256_midstate, blake256_prepare_work, queue_lyra2re_kernel, gen_hash, NULL },
1261-
{ "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, -1, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, lyra2rev2_regenhash, blake256_midstate, blake256_prepare_work, queue_lyra2rev2_kernel, gen_hash, append_neoscrypt_compiler_options },
1262+
{ "lyra2rev2", ALGO_LYRA2REV2, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 6, 2 * 8 * 4194304, 0, lyra2rev2_regenhash, blake256_midstate, blake256_prepare_work, queue_lyra2rev2_kernel, gen_hash, NULL },
12621263

12631264
// kernels starting from this will have difficulty calculated by using fuguecoin algorithm
12641265
#define A_FUGUE(a, b, c) \

ocl.c

-104
Original file line number | Diff line number | Diff line change
@@ -612,90 +612,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
612612

613613
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
614614
}
615-
616-
// Lyra2re v2 TC
617-
else if (cgpu->algorithm.type == ALGO_LYRA2REV2 && !cgpu->opt_tc) {
618-
size_t glob_thread_count;
619-
long max_int;
620-
unsigned char type = 0;
621-
622-
// determine which intensity type to use
623-
if (cgpu->rawintensity > 0) {
624-
glob_thread_count = cgpu->rawintensity;
625-
max_int = glob_thread_count;
626-
type = 2;
627-
}
628-
else if (cgpu->xintensity > 0) {
629-
glob_thread_count = clState->compute_shaders * ((cgpu->algorithm.xintensity_shift) ? (1UL << (cgpu->algorithm.xintensity_shift + cgpu->xintensity)) : cgpu->xintensity);
630-
max_int = cgpu->xintensity;
631-
type = 1;
632-
}
633-
else {
634-
glob_thread_count = 1UL << (cgpu->algorithm.intensity_shift + cgpu->intensity);
635-
max_int = ((cgpu->dynamic) ? MAX_INTENSITY : cgpu->intensity);
636-
}
637-
638-
glob_thread_count = ((glob_thread_count < cgpu->work_size) ? cgpu->work_size : glob_thread_count);
639-
640-
// if TC * scratchbuf size is too big for memory... reduce to max
641-
if ((glob_thread_count * LYRA_SCRATCHBUF_SIZE) >= (uint64_t)cgpu->max_alloc) {
642-
643-
/* Selected intensity will not run on this GPU. Not enough memory.
644-
* Adapt the memory setting. */
645-
// depending on intensity type used, reduce the intensity until it fits into the GPU max_alloc
646-
switch (type) {
647-
//raw intensity
648-
case 2:
649-
while ((glob_thread_count * LYRA_SCRATCHBUF_SIZE) > (uint64_t)cgpu->max_alloc) {
650-
--glob_thread_count;
651-
}
652-
653-
max_int = glob_thread_count;
654-
cgpu->rawintensity = glob_thread_count;
655-
break;
656-
657-
//x intensity
658-
case 1:
659-
glob_thread_count = cgpu->max_alloc / LYRA_SCRATCHBUF_SIZE;
660-
max_int = glob_thread_count / clState->compute_shaders;
661-
662-
while (max_int && ((clState->compute_shaders * (1UL << max_int)) > glob_thread_count)) {
663-
--max_int;
664-
}
665-
666-
/* Check if max_intensity is >0. */
667-
if (max_int < MIN_XINTENSITY) {
668-
applog(LOG_ERR, "GPU %d: Max xintensity is below minimum.", gpu);
669-
max_int = MIN_XINTENSITY;
670-
}
671-
672-
cgpu->xintensity = max_int;
673-
glob_thread_count = clState->compute_shaders * (1UL << max_int);
674-
break;
675-
676-
default:
677-
glob_thread_count = cgpu->max_alloc / LYRA_SCRATCHBUF_SIZE;
678-
while (max_int && ((1UL << max_int) & glob_thread_count) == 0) {
679-
--max_int;
680-
}
681-
682-
/* Check if max_intensity is >0. */
683-
if (max_int < MIN_INTENSITY) {
684-
applog(LOG_ERR, "GPU %d: Max intensity is below minimum.", gpu);
685-
max_int = MIN_INTENSITY;
686-
}
687-
688-
cgpu->intensity = max_int;
689-
glob_thread_count = 1UL << max_int;
690-
break;
691-
}
692-
}
693-
694-
// TC is glob thread count
695-
cgpu->thread_concurrency = glob_thread_count;
696-
697-
applog(LOG_DEBUG, "GPU %d: computing max. global thread count to %u", gpu, (unsigned)(cgpu->thread_concurrency));
698-
}
699615
else if (!cgpu->opt_tc) {
700616
unsigned int sixtyfours;
701617

@@ -827,18 +743,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
827743
applog(LOG_DEBUG, "yescrypt buffer sizes: %lu RW, %lu R", (unsigned long)bufsize, (unsigned long)readbufsize);
828744
// scrypt/n-scrypt
829745
}
830-
else if (algorithm->type == ALGO_LYRA2REV2) {
831-
/* The scratch/pad-buffer needs 32kBytes memory per thread. */
832-
bufsize = LYRA_SCRATCHBUF_SIZE * cgpu->thread_concurrency;
833-
buf1size = 4* 8 * cgpu->thread_concurrency; //matrix
834-
835-
/* This is the input buffer. For yescrypt this is guaranteed to be
836-
* 80 bytes only. */
837-
readbufsize = 80;
838-
839-
applog(LOG_DEBUG, "lyra2REv2 buffer sizes: %lu RW, %lu RW", (unsigned long)bufsize, (unsigned long)buf1size);
840-
// scrypt/n-scrypt
841-
}
842746
else {
843747
size_t ipt = (algorithm->n / cgpu->lookup_gap + (algorithm->n % cgpu->lookup_gap > 0));
844748
bufsize = 128 * ipt * cgpu->thread_concurrency;
@@ -904,14 +808,6 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg
904808
return NULL;
905809
}
906810
}
907-
else if (algorithm->type == ALGO_LYRA2REV2) {
908-
// need additionnal buffers
909-
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, buf1size, NULL, &status);
910-
if (status != CL_SUCCESS && !clState->buffer1) {
911-
applog(LOG_DEBUG, "Error %d: clCreateBuffer (buffer1), decrease TC or increase LG", status);
912-
return NULL;
913-
}
914-
}
915811
else {
916812
clState->buffer1 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status); // we don't need that much just tired...
917813
if (status != CL_SUCCESS && !clState->buffer1) {

0 commit comments

Comments
 (0)