diff --git a/Backup.cpp b/Backup.cpp index e90d74f..07b574f 100644 --- a/Backup.cpp +++ b/Backup.cpp @@ -230,7 +230,7 @@ void Kangaroo::FetchWalks(uint64_t nbWalk,Int *x,Int *y,Int *d) { } -void Kangaroo::FetchWalks(uint64_t nbWalk,std::vector& kangs,Int* x,Int* y,Int* d) { +void Kangaroo::FetchWalks(uint64_t nbWalk,std::vector& kangs,Int* x,Int* y,Int* d) { uint64_t n = 0; @@ -248,8 +248,7 @@ void Kangaroo::FetchWalks(uint64_t nbWalk,std::vector& kangs,Int* x,In for(n = 0; n < avail; n++) { Int dist; - uint32_t type; - HashTable::CalcDistAndType(kangs[n],&dist,&type); + HashTable::CalcDist(&kangs[n],&dist); dists.push_back(dist); } @@ -277,7 +276,6 @@ void Kangaroo::FetchWalks(uint64_t nbWalk,std::vector& kangs,Int* x,In } kangs.erase(kangs.begin(),kangs.begin() + avail); - } if(avail < nbWalk) { @@ -293,7 +291,7 @@ void Kangaroo::FectchKangaroos(TH_PARAM *threads) { double sFetch = Timer::get_tick(); // From server - vector kangs; + vector kangs; if(saveKangarooByServer) { ::printf("FectchKangaroosFromServer"); if(!GetKangaroosFromServer(workFile,kangs)) @@ -492,22 +490,21 @@ void Kangaroo::SaveWork(uint64_t totalCount,double totalTime,TH_PARAM *threads,i if(saveKangarooByServer) { ::printf("\nSaveWork (Kangaroo->Server): %s",fileName.c_str()); - vector kangs; + vector kangs; for(int i = 0; i < nbThread; i++) totalWalk += threads[i].nbKangaroo; kangs.reserve(totalWalk); for(int i = 0; i < nbThread; i++) { - int128_t X; - int128_t D; - uint64_t h; + int256_t X; + int256_t D; for(uint64_t n = 0; n < threads[i].nbKangaroo; n++) { - HashTable::Convert(&threads[i].px[n],&threads[i].distance[n],n%2,&h,&X,&D); - kangs.push_back(D); + HashTable::Convert(&threads[i].px[n],&threads[i].distance[n],&X,&D); + kangs.push_back(D); } } SendKangaroosToServer(fileName,kangs); - size = kangs.size()*16 + 16; + size = kangs.size()*32 + 32; goto end; } else { diff --git a/Check.cpp b/Check.cpp index 8eeb4f0..aac26d1 100644 --- a/Check.cpp +++ b/Check.cpp @@ -46,8 +46,8 @@ uint32_t Kangaroo::CheckHash(uint32_t h,uint32_t nbItem,HashTable* hT,FILE* f) { for(uint32_t i = 0; i < nbItem; i++) { e = hT->E[h].items[i]; Int dist; - uint32_t kType; - HashTable::CalcDistAndType(e->d,&dist,&kType); + uint32_t kType = e->kType; + HashTable::CalcDist(&(e->d),&dist); dists.push_back(dist); types.push_back(kType); } @@ -60,8 +60,8 @@ uint32_t Kangaroo::CheckHash(uint32_t h,uint32_t nbItem,HashTable* hT,FILE* f) { ::fread(items+i,32,1,f); e = items + i; Int dist; - uint32_t kType; - HashTable::CalcDistAndType(e->d,&dist,&kType); + uint32_t kType = e->kType; + HashTable::CalcDist(&(e->d),&dist); dists.push_back(dist); types.push_back(kType); } @@ -88,8 +88,7 @@ uint32_t Kangaroo::CheckHash(uint32_t h,uint32_t nbItem,HashTable* hT,FILE* f) { if(hT) e = hT->E[h].items[i]; else e = items + i; - uint32_t hC = S[i].x.bits64[2] & HASH_MASK; - ok = (hC == h) && (S[i].x.bits64[0] == e->x.i64[0]) && (S[i].x.bits64[1] == e->x.i64[1]); + ok = (S[i].x.bits64[0] == e->x.i64[0]) && (S[i].x.bits64[1] == e->x.i64[1]) && (S[i].x.bits64[2] == e->x.i64[2]) && (S[i].x.bits64[3] == e->x.i64[3]);; if(!ok) nbWrong++; //if(!ok) { // ::printf("\nCheckWorkFile wrong at: %06X [%d]\n",h,i); diff --git a/GPU/.GPUMath.h.swp b/GPU/.GPUMath.h.swp new file mode 100644 index 0000000..0f2a33e Binary files /dev/null and b/GPU/.GPUMath.h.swp differ diff --git a/GPU/GPUCompute.h b/GPU/GPUCompute.h index 95a358a..878ceb0 100644 --- a/GPU/GPUCompute.h +++ b/GPU/GPUCompute.h @@ -23,7 +23,7 @@ __device__ void ComputeKangaroos(uint64_t *kangaroos,uint32_t maxFound,uint32_t uint64_t px[GPU_GRP_SIZE][4]; uint64_t py[GPU_GRP_SIZE][4]; - uint64_t dist[GPU_GRP_SIZE][2]; + uint64_t dist[GPU_GRP_SIZE][4]; #ifdef USE_SYMMETRY uint64_t lastJump[GPU_GRP_SIZE]; #endif diff --git a/GPU/GPUEngine.cu b/GPU/GPUEngine.cu index 86be42b..634ae6c 100644 --- a/GPU/GPUEngine.cu +++ b/GPU/GPUEngine.cu @@ -121,6 +121,8 @@ int _ConvertSMVer2Cores(int major,int minor) { { 0x70, 64 }, { 0x72, 64 }, { 0x75, 64 }, + { 0x80, 64 }, + { 0x86, 64 }, { -1, -1 } }; int index = 0; @@ -392,16 +394,18 @@ void GPUEngine::SetKangaroos(Int *px,Int *py,Int *d) { for(int t = 0; t < nbThreadPerGroup; t++) { // X - inputKangarooPinned[g * strideSize + t + 0 * nbThreadPerGroup] = px[idx].bits64[0]; - inputKangarooPinned[g * strideSize + t + 1 * nbThreadPerGroup] = px[idx].bits64[1]; - inputKangarooPinned[g * strideSize + t + 2 * nbThreadPerGroup] = px[idx].bits64[2]; - inputKangarooPinned[g * strideSize + t + 3 * nbThreadPerGroup] = px[idx].bits64[3]; + Int tpx = px[idx]; + inputKangarooPinned[g * strideSize + t + 0 * nbThreadPerGroup] = tpx.bits64[0]; + inputKangarooPinned[g * strideSize + t + 1 * nbThreadPerGroup] = tpx.bits64[1]; + inputKangarooPinned[g * strideSize + t + 2 * nbThreadPerGroup] = tpx.bits64[2]; + inputKangarooPinned[g * strideSize + t + 3 * nbThreadPerGroup] = tpx.bits64[3]; // Y - inputKangarooPinned[g * strideSize + t + 4 * nbThreadPerGroup] = py[idx].bits64[0]; - inputKangarooPinned[g * strideSize + t + 5 * nbThreadPerGroup] = py[idx].bits64[1]; - inputKangarooPinned[g * strideSize + t + 6 * nbThreadPerGroup] = py[idx].bits64[2]; - inputKangarooPinned[g * strideSize + t + 7 * nbThreadPerGroup] = py[idx].bits64[3]; + Int tpy = py[idx]; + inputKangarooPinned[g * strideSize + t + 4 * nbThreadPerGroup] = tpy.bits64[0]; + inputKangarooPinned[g * strideSize + t + 5 * nbThreadPerGroup] = tpy.bits64[1]; + inputKangarooPinned[g * strideSize + t + 6 * nbThreadPerGroup] = tpy.bits64[2]; + inputKangarooPinned[g * strideSize + t + 7 * nbThreadPerGroup] = tpy.bits64[3]; // Distance Int dOff; @@ -409,10 +413,11 @@ void GPUEngine::SetKangaroos(Int *px,Int *py,Int *d) { if(idx % 2 == WILD) dOff.ModAddK1order(&wildOffset); inputKangarooPinned[g * strideSize + t + 8 * nbThreadPerGroup] = dOff.bits64[0]; inputKangarooPinned[g * strideSize + t + 9 * nbThreadPerGroup] = dOff.bits64[1]; - + inputKangarooPinned[g * stridesize + t + 10 * nbThreadPerGroup] = dOff.bits64[2]; + inputKangarooPinned[g * strideSize + t + 11 * nbThreadPerGroup] = dOff.bits64[3]; #ifdef USE_SYMMETRY // Last jump - inputKangarooPinned[t + 10 * nbThreadPerGroup] = (uint64_t)NB_JUMP; + inputKangarooPinned[t + 12 * nbThreadPerGroup] = (uint64_t)NB_JUMP; #endif idx++; @@ -490,6 +495,7 @@ void GPUEngine::GetKangaroos(Int *px,Int *py,Int *d) { } +// I think this is for public leys and initial distances void GPUEngine::SetKangaroo(uint64_t kIdx,Int *px,Int *py,Int *d) { int gSize = KSIZE * GPU_GRP_SIZE; @@ -528,11 +534,15 @@ void GPUEngine::SetKangaroo(uint64_t kIdx,Int *px,Int *py,Int *d) { cudaMemcpy(inputKangaroo + (b * blockSize + g * strideSize + t + 8 * nbThreadPerGroup),inputKangarooPinned,8,cudaMemcpyHostToDevice); inputKangarooPinned[0] = dOff.bits64[1]; cudaMemcpy(inputKangaroo + (b * blockSize + g * strideSize + t + 9 * nbThreadPerGroup),inputKangarooPinned,8,cudaMemcpyHostToDevice); + inputKangarooPinned[0] = dOff.bits64[2]; + cudaMemcpy(inputKangaroo + (b * blockSize + g * strideSize + t + 10 * nbThreadPerGroup),inputKangarooPinned,8,cudaMemcpyHostToDevice); + inputKangarooPinned[0] = dOff.bits64[3]; + cudaMemcpy(inputKangaroo + (b * blockSize + g * strideSize + t + 11 * nbThreadPerGroup),inputKangarooPinned,8,cudaMemcpyHostToDevice); #ifdef USE_SYMMETRY // Last jump inputKangarooPinned[0] = (uint64_t)NB_JUMP; - cudaMemcpy(inputKangaroo + (b * blockSize + g * strideSize + t + 10 * nbThreadPerGroup),inputKangarooPinned,8,cudaMemcpyHostToDevice); + cudaMemcpy(inputKangaroo + (b * blockSize + g * strideSize + t + 12 * nbThreadPerGroup),inputKangarooPinned,8,cudaMemcpyHostToDevice);" #endif } @@ -561,8 +571,8 @@ void GPUEngine::SetParams(uint64_t dpMask,Int *distance,Int *px,Int *py) { this->dpMask = dpMask; for(int i=0;i< NB_JUMP;i++) - memcpy(jumpPinned + 2*i,distance[i].bits64,16); - cudaMemcpyToSymbol(jD,jumpPinned,jumpSize/2); + memcpy(jumpPinned + 4*i,distance[i].bits64,32); + cudaMemcpyToSymbol(jD,jumpPinned,jumpSize); cudaError_t err = cudaGetLastError(); if(err != cudaSuccess) { printf("GPUEngine: SetParams: Failed to copy to constant memory: %s\n",cudaGetErrorString(err)); @@ -666,8 +676,8 @@ bool GPUEngine::Launch(std::vector &hashFound,bool spinWait) { uint64_t *d = (uint64_t *)(itemPtr + 8); it.d.bits64[0] = d[0]; it.d.bits64[1] = d[1]; - it.d.bits64[2] = 0; - it.d.bits64[3] = 0; + it.d.bits64[2] = d[2]; + it.d.bits64[3] = d[3]; it.d.bits64[4] = 0; if(it.kIdx % 2 == WILD) it.d.ModSubK1order(&wildOffset); diff --git a/GPU/GPUEngine.h b/GPU/GPUEngine.h index 8f61099..7131bd7 100644 --- a/GPU/GPUEngine.h +++ b/GPU/GPUEngine.h @@ -34,7 +34,8 @@ typedef struct { Int x; Int d; - uint64_t kIdx; + uint64_t kIdx; // Appears like this is used as kType + uint64_t h; } ITEM; class GPUEngine { @@ -44,9 +45,9 @@ class GPUEngine { GPUEngine(int nbThreadGroup,int nbThreadPerGroup,int gpuId,uint32_t maxFound); ~GPUEngine(); void SetParams(uint64_t dpMask,Int *distance,Int *px,Int *py); - void SetKangaroos(Int *px,Int *py,Int *d); + void SetKangaroos(uint64_t kIdx, Int *px,Int *py,Int *d); void GetKangaroos(Int *px,Int *py,Int *d); - void SetKangaroo(uint64_t kIdx,Int *px,Int *py,Int *d); + void SetKangaroo(Int *px,Int *py,Int *d); bool Launch(std::vector &hashFound,bool spinWait = false); void SetWildOffset(Int *offset); int GetNbThread(); diff --git a/GPU/GPUGenerate.cpp b/GPU/GPUGenerate.cpp index 5f4782d..c531724 100644 --- a/GPU/GPUGenerate.cpp +++ b/GPU/GPUGenerate.cpp @@ -24,13 +24,13 @@ using namespace std; void GPUEngine::GenerateCode(Secp256K1 *secp) { // Compute generator table - Int jumpDistance[129]; - Point jumpPoint[129]; + Int jumpDistance[257]; + Point jumpPoint[257]; // Kangaroo jumps jumpPoint[0] = secp->G; jumpDistance[0].SetInt32(1); - for(int i = 1; i < 129; ++i) { + for(int i = 1; i < 257; ++i) { jumpDistance[i].Add(&jumpDistance[i - 1],&jumpDistance[i - 1]); jumpPoint[i] = secp->DoubleDirect(jumpPoint[i - 1]); } @@ -40,22 +40,22 @@ void GPUEngine::GenerateCode(Secp256K1 *secp) { fprintf(f,"// File generated by GPUEngine::GenerateCode()\n"); - fprintf(f,"// Jump distance table (Contains 1,2,4,...,2^129\n"); + fprintf(f,"// Jump distance table (Contains 1,2,4,...,2^257\n"); fprintf(f,"__device__ __constant__ uint64_t jD[][4] = {\n"); - for(int i = 0; i < 129; i++) { + for(int i = 0; i < 257; i++) { fprintf(f," %s,\n",jumpDistance[i].GetC64Str(4).c_str()); } fprintf(f,"};\n"); - fprintf(f,"// Jump point table (Contains G,2G,4G,...,2^129.G)\n"); + fprintf(f,"// Jump point table (Contains G,2G,4G,...,2^257.G)\n"); fprintf(f,"__device__ __constant__ uint64_t jPx[][4] = {\n"); - for(int i = 0; i < 129; i++) { + for(int i = 0; i < 257; i++) { fprintf(f," %s,\n",jumpPoint[i].x.GetC64Str(4).c_str()); } fprintf(f,"};\n"); fprintf(f,"__device__ __constant__ uint64_t jPy[][4] = {\n"); - for(int i = 0; i < 129; i++) { + for(int i = 0; i < 257; i++) { fprintf(f," %s,\n",jumpPoint[i].y.GetC64Str(4).c_str()); } fprintf(f,"};\n\n"); diff --git a/GPU/GPUMath.h b/GPU/GPUMath.h index b67e005..67fff07 100644 --- a/GPU/GPUMath.h +++ b/GPU/GPUMath.h @@ -48,7 +48,7 @@ #define MADDS(r,a,b,c) asm volatile ("madc.hi.s64 %0, %1, %2, %3;" : "=l"(r) : "l"(a), "l"(b), "l"(c)); // Jump distance -__device__ __constant__ uint64_t jD[NB_JUMP][2]; +__device__ __constant__ uint64_t jD[NB_JUMP][4]; // jump points __device__ __constant__ uint64_t jPx[NB_JUMP][4]; __device__ __constant__ uint64_t jPy[NB_JUMP][4]; @@ -122,6 +122,14 @@ __device__ __constant__ uint64_t _O[] = { 0xBFD25E8CD0364141ULL,0xBAAEDCE6AF48A0 // --------------------------------------------------------------------------------------- +#define Add256(r,a) { \ + UADDO1((r)[0], (a)[0]); \ + UADDO1((r)[1], (a)[1]); \ + UADDO1((r)[2], (a)[2]); \ + UADD1((r)[3], (a)[3]);} + +// --------------------------------------------------------------------------------------- + #define Neg(r) {\ USUBO(r[0],0ULL,r[0]); \ USUBC(r[1],0ULL,r[1]); \ @@ -183,16 +191,20 @@ out[pos*ITEM_SIZE32 + 9] = ((uint32_t *)d)[0]; \ out[pos*ITEM_SIZE32 + 10] = ((uint32_t *)d)[1]; \ out[pos*ITEM_SIZE32 + 11] = ((uint32_t *)d)[2]; \ out[pos*ITEM_SIZE32 + 12] = ((uint32_t *)d)[3]; \ -out[pos*ITEM_SIZE32 + 13] = ((uint32_t *)idx)[0]; \ -out[pos*ITEM_SIZE32 + 14] = ((uint32_t *)idx)[1]; \ +out[pos*ITEM_SIZE32 + 13] = ((uint32_t *)d)[4]; \ +out[pos*ITEM_SIZE32 + 14] = ((uint32_t *)d)[5]; \ +out[pos*ITEM_SIZE32 + 15] = ((uint32_t *)d)[6]; \ +out[pos*ITEM_SIZE32 + 16] = ((uint32_t *)d)[7]; \ +out[pos*ITEM_SIZE32 + 17] = ((uint32_t *)idx)[0]; \ +out[pos*ITEM_SIZE32 + 18] = ((uint32_t *)idx)[1]; \ } // --------------------------------------------------------------------------------------- #ifdef USE_SYMMETRY -__device__ void LoadKangaroos(uint64_t *a,uint64_t px[GPU_GRP_SIZE][4],uint64_t py[GPU_GRP_SIZE][4],uint64_t dist[GPU_GRP_SIZE][2],uint64_t *jumps) { +__device__ void LoadKangaroos(uint64_t *a,uint64_t px[GPU_GRP_SIZE][4],uint64_t py[GPU_GRP_SIZE][4],uint64_t dist[GPU_GRP_SIZE][4],uint64_t *jumps) { #else -__device__ void LoadKangaroos(uint64_t * a,uint64_t px[GPU_GRP_SIZE][4],uint64_t py[GPU_GRP_SIZE][4],uint64_t dist[GPU_GRP_SIZE][2]) { +__device__ void LoadKangaroos(uint64_t * a,uint64_t px[GPU_GRP_SIZE][4],uint64_t py[GPU_GRP_SIZE][4],uint64_t dist[GPU_GRP_SIZE][4]) { #endif __syncthreads(); @@ -216,15 +228,17 @@ __device__ void LoadKangaroos(uint64_t * a,uint64_t px[GPU_GRP_SIZE][4],uint64_t d64[0] = (a)[IDX + 8 * blockDim.x + stride]; d64[1] = (a)[IDX + 9 * blockDim.x + stride]; + d64[2] = (a)[IDX + 10 * blockDim.x + stride]; + d64[3] = (a)[IDX + 11 * blockDim.x + stride]; #ifdef USE_SYMMETRY - jumps[g] = (a)[IDX + 10 * blockDim.x + stride]; + jumps[g] = (a)[IDX + 12 * blockDim.x + stride]; #endif } } -__device__ void LoadDists(uint64_t* a,uint64_t dist[GPU_GRP_SIZE][2]) { +__device__ void LoadDists(uint64_t* a,uint64_t dist[GPU_GRP_SIZE][4]) { __syncthreads(); @@ -235,6 +249,8 @@ __device__ void LoadDists(uint64_t* a,uint64_t dist[GPU_GRP_SIZE][2]) { d64[0] = (a)[IDX + 8 * blockDim.x + stride]; d64[1] = (a)[IDX + 9 * blockDim.x + stride]; + d64[2] = (a)[IDX + 10 * blockDim.x + stride]; + d64[3] = (a)[IDX + 11 * blockDim.x + stride]; } @@ -271,9 +287,9 @@ __device__ void LoadKangaroo(uint64_t* a,uint32_t stride,uint64_t px[4]) { // --------------------------------------------------------------------------------------- #ifdef USE_SYMMETRY -__device__ void StoreKangaroos(uint64_t *a,uint64_t px[GPU_GRP_SIZE][4],uint64_t py[GPU_GRP_SIZE][4],uint64_t dist[GPU_GRP_SIZE][2],uint64_t *jumps) { +__device__ void StoreKangaroos(uint64_t *a,uint64_t px[GPU_GRP_SIZE][4],uint64_t py[GPU_GRP_SIZE][4],uint64_t dist[GPU_GRP_SIZE][4],uint64_t *jumps) { #else -__device__ void StoreKangaroos(uint64_t * a,uint64_t px[GPU_GRP_SIZE][4],uint64_t py[GPU_GRP_SIZE][4],uint64_t dist[GPU_GRP_SIZE][2]) { +__device__ void StoreKangaroos(uint64_t * a,uint64_t px[GPU_GRP_SIZE][4],uint64_t py[GPU_GRP_SIZE][4],uint64_t dist[GPU_GRP_SIZE][4]) { #endif __syncthreads(); @@ -296,9 +312,11 @@ __device__ void StoreKangaroos(uint64_t * a,uint64_t px[GPU_GRP_SIZE][4],uint64_ (a)[IDX + 8 * blockDim.x + stride] = d64[0]; (a)[IDX + 9 * blockDim.x + stride] = d64[1]; + (a)[IDX + 10 * blockDim.x + stride] = d64[2]; + (a)[IDX + 11 * blockDim.x + stride] = d64[3]; #ifdef USE_SYMMETRY - (a)[IDX + 10 * blockDim.x + stride] = jumps[g]; + (a)[IDX + 12 * blockDim.x + stride] = jumps[g]; #endif } @@ -321,7 +339,7 @@ __device__ void StoreKangaroo(uint64_t* a,uint32_t stride,uint64_t px[4],uint64_ } -__device__ void StoreDists(uint64_t* a,uint64_t dist[GPU_GRP_SIZE][2]) { +__device__ void StoreDists(uint64_t* a,uint64_t dist[GPU_GRP_SIZE][4]) { __syncthreads(); @@ -331,6 +349,8 @@ __device__ void StoreDists(uint64_t* a,uint64_t dist[GPU_GRP_SIZE][2]) { (a)[IDX + 8 * blockDim.x + stride] = d64[0]; (a)[IDX + 9 * blockDim.x + stride] = d64[1]; + (a)[IDX + 10 * blockDim.x + stride] = d64[2]; + (a)[IDX + 11 * blockDim.x + stride] = d64[3]; } diff --git a/HashTable.cpp b/HashTable.cpp index 20ab658..06a3e59 100644 --- a/HashTable.cpp +++ b/HashTable.cpp @@ -54,13 +54,19 @@ uint64_t HashTable::GetNbItem() { } -ENTRY *HashTable::CreateEntry(int128_t *x,int128_t *d) { +ENTRY *HashTable::CreateEntry(int256_t *x,int256_t *d, uint32_t kType) { ENTRY *e = (ENTRY *)malloc(sizeof(ENTRY)); e->x.i64[0] = x->i64[0]; e->x.i64[1] = x->i64[1]; + e->x.i64[2] = x->i64[2]; + e->x.i64[3] = x->i64[3]; e->d.i64[0] = d->i64[0]; e->d.i64[1] = d->i64[1]; + e->d.i64[2] = d->i64[2]; + e->d.i64[3] = d->i64[3]; + e->kType = kType; + return e; } @@ -72,31 +78,26 @@ ENTRY *HashTable::CreateEntry(int128_t *x,int128_t *d) { E[h].items[st] = entry; \ E[h].nbItem++;} -void HashTable::Convert(Int *x,Int *d,uint32_t type,uint64_t *h,int128_t *X,int128_t *D) { - - uint64_t sign = 0; - uint64_t type64 = (uint64_t)type << 62; - - X->i64[0] = x->bits64[0]; - X->i64[1] = x->bits64[1]; - - // Probability of failure (1/2^128) - if(d->bits64[3] > 0x7FFFFFFFFFFFFFFFULL) { - Int N(d); - N.ModNegK1order(); - D->i64[0] = N.bits64[0]; - D->i64[1] = N.bits64[1] & 0x3FFFFFFFFFFFFFFFULL; - sign = 1ULL << 63; - } else { - D->i64[0] = d->bits64[0]; - D->i64[1] = d->bits64[1] & 0x3FFFFFFFFFFFFFFFULL; - } +void HashTable::toint256t(Int *a, int256_t *b) +{ + b->i64[0] = a->bits64[0]; + b->i64[1] = a->bits64[1]; + b->i64[2] = a->bits64[2]; + b->i64[3] = a->bits64[3]; +} - D->i64[1] |= sign; - D->i64[1] |= type64; +void HashTable::toInt(int256_t *a, Int *b) +{ + b->bits64[0] = a->i64[0]; + b->bits64[1] = a->i64[1]; + b->bits64[2] = a->i64[2]; + b->bits64[3] = a->i64[3]; +} - *h = (x->bits64[2] & HASH_MASK); +void HashTable::Convert(Int *x,Int *d,int256_t *X,int256_t *D) { + toint256t(x,X); + toint256t(d,D); } @@ -157,12 +158,14 @@ int HashTable::MergeH(uint32_t h,FILE* f1,FILE* f2,FILE* fd,uint32_t* nbDP,uint3 AV1(); nb1--; } else if (comp==0) { - if((e1.d.i64[0] == e2.d.i64[0]) && (e1.d.i64[1] == e2.d.i64[1])) { + if((e1.d.i64[0] == e2.d.i64[0]) && (e1.d.i64[1] == e2.d.i64[1]) && (e1.d.i64[2] == e2.d.i64[2]) && (e1.d.i64[3] == e2.d.i64[3])) { *duplicate = *duplicate + 1; } else { // Collision - CalcDistAndType(e1.d,d1,k1); - CalcDistAndType(e2.d,d2,k2); + *k1 = e1.kType; + *k2 = e2.kType; + CalcDist(&(e1.d),d1); + CalcDist(&(e2.d),d2); collisionFound = true; } memcpy(output + nbd,&e1,32); @@ -220,11 +223,11 @@ int HashTable::MergeH(uint32_t h,FILE* f1,FILE* f2,FILE* fd,uint32_t* nbDP,uint3 int HashTable::Add(Int *x,Int *d,uint32_t type) { - int128_t X; - int128_t D; - uint64_t h; - Convert(x,d,type,&h,&X,&D); - ENTRY* e = CreateEntry(&X,&D); + int256_t X; + int256_t D; + Convert(x,d,&X,&D); + uint64_t h = (x->bits64[0] ^ x->bits64[1] ^ x->bits64[2] ^ x->bits64[3]) % HASH_SIZE; + ENTRY* e = CreateEntry(&X,&D,type); return Add(h,e); } @@ -239,28 +242,19 @@ void HashTable::ReAllocate(uint64_t h,uint32_t add) { } -int HashTable::Add(uint64_t h,int128_t *x,int128_t *d) { - - ENTRY *e = CreateEntry(x,d); +int HashTable::Add(int256_t *x,int256_t *d, uint32_t type) { + uint64_t h = (x->i64[0] ^ x->i64[1] ^ x->i64[2] ^ x->i64[3]) % HASH_SIZE; + ENTRY *e = CreateEntry(x,d,type); return Add(h,e); } -void HashTable::CalcDistAndType(int128_t d,Int* kDist,uint32_t* kType) { - - *kType = (d.i64[1] & 0x4000000000000000ULL) != 0; - int sign = (d.i64[1] & 0x8000000000000000ULL) != 0; - d.i64[1] &= 0x3FFFFFFFFFFFFFFFULL; - +void HashTable::CalcDist(int256_t *d,Int* kDist) { kDist->SetInt32(0); - kDist->bits64[0] = d.i64[0]; - kDist->bits64[1] = d.i64[1]; - if(sign) kDist->ModNegK1order(); - + toInt(d,kDist); } int HashTable::Add(uint64_t h,ENTRY* e) { - if(E[h].maxItem == 0) { E[h].maxItem = 16; E[h].items = (ENTRY **)malloc(sizeof(ENTRY *) * E[h].maxItem); @@ -286,14 +280,22 @@ int HashTable::Add(uint64_t h,ENTRY* e) { if(comp<0) { ed = mi - 1; } else if (comp==0) { - - if((e->d.i64[0] == GET(h,mi)->d.i64[0]) && (e->d.i64[1] == GET(h,mi)->d.i64[1])) { - // Same point added 2 times or collision in same herd ! - return ADD_DUPLICATE; + ENTRY *ent = GET(h,mi); + uint64_t d10 = e->d.i64[0]; + uint64_t d11 = e->d.i64[1]; + uint64_t d12 = e->d.i64[2]; + uint64_t d13 = e->d.i64[3]; + uint64_t d20 = ent->d.i64[0]; + uint64_t d21 = ent->d.i64[1]; + uint64_t d22 = ent->d.i64[2]; + uint64_t d23 = ent->d.i64[3]; + if (d10 == d20 && d11 == d21 && d12 == d22 || d13 == d23) { + // Same point added twice or collision in the same herd! + return ADD_DUPLICATE; } - // Collision - CalcDistAndType(GET(h,mi)->d , &kDist, &kType); + kType = ent->kType; + CalcDist(&(ent->d), &kDist); return ADD_COLLISION; } else { @@ -306,19 +308,27 @@ int HashTable::Add(uint64_t h,ENTRY* e) { } -int HashTable::compare(int128_t *i1,int128_t *i2) { +int HashTable::compare(int256_t *i1,int256_t *i2) { uint64_t *a = i1->i64; uint64_t *b = i2->i64; - if(a[1] == b[1]) { - if(a[0] == b[0]) { - return 0; + if(a[3] == b[3]) { + if(a[2] == b[2]) { + if(a[1] == b[1]) { + if(a[0] == b[0]) { + return 0; + } else { + return (a[0] > b[0]) ? 1 : -1; + } + } else { + return (a[1] > b[1]) ? 1 : -1; + } } else { - return (a[0] > b[0]) ? 1 : -1; + return (a[2] > b[2]) ? 1 : -1; } } else { - return (a[1] > b[1]) ? 1 : -1; + return (a[3] > b[3]) ? 1 : -1; } } @@ -356,7 +366,7 @@ std::string HashTable::GetSizeInfo() { } -std::string HashTable::GetStr(int128_t *i) { +std::string HashTable::GetStr(int256_t *i) { std::string ret; char tmp[256]; @@ -381,8 +391,9 @@ void HashTable::SaveTable(FILE* f,uint32_t from,uint32_t to,bool printPoint) { fwrite(&E[h].nbItem,sizeof(uint32_t),1,f); fwrite(&E[h].maxItem,sizeof(uint32_t),1,f); for(uint32_t i = 0; i < E[h].nbItem; i++) { - fwrite(&(E[h].items[i]->x),16,1,f); - fwrite(&(E[h].items[i]->d),16,1,f); + fwrite(&(E[h].items[i]->x),32,1,f); + fwrite(&(E[h].items[i]->d),32,1,f); + fwrite(&(E[h].items[i]->kType),4,1,f); if(printPoint) { pointPrint++; if(pointPrint > point) { @@ -451,8 +462,9 @@ void HashTable::LoadTable(FILE* f,uint32_t from,uint32_t to) { for(uint32_t i = 0; i < E[h].nbItem; i++) { ENTRY* e = (ENTRY*)malloc(sizeof(ENTRY)); - fread(&(e->x),16,1,f); - fread(&(e->d),16,1,f); + fread(&(e->x),32,1,f); + fread(&(e->d),32,1,f); + fread(&(e->kType),4,1,f); E[h].items[i] = e; } diff --git a/HashTable.h b/HashTable.h index e59d898..6de0aef 100644 --- a/HashTable.h +++ b/HashTable.h @@ -42,16 +42,23 @@ union int128_s { }; +union int256_s { + uint8_t i8[32]; + uint16_t i16[16]; + uint32_t i32[8]; + uint64_t i64[4]; +}; + typedef union int128_s int128_t; +typedef union int256_s int256_t; #define safe_free(x) if(x) {free(x);x=NULL;} -// We store only 128 (+18) bit a the x value which give a probabilty a wrong collision after 2^73 entries - typedef struct { - int128_t x; // Poisition of kangaroo (128bit LSB) - int128_t d; // Travelled distance (b127=sign b126=kangaroo type, b125..b0 distance + int256_t x; // Position of kangaroo (256bit LSB) + int256_t d; // Travelled distance (b255..b0 distance) + uint32_t kType; // Kangaroo type } ENTRY; @@ -68,8 +75,8 @@ class HashTable { public: HashTable(); - int Add(Int *x,Int *d,uint32_t type); - int Add(uint64_t h,int128_t *x,int128_t *d); + int Add(Int *x,Int *d, uint32_t type); + int Add(int256_t *x,int256_t *d, uint32_t type); int Add(uint64_t h,ENTRY *e); uint64_t GetNbItem(); void Reset(); @@ -88,17 +95,17 @@ class HashTable { Int kDist; uint32_t kType; - static void Convert(Int *x,Int *d,uint32_t type,uint64_t *h,int128_t *X,int128_t *D); + static void Convert(Int *x,Int *d,int256_t *X,int256_t *D); static int MergeH(uint32_t h,FILE* f1,FILE* f2,FILE* fd,uint32_t *nbDP,uint32_t* duplicate, Int* d1,uint32_t* k1,Int* d2,uint32_t* k2); - static void CalcDistAndType(int128_t d,Int* kDist,uint32_t* kType); - + static void CalcDist(int256_t *d,Int* kDist); + static void toint256t(Int *a, int256_t *b); + static void toInt(int256_t *a, Int *b); private: - ENTRY *CreateEntry(int128_t *x,int128_t *d); - static int compare(int128_t *i1,int128_t *i2); - std::string GetStr(int128_t *i); - + ENTRY *CreateEntry(int256_t *x,int256_t *d, uint32_t kType); + static int compare(int256_t *i1,int256_t *i2); + std::string GetStr(int256_t *i); }; #endif // HASHTABLEH diff --git a/Kangaroo.cpp b/Kangaroo.cpp index 3310474..863e2e5 100644 --- a/Kangaroo.cpp +++ b/Kangaroo.cpp @@ -145,9 +145,12 @@ bool Kangaroo::ParseConfigFile(std::string &fileName) { // ---------------------------------------------------------------------------- -bool Kangaroo::IsDP(uint64_t x) { +bool Kangaroo::IsDP(Int *x) { - return (x & dMask) == 0; + return ((x->bits64[3] & dMask.i64[3]) == 0) && + ((x->bits64[2] & dMask.i64[2]) == 0) && + ((x->bits64[1] & dMask.i64[1]) == 0) && + ((x->bits64[0] & dMask.i64[0]) == 0); } @@ -155,18 +158,23 @@ void Kangaroo::SetDP(int size) { // Mask for distinguised point dpSize = size; - if(dpSize == 0) { - dMask = 0; - } else { - if(dpSize > 64) dpSize = 64; - dMask = (1ULL << (64 - dpSize)) - 1; - dMask = ~dMask; + dMask.i64[0] = 0; + dMask.i64[1] = 0; + dMask.i64[2] = 0; + dMask.i64[3] = 0; + if (dpSize > 0) { + if(dpSize > 256) dpSize = 256; + for (int i = 0; i < size; i += 64) { + int end = (i + 64 > size) ? (size-1) % 64 : 63; + uint64_t mask = ((1ULL << end) - 1) << 1 | 1ULL; + dMask.i64[(int)(i/64)] = mask; + } } #ifdef WIN64 - ::printf("DP size: %d [0x%016I64X]\n",dpSize,dMask); + ::printf("DP size: %d [0x%016I64X%016I64%016IX64X%016I64X]\n",dpSize,dMask.i64[3],dMask.i64[2],dMask.i64[1],dMask.i64[0]); #else - ::printf("DP size: %d [0x%" PRIx64 "]\n",dpSize,dMask); + ::printf("DP size: %d [0x%" PRIx64 "%" PRIx64 "%" PRIx64 "%" PRIx64 "]\n",dpSize,dMask.i64[3],dMask.i64[2],dMask.i64[1],dMask.i64[0]); #endif } @@ -274,7 +282,7 @@ bool Kangaroo::CollisionCheck(Int* d1,uint32_t type1,Int* d2,uint32_t type2) { } endOfSearch = CheckKey(Td,Wd,0) || CheckKey(Td,Wd,1) || CheckKey(Td,Wd,2) || CheckKey(Td,Wd,3); - + // TODO we can literally attack any point around Td+Wd, but which??? if(!endOfSearch) { // Should not happen, reset the kangaroo @@ -313,14 +321,13 @@ bool Kangaroo::AddToTable(Int *pos,Int *dist,uint32_t kType) { } -bool Kangaroo::AddToTable(uint64_t h,int128_t *x,int128_t *d) { +bool Kangaroo::AddToTable(int256_t *x,int256_t *d, uint32_t kType) { - int addStatus = hashTable.Add(h,x,d); + int addStatus = hashTable.Add(x,d,kType); if(addStatus== ADD_COLLISION) { Int dist; - uint32_t kType; - HashTable::CalcDistAndType(*d,&dist,&kType); + HashTable::toInt(d,&dist); return CollisionCheck(&hashTable.kDist,hashTable.kType,&dist,kType); } @@ -436,7 +443,7 @@ void Kangaroo::SolveKeyCPU(TH_PARAM *ph) { // Send DP to server for(int g = 0; g < CPU_GRP_SIZE; g++) { - if(IsDP(ph->px[g].bits64[3])) { + if(IsDP(&ph->px[g])) { ITEM it; it.x.Set(&ph->px[g]); it.d.Set(&ph->distance[g]); @@ -460,7 +467,7 @@ void Kangaroo::SolveKeyCPU(TH_PARAM *ph) { // Add to table and collision check for(int g = 0; g < CPU_GRP_SIZE && !endOfSearch; g++) { - if(IsDP(ph->px[g].bits64[3])) { + if(IsDP(&ph->px[g])) { LOCK(ghMutex); if(!endOfSearch) { @@ -747,7 +754,7 @@ void Kangaroo::CreateJumpTable() { int jumpBit = rangePower / 2 + 1; #endif - if(jumpBit > 128) jumpBit = 128; + if(jumpBit > 256) jumpBit = 256; int maxRetry = 100; bool ok = false; double distAvg; diff --git a/Kangaroo.h b/Kangaroo.h index 70f5e34..873d7af 100644 --- a/Kangaroo.h +++ b/Kangaroo.h @@ -74,7 +74,6 @@ typedef struct { Int *px; // Kangaroo position Int *py; // Kangaroo position Int *distance; // Travelled distance - #ifdef USE_SYMMETRY uint64_t *symClass; // Last jump #endif @@ -94,9 +93,8 @@ typedef struct { typedef struct { uint32_t kIdx; - uint32_t h; - int128_t x; - int128_t d; + int256_t x; + int256_t d; } DP; @@ -161,15 +159,17 @@ class Kangaroo { private: - bool IsDP(uint64_t x); + bool IsDP(Int *x); void SetDP(int size); void CreateHerd(int nbKangaroo,Int *px, Int *py, Int *d, int firstType,bool lock=true); void CreateJumpTable(); - bool AddToTable(uint64_t h,int128_t *x,int128_t *d); - bool AddToTable(Int *pos,Int *dist,uint32_t kType); + bool AddToTable(uint64_t h,int256_t *x,int256_t *d); + bool AddToTable(int256_t *x,int256_t *d, uint32_t kType); + bool AddToTable(uint64_t h, int256_t *x,int256_t *d, uint32_t kType); + bool AddToTable(Int *pos,Int *dist, uint32_t kType); bool SendToServer(std::vector &dp,uint32_t threadId,uint32_t gpuId); bool CheckKey(Int d1,Int d2,uint8_t type); - bool CollisionCheck(Int* d1,uint32_t type1,Int* d2,uint32_t type2); + bool CollisionCheck(Int* d1, uint32_t type1,Int* d2, uint32_t type2); void ComputeExpected(double dp,double *op,double *ram,double* overHead = NULL); void InitRange(); void InitSearchKey(); @@ -182,6 +182,7 @@ class Kangaroo { void SaveServerWork(); void FetchWalks(uint64_t nbWalk,Int *x,Int *y,Int *d); void FetchWalks(uint64_t nbWalk,std::vector& kangs,Int* x,Int* y,Int* d); + void FetchWalks(uint64_t nbWalk,std::vector& kangs,Int* x,Int* y,Int* d); void FectchKangaroos(TH_PARAM *threads); FILE *ReadHeader(std::string fileName,uint32_t *version,int type); bool SaveHeader(std::string fileName,FILE* f,int type,uint64_t totalCount,double totalTime); @@ -204,8 +205,8 @@ class Kangaroo { void InitSocket(); void WaitForServer(); int32_t GetServerStatus(); - bool SendKangaroosToServer(std::string& fileName,std::vector& kangs); - bool GetKangaroosFromServer(std::string& fileName,std::vector& kangs); + bool SendKangaroosToServer(std::string& fileName,std::vector& kangs); + bool GetKangaroosFromServer(std::string& fileName,std::vector& kangs); #ifdef WIN64 HANDLE ghMutex; @@ -243,7 +244,7 @@ class Kangaroo { Int rangeWidthDiv4; Int rangeWidthDiv8; - uint64_t dMask; + int256_t dMask; uint32_t dpSize; int32_t initDPSize; uint64_t collisionInSameHerd; diff --git a/Network.cpp b/Network.cpp index e6fc10f..2d440b3 100644 --- a/Network.cpp +++ b/Network.cpp @@ -336,7 +336,7 @@ bool Kangaroo::HandleRequest(TH_PARAM *p) { uint64_t nbKangaroo = 0; uint32_t strSize; char fileName[256]; - int128_t* KBuff; + int256_t* KBuff; uint32_t nbK; uint32_t header = HEADKS; uint32_t version = 0; @@ -378,7 +378,7 @@ bool Kangaroo::HandleRequest(TH_PARAM *p) { PUT("nbKangaroo",p->clientSock,&nbKangaroo,sizeof(uint64_t),ntimeout); checkSum.SetInt32(0); - KBuff = (int128_t*)malloc(KANG_PER_BLOCK * sizeof(int128_t)); + KBuff = (int256_t*)malloc(KANG_PER_BLOCK * sizeof(int256_t)); while(nbKangaroo > 0) { @@ -389,15 +389,17 @@ bool Kangaroo::HandleRequest(TH_PARAM *p) { } for(uint32_t k = 0; k < nbK; k++) { - ::fread(&KBuff[k],16,1,f); + ::fread(&KBuff[k],32,1,f); // Checksum K.SetInt32(0); + K.bits64[3] = KBuff[k].i64[3]; + K.bits64[2] = KBuff[k].i64[2]; K.bits64[1] = KBuff[k].i64[1]; K.bits64[0] = KBuff[k].i64[0]; checkSum.Add(&K); } - PUTFREE("packet",p->clientSock,KBuff,nbK * 16,ntimeout,KBuff); + PUTFREE("packet",p->clientSock,KBuff,nbK * 32,ntimeout,KBuff); nbKangaroo -= nbK; @@ -422,7 +424,7 @@ bool Kangaroo::HandleRequest(TH_PARAM *p) { uint32_t fileNameSize; char fileNameTmp[264]; char fileName[256]; - int128_t *KBuff; + int256_t *KBuff; uint32_t nbK; uint32_t header = HEADKS; uint32_t version = 0; @@ -440,7 +442,7 @@ bool Kangaroo::HandleRequest(TH_PARAM *p) { strcpy(fileNameTmp,fileName); strcat(fileNameTmp,".tmp"); - FILE* f = fopen(fileNameTmp,"wb"); + FILE* f = fopen(fileNameTmp,"wb"); if(f == NULL) { ::printf("\nCannot open %s for writing\n",fileNameTmp); ::printf("%s\n",::strerror(errno)); @@ -459,7 +461,7 @@ bool Kangaroo::HandleRequest(TH_PARAM *p) { checkSum.SetInt32(0); - KBuff = (int128_t *)malloc(KANG_PER_BLOCK*sizeof(int128_t)); + KBuff = (int256_t *)malloc(KANG_PER_BLOCK*sizeof(int256_t)); while(nbKangaroo>0) { @@ -469,12 +471,14 @@ bool Kangaroo::HandleRequest(TH_PARAM *p) { nbK = (uint32_t)nbKangaroo; } - GETFREE("packet",p->clientSock,KBuff,nbK * 16,ntimeout,KBuff); + GETFREE("packet",p->clientSock,KBuff,nbK * 32,ntimeout,KBuff); for(uint32_t k = 0; k < nbK; k++) { - ::fwrite(&KBuff[k],16,1,f); + ::fwrite(&KBuff[k],32,1,f); // Checksum K.SetInt32(0); + K.bits64[3] = KBuff[k].i64[3]; + K.bits64[2] = KBuff[k].i64[2]; K.bits64[1] = KBuff[k].i64[1]; K.bits64[0] = KBuff[k].i64[0]; checkSum.Add(&K); @@ -564,9 +568,10 @@ bool Kangaroo::HandleRequest(TH_PARAM *p) { if(kType == WILD) P = secp->AddDirect(keyToSearch,P); - - uint32_t hC = P.x.bits64[2] & HASH_MASK; - bool ok = (hC == h) && (P.x.bits64[0] == dp[i].x.i64[0]) && (P.x.bits64[1] == dp[i].x.i64[1]); + + uint32_t hC = dp[i].h & HASH_MASK; + bool ok = (hC == h) && (P.x.bits64[0] == dp[i].x.i64[0]) && (P.x.bits64[1] == dp[i].x.i64[1]) && + (P.x.bits64[2] == dp[i].x.i64[2]) && (P.x.bits64[3] == dp[i].x.i64[3]); if(!ok) { if(kType==TAME) { ::printf("\nWrong TAME point from: %s [dp=%d PID=%u thId=%u gpuId=%u]\n",p->clientInfo,i, @@ -980,14 +985,14 @@ void Kangaroo::WaitForServer() { } // Get Kangaroo from server -bool Kangaroo::GetKangaroosFromServer(std::string& fileName,std::vector& kangs) { +bool Kangaroo::GetKangaroosFromServer(std::string& fileName,std::vector& kangs) { int nbRead; int nbWrite; uint32_t fileNameSize = (uint32_t)fileName.length(); uint64_t nbKangaroo = 0; uint32_t nbK; - int128_t* KBuff; + int256_t* KBuff; Int checkSum; WaitForServer(); @@ -1011,7 +1016,7 @@ bool Kangaroo::GetKangaroosFromServer(std::string& fileName,std::vector& kangs) { +bool Kangaroo::SendKangaroosToServer(std::string& fileName,std::vector& kangs) { int nbWrite; uint32_t fileNameSize = (uint32_t)fileName.length(); uint64_t nbKangaroo = kangs.size(); uint64_t pos; uint32_t nbK; - int128_t *KBuff; + int256_t *KBuff; Int checkSum; WaitForServer(); @@ -1088,7 +1095,7 @@ bool Kangaroo::SendKangaroosToServer(std::string& fileName,std::vector PUT("fileName",serverConn,fileName.c_str(),fileNameSize,ntimeout); PUT("nbKangaroo",serverConn,&nbKangaroo,sizeof(uint64_t),ntimeout); - KBuff = (int128_t*)malloc(KANG_PER_BLOCK * sizeof(int128_t)); + KBuff = (int256_t*)malloc(KANG_PER_BLOCK * sizeof(int256_t)); checkSum.SetInt32(0); pos = 0; @@ -1107,17 +1114,19 @@ bool Kangaroo::SendKangaroosToServer(std::string& fileName,std::vector } for(uint32_t k = 0; k < nbK; k++) { - memcpy(&KBuff[k],&kangs[pos],16); + memcpy(&KBuff[k],&kangs[pos],32); pos++; // Checksum Int K; K.SetInt32(0); + K.bits64[3] = KBuff[k].i64[3]; + K.bits64[2] = KBuff[k].i64[2]; K.bits64[1] = KBuff[k].i64[1]; K.bits64[0] = KBuff[k].i64[0]; checkSum.Add(&K); } - PUTFREE("packet",serverConn,KBuff,nbK * 16,ntimeout,KBuff); + PUTFREE("packet",serverConn,KBuff,nbK * 32,ntimeout,KBuff); nbKangaroo -= nbK; @@ -1153,17 +1162,19 @@ bool Kangaroo::SendToServer(std::vector &dps,uint32_t threadId,uint32_t gp DP *dp = (DP *)malloc(sizeof(DP)*nbDP); for(uint32_t i = 0; iSecpK1 Fast modular multiplication (2 steps folding 512bits to 256bits reduction using 64 bits digits)
  • Multi-GPU support
  • CUDA optimisation via inline PTX assembly
  • +
  • (new) Full 256-bit interval search
  • # Discussion Thread diff --git a/SECPK1/Int.cpp b/SECPK1/Int.cpp index 9bf3453..88a766f 100644 --- a/SECPK1/Int.cpp +++ b/SECPK1/Int.cpp @@ -56,6 +56,21 @@ Int::Int(uint64_t u64) { } +void Int::Copy(Int *a) +{ + bits64[0] = a->bits64[0]; + bits64[1] = a->bits64[1]; + bits64[2] = a->bits64[2]; + bits64[3] = a->bits64[3]; + bits64[4] = a->bits64[4]; + +#if NB64BLOCK > 5 + bits64[5] = a->bits64[5]; + bits64[6] = a->bits64[6]; + bits64[7] = a->bits64[7]; + bits64[8] = a->bits64[8]; +#endif +} // ------------------------------------------------ void Int::CLEAR() { diff --git a/SECPK1/Int.h b/SECPK1/Int.h index ed19794..1b7eded 100644 --- a/SECPK1/Int.h +++ b/SECPK1/Int.h @@ -45,7 +45,7 @@ class Int { Int(int64_t i64); Int(uint64_t u64); Int(Int *a); - + // Op void Add(uint64_t a); void Add(Int *a); @@ -65,6 +65,7 @@ class Int { void MultModN(Int *a, Int *b, Int *n); void Neg(); void Abs(); + void Copy(Int *a); // Right shift (signed) void ShiftR(uint32_t n); diff --git a/Thread.cpp b/Thread.cpp index fcc23e3..11c8cc6 100644 --- a/Thread.cpp +++ b/Thread.cpp @@ -194,8 +194,7 @@ void Kangaroo::ProcessServer() { for(int i = 0; i<(int)localCache.size() && !endOfSearch; i++) { DP_CACHE dp = localCache[i]; for(int j = 0; j<(int)dp.nbDP && !endOfSearch; j++) { - uint64_t h = dp.dp[j].h; - if(!AddToTable(h,&dp.dp[j].x,&dp.dp[j].d)) { + if(!AddToTable(&dp.dp[j].x,&dp.dp[j].d,dp.dp[j].kIdx % 2)) { // Collision inside the same herd collisionInSameHerd++; }