diff --git a/README.md b/README.md new file mode 100644 index 0000000..7c4bb72 --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +### Sudoku Solver +This is a Sudoku solver implementation coded in three days as homework for a GPU class. + +## Parallelizing Sudoku +Parallelizing Sudoku is hard. The most popular CPU solution is backtracking, which is built on recursion. + +## What cuda-sudoku-solver does +Generates 50000 blocks, each capable of solving a logic-based sudoku. First, one block starts and forks whenever necessary. diff --git a/inp.in b/inp.in new file mode 100644 index 0000000..0f7878d --- /dev/null +++ b/inp.in @@ -0,0 +1,949 @@ +400000805 +030000000 +000700000 +020000060 +000080400 +000010000 +000603070 +500200000 +104000000 + +520006000 +000000701 +300000000 +000400800 +600000050 +000000000 +041800000 +000030020 +008700000 + +600000803 +040700000 +000000000 +000504070 +300200000 +106000000 +020000050 +000080600 +000010000 + +480300000 +000000071 +020000000 +705000060 +000200800 +000000000 +001076000 +300000400 +000050000 + +000014000 +030000200 +070000000 +000900030 +601000000 +000000080 +200000104 +000050600 +000708000 + +000000520 +080400000 +030009000 +501000600 +200700000 +000300000 +600010000 +000000704 +000000030 + +602050000 +000003040 +000000000 +430008000 +010000200 +000000700 +500270000 +000000081 +000600000 + +052400000 +000070100 +000000000 +000802000 +300000600 +090500000 +106030000 +000000089 +700000000 + +602050000 +000004030 +000000000 +430008000 +010000200 +000000700 +500270000 +000000081 +000600000 + +092300000 +000080100 +000000000 +107040000 +000000065 +800000000 +060502000 +400000700 +000900000 + +600302000 +050000010 +000000000 +702600000 +000000054 +300000000 +080150000 +000040200 +000000700 + +060501090 +100090053 +900007000 +040800070 +000000508 +081705030 +000050200 +000000000 +076008000 + +005000987 +040050001 +007000000 +200048000 +090100000 +600200000 +300600200 +000009070 +000000500 + +306070000 +000000051 +800000000 +010405000 
+700000600 +000200000 +020000040 +000080300 +000500000 + +100000308 +070400000 +000000000 +203010000 +000000095 +800000000 +050600070 +000080200 +040000000 + +600302000 +040000010 +000000000 +702600000 +000000054 +300000000 +080150000 +000040200 +000000700 + +000030090 +000200001 +050900000 +000000000 +102080406 +080500020 +075000000 +401006003 +000004060 + +450000030 +000801000 +090000000 +000050090 +200700000 +800000000 +010040000 +000000702 +000600800 + +023700006 +800060590 +900000700 +000040970 +307096002 +000000000 +500470000 +000002000 +080000000 + +008400030 +000300000 +900001574 +790008000 +000007005 +140000020 +009060002 +050000400 +000090056 + +098010000 +200000060 +000000000 +000302050 +084000000 +000600000 +000040809 +300500000 +000000100 + +002470058 +000000000 +000001040 +000020009 +528090400 +009000100 +000000030 +300007500 +685002000 + +400000805 +030000000 +000700000 +020000060 +000050400 +000010000 +000603070 +500200000 +109000000 + +020300000 +063000005 +800000001 +500009030 +000700000 +000100008 +087900260 +000006070 +006007004 + +100000709 +040007200 +800000000 +070010060 +300000005 +060040020 +000000008 +005300070 +702000046 + +400000300 +000802000 +000700000 +000100087 +340000000 +600000000 +500060000 +000010400 +082000000 + +000000071 +020800000 +000403000 +700060050 +000200300 +900000000 +600070000 +080000400 +000050000 + +600302000 +040000080 +000000000 +702600000 +000000054 +300000000 +080150000 +000080200 +000000700 + +047080001 +000000000 +000600700 +600003570 +000005000 +010060000 +280040000 +090100040 +000020690 + +000000801 +700200000 +000506000 +000700050 +010000300 +080000000 +500000020 +040080000 +600030000 + +380600000 +009000000 +020030510 +000005000 +030010060 +000400000 +017050080 +000000900 +000007032 + +000500000 +000000506 +970000020 +004802000 +250100030 +080030000 +000004070 +013050090 +020003100 + +020000000 +305062009 +068000300 +050000000 +000640802 +004700900 +003000001 +000006000 +170430000 + +080040000 +300000010 
+000000020 +005000406 +900100800 +200000000 +000309000 +060000500 +000200000 + +008090100 +060500020 +000006000 +030107050 +000000009 +004000300 +050000200 +070003080 +200700004 + +400000508 +030000000 +000700000 +020000060 +000050800 +000010000 +000603070 +500200000 +108000000 + +100000308 +060400000 +000000000 +203010000 +000000095 +800000000 +050600070 +000080200 +040000000 + +100006080 +064000000 +000040007 +000090600 +070400500 +500070100 +050000320 +300008000 +400000000 + +249060003 +030000200 +800000005 +000006000 +000200000 +010040820 +090500700 +004000001 +070003000 + +000800009 +087300040 +600700000 +008500970 +000000000 +043007500 +000003000 +030001450 +400002001 + +000501000 +090000800 +060000000 +401000000 +000070090 +000000030 +800000105 +000200400 +000360000 + +000000801 +600200000 +000705000 +000600020 +010000300 +080000000 +200000070 +030080000 +500040000 + +047600050 +803000002 +000009000 +000805006 +000100000 +602400000 +078000510 +006000040 +090004007 + +000007095 +000001000 +860020000 +020073008 +500000060 +003004900 +305000417 +240000000 +000000000 + +040500000 +800090030 +076020000 +014600000 +000009007 +000003600 +001004050 +060000003 +007100200 + +083400000 +000070050 +000000000 +040108000 +000000027 +000300000 +206050000 +500000800 +000000100 + +009000003 +000009000 +700000506 +006500400 +000300000 +028000000 +300750600 +600000000 +000120308 + +026039000 +000600001 +900000700 +000004009 +050000200 +008500000 +300200900 +400007620 +000000004 + +203080000 +800700000 +000000100 +060507000 +400000030 +000100000 +000000082 +050000600 +010000000 + +600302000 +010000050 +000000000 +702600000 +000000084 +300000000 +080150000 +000080200 +000000700 + +100000900 +064001070 +070040000 +000300000 +308900500 +007000020 +000060709 +000004010 +000129030 + +000000000 +900000084 +062300050 +000600045 +300010006 +000900070 +000100000 +405002000 +030800009 + +020000593 +800500460 +940060008 +002030000 +060080730 +700200000 +000040380 +070000600 +000000005 + 
+904005000 +250600100 +310000008 +070009000 +400260000 +001470000 +700000002 +000300806 +040000090 + +000520000 +090003004 +000000700 +010000040 +080045300 +600010008 +702000000 +008000032 +040080010 + +530020900 +024030050 +009000000 +000010827 +000700000 +000098100 +000000000 +006400009 +102050430 + +100007860 +007008010 +800200009 +000000002 +400010000 +009005000 +608000000 +000050900 +000009304 + +000050001 +100000070 +060000080 +000004000 +009010300 +000596020 +080062007 +007000000 +305070200 + +047020000 +800001000 +030000902 +000005000 +600810050 +000040000 +070000304 +000900010 +400270800 + +000000940 +000090005 +300005070 +080400100 +463000000 +000007080 +800700000 +700000028 +050260000 + +020000006 +000041000 +007800001 +000000700 +003700000 +600412000 +010074005 +008050070 +000003900 + +100000308 +060400000 +000000000 +203010000 +000000075 +800000000 +070500060 +000080200 +040000000 + +200001090 +010030700 +900800020 +000000850 +060400000 +000070003 +020300060 +000500000 +109000205 + +007008000 +006020300 +030000009 +010050060 +000010000 +070900002 +000000004 +083004000 +260000510 + +000360000 +850000000 +904008000 +000006800 +000000017 +009004500 +010500060 +400009002 +000003000 + +340600000 +007000000 +020080570 +000005000 +070010020 +000400000 +036020010 +000000900 +000007082 + +000000401 +800200000 +000607000 +000800060 +040000300 +010000000 +600000020 +050010000 +700030000 + +040050067 +000100040 +000200000 +100800300 +000000200 +060000000 +000040050 +300000800 +200000000 + +000000040 +002004001 +070050090 +003007000 +040060000 +600100800 +020000100 +850900060 +000080003 + +800700004 +050000600 +000000000 +030970008 +000043005 +000020900 +006000000 +200060007 +071008302 + +080004050 +000700300 +000000000 +010085000 +600000200 +000040000 +302600000 +000000041 +700000000 + +000070080 +006000500 +020003061 +010007002 +008005340 +200900000 +002000000 +580006030 +400010000 + +000000801 +600200000 +000705000 +000600020 +010000300 +080000000 +200000070 
+040080000 +500030000 + +020000000 +000600003 +074080000 +000003002 +080040010 +600500000 +000010780 +500009000 +000000040 + +052006800 +000007020 +000000600 +004800900 +200410000 +001000008 +006100380 +000090006 +300600109 + +000010780 +500009000 +000000040 +020000000 +000600003 +074080000 +000003002 +080040010 +600500000 + +100000003 +060300700 +070005001 +210700090 +007000000 +008010020 +000806400 +009020060 +000400000 + +400070100 +001904605 +000001000 +000700002 +002030000 +847006000 +014000806 +020000300 +600090000 + +000000801 +700200000 +000506000 +000700050 +010000300 +080000000 +500000020 +030080000 +600040000 + +963000000 +100008000 +000205000 +040800000 +010000700 +000030025 +700000030 +009020407 +000000900 + +150300000 +070040200 +004072000 +008000000 +000900108 +010080790 +000003800 +000000000 +600007423 + +000000000 +057240009 +800009470 +009003000 +500900120 +003010900 +060000250 +000560000 +070000006 + +000075000 +010020000 +040003000 +500000302 +000800010 +000000600 +000100480 +200000000 +700000000 + +600000703 +040800000 +000000000 +000504080 +700200000 +103000000 +020000050 +000070900 +000010000 + +000060004 +006030000 +100400507 +700000805 +000800000 +608000090 +002090000 +400003200 +009700100 + +032000005 +800300000 +904280001 +000400039 +000600050 +000010000 +020006708 +000004000 +095000060 + +000503000 +000060700 +508000016 +360020000 +000401000 +000030005 +670000208 +004070000 +000200500 + +050307040 +100000000 +030000000 +508030610 +000800509 +060010000 +000040006 +000692700 +002000900 + +005008001 +800000090 +000000780 +000400000 +640000900 +000053002 +060000000 +001380050 +000907140 + +000000000 +072060100 +005100082 +080001300 +400000000 +037090010 +000023800 +504009000 +000000790 + +000658000 +004000000 +120000000 +000009607 +000300500 +002080003 +001900800 +306000004 +000047300 + +020300000 +006008090 +830500000 +000200080 +709005000 +000006004 +000000010 +001000402 +200700809 + +050090000 +100000600 +000308000 +008040009 +514000000 
+030000200 +000000004 +080006007 +700150060 + +000002000 +000070001 +700300090 +800700000 +020890600 +013006000 +090050824 +000008910 +000000000 + +300080000 +000700005 +100000000 +000000360 +002004000 +070000000 +000060130 +045200000 +000000800 diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..3c9232f --- /dev/null +++ b/run.sh @@ -0,0 +1,2 @@ +nvcc -arch sm_35 -rdc=true -o sudokusolver sudokusolver.cu +./sudokusolver inp.in diff --git a/sudokusolver.cu b/sudokusolver.cu new file mode 100644 index 0000000..6ba78d0 --- /dev/null +++ b/sudokusolver.cu @@ -0,0 +1,434 @@ +/* + * Please write your name and net ID below + * + * Last name: Evci + * First name: Utku + * Net ID: ue225 + * + */ + + +/* + * You compile with: + * nvcc -arch sm_35 -rdc=true -o sudokusolver sudokusolver.cu + */ +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) +// #define DEBUG + +#include +#include +#include +#include +/*****************************************************************/ + +__global__ void fillSudokuSafeAndFork(char* memory,int* stats) +{ + /* + We have nBlocks many available blocks. + */ + uint i,current_poss,j,temp,mat_i,mat_j,k; + char* block_memory = memory+(81*blockIdx.x); + + __shared__ uint row_used_numbers[9]; + __shared__ uint col_used_numbers[9]; + __shared__ uint cell_used_numbers[9]; + __shared__ char progress_flag; + __shared__ char done_flag; + __shared__ char error_flag; + __shared__ int min_forks; + __shared__ int scheduling_thread; + + // check whether all blocks are idle or not.This shouldn't happen + if (blockIdx.x==0){ //first block + if (threadIdx.x==0) { + progress_flag=0; + } + __syncthreads(); + for(i=threadIdx.x;i 0){ + progress_flag=1; + } + } + __syncthreads(); + if (progress_flag==0 and threadIdx.x == 0){ + //no active block terminate. + if (threadIdx.x==0) printf("no active blocks...terminating\n"); + stats[gridDim.x]=2; + } + } + + //If block is active work on it. 
+ if(stats[blockIdx.x]==1){ + if (threadIdx.x==0) { + error_flag = 0; //set to 1 if the board is wrongs + done_flag = 0; //set to 1 if the board is solved + progress_flag=1; //set to 0 if no reterministic progress can be made. + } + __syncthreads(); + + while(!error_flag && !done_flag &&progress_flag ){ + __syncthreads(); + // 1st check whether the board is valid and fill X_used_numbers arrays for rows,columns and cells. + //************************* + if (threadIdx.x<9){ + // TODO optimize here such that there wraps does row/col/cell. + row_used_numbers[threadIdx.x] = 0; + col_used_numbers[threadIdx.x] = 0; + cell_used_numbers[threadIdx.x] = 0; + for(i=0;i<9;i++){ + //rows + temp = block_memory[threadIdx.x*9+i]; + if (temp) { //!=0 + if ((row_used_numbers[threadIdx.x]>>(temp-1)) & 1){ + // This is bad, you have the same number in the same row. This solution fails + error_flag=10+i; + } + //set n'th bit to 1. + row_used_numbers[threadIdx.x] |= 1<<(temp-1); + } + //columns + temp = block_memory[i*9+threadIdx.x]; + if (temp) { //!=0 + if ((col_used_numbers[threadIdx.x]>>(temp-1)) & 1){ + // This is bad, you have the same number in the same column. This solution fails + error_flag=20+i; + } + //set n'th bit to 1. + col_used_numbers[threadIdx.x] |= 1<<(temp-1); + } + } + //cells + for (i=(threadIdx.x/3)*3;i<((threadIdx.x/3+1)*3);i++){ + for (j=(threadIdx.x%3)*3;j<((threadIdx.x%3+1)*3);j++){ + temp = block_memory[i*9+j]; + if (temp) { //!=0 + if ((cell_used_numbers[threadIdx.x]>>(temp-1)) & 1){ + // This is bad, you have the same number in the same cell. This solution fails + error_flag=30+i; + } + //set n'th bit to 1. 
+ cell_used_numbers[threadIdx.x] |= 1<<(temp-1); + } + } + } + + } + __syncthreads(); + if (error_flag==0){ + if (threadIdx.x==0) { + progress_flag = 0; + done_flag = 1; + } + __syncthreads(); + if (threadIdx.x<81){ + // 2nd for each cell calculate available numbers(row_used OR col_used OR cell_used) and if there is one 0 + //************************* + current_poss = 0; + mat_i = threadIdx.x/9; + mat_j = threadIdx.x%9; + if (block_memory[threadIdx.x] == 0){ + done_flag = 0; + current_poss = (row_used_numbers[mat_i] | col_used_numbers[mat_j] | cell_used_numbers[(mat_i/3)*3+(mat_j/3)]); + //printf("thredix=%d,current_poss=%d\n",threadIdx.x,current_poss); + temp = 0; // temp for count + for (i=0;i<9;i++){ + if ((current_poss & (1<1 + if (stats[j] == (gridDim.x*blockIdx.x+threadIdx.x+2)){ + //succesful scheduling + #ifdef DEBUG + if (blockIdx.x<5){ + printf("Error_flag:%d,current_poss=%d,rowi=%d\n",error_flag,current_poss,row_used_numbers[mat_i]); + printf("From-Block:%d,Forkedinto: Block:%d,i=%d,j=%d,new_val=%d,k=%d/%d\n",blockIdx.x,j,mat_i+1,mat_j+1,i+1,k,min_forks); + } + #endif + memcpy(memory+j*81,block_memory,81); + memory[j*81+threadIdx.x] = i+1; + stats[j] = 1; + break; + } + } + if (j == gridDim.x){ + // #ifdef DEBUG + printf("From-Block:%d,i=%d,j=%d,k=%d/%d,c_poss=%d,b_mem=%d,chouldnt schedule\n",blockIdx.x,mat_i+1,mat_j+1,k,min_forks,current_poss,block_memory[threadIdx.x]); + // #endif + } + } + k++; + } + } + } + __syncthreads(); + } + } + } + + +__device__ void printSudokuDev(char *arr){ + int i; + printf("|++++++++++++++++++++"); + for (i = 0;i<81;i++){ + if (i%27==0) printf("|\n|--------------------"); + if (i%9==0) printf("|\n"); + if (i%3==0) printf("|"); + if (arr[i]){ + printf("%d ",arr[i]); + } + else{ + printf(". 
"); + } + } + printf("|\n"); +} + +__global__ void controller(char* arr_dev,int* block_stat,int nBlocks, int nThreads){ + int i = 0; + while (block_stat[nBlocks]!=2 && i<55){//stats[gridDim.x]==2 means, solution is coppied to the last 81 char of memory. + fillSudokuSafeAndFork<<>>(arr_dev,block_stat); + cudaDeviceSynchronize(); + #ifdef DEBUG + printf("Hey\n"); + for (int j=0;j=nBlocks;j++){ + printf("%d,",block_stat[j]); + } + printf("\n"); + printSudokuDev(arr_dev); + #endif + i++; + } +} + +void gpu_sudoku_solver(char* arr,int version) +{ + char *arr_dev; + int *block_stat; + + int nThreads = 96; // wrap_size 32, each thread will have responsible from one cell. + int nBlocks = 20000; // max available concurent blocks/searches running. + int memSize = 81*(nBlocks+1); // 0.81 MB for N=9 + //copy array and create a new one temp. last block/stat is for the result + cudaMalloc((void**) &block_stat,(nBlocks+1)*sizeof(int)); + cudaMemset(block_stat, 0, (nBlocks+1)*sizeof(int)); + cudaMemset(block_stat, 1, 1); + if( !block_stat ) + { + fprintf(stderr, " Cannot allocate block_stat array of size %d on the device\n", (nBlocks+1)*sizeof(int)); + exit(1); + } + + cudaMalloc((void**) &arr_dev,memSize); + cudaMemcpy(arr_dev,arr,81,cudaMemcpyHostToDevice); + + if( !arr_dev ) + { + fprintf(stderr, " Cannot allocate arr_dev of size %d on the device\n", memSize); + exit(1); + } + + if (version==1){ + printf("Block=%d,threads=%d starting\n",nBlocks,nThreads); + controller<<<1,1>>>(arr_dev,block_stat,nBlocks,nThreads); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) + printf("Error: %s\n", cudaGetErrorString(err)); + } + else{ + printf("Invalid version'\n"); + exit(1); + } + cudaMemcpy(arr,arr_dev+81*nBlocks,81,cudaMemcpyDeviceToHost); + cudaFree(arr_dev); + cudaFree(block_stat); +} + +void readNextSudoku(FILE *fp, char *sudoku){ + int i,j; + for (i=0;i<9;i++){ + fscanf(fp, "%s", sudoku); + for (j=0;j<9;j++){ + sudoku[j] = sudoku[j]-48; //48 = char '0' + } + sudoku 
= sudoku + 9; + } +} + +void printSudoku(char *arr){ + int i; + printf("|++++++++++++++++++++"); + for (i = 0;i<81;i++){ + if (i%27==0) printf("|\n|--------------------"); + if (i%9==0) printf("|\n"); + if (i%3==0) printf("|"); + if (arr[i]){ + printf("%d ",arr[i]); + } + else{ + printf(". "); + } + } + printf("|\n"); +} + +void printSudokuToFile(char *arr,FILE* out_fp){ + for (int i = 0;i<81;i++){ + fprintf(out_fp, "%d",arr[i]); + if (i%9==8){ + fprintf(out_fp, "\n"); + } + } + fprintf(out_fp, "\n"); +} + +int main(int argc, char *argv[]) +{ + cudaFree(0); + cudaSetDevice ( 1 ); + int version=1,flag=1,i; + char cur_sudoku[81]; + char out_name[200]; + double time_taken; + clock_t start, end; + FILE *inp_fp,*out_fp; + + if (argc <=3 && argc>1 ) + { + memset(out_name, '\0', sizeof(out_name)); + strcpy(out_name, argv[1]); + i = 0 ; + while (out_name[i] != '\0' && out_name[i] != '.'){ + i ++; + } + if (out_name[i] == '\0'){ + fprintf(stderr, "The input file should be like X.in: %s!\n",out_name[i]); + exit(1); + } + else{ + out_name[i+1] = 's'; + out_name[i+2] = 'o'; + out_name[i+3] = 'l'; + out_name[i+4] = '\0'; + } + out_fp = fopen(out_name,"w"); + if (out_fp == NULL) { + fprintf(stderr, "Can't open output file %s!\n",out_name); + exit(1); + } + + inp_fp = fopen(argv[1],"r"); + if (inp_fp == NULL) { + fprintf(stderr, "Can't open input file %s!\n",argv[1]); + exit(1); + } + } + else + { + printf("Usage 'sudokusolver filename [version_no=1]'\n"); + exit(1); + } + + if (argc == 3 ){ + version = (int) strtol(argv[2], (char **)NULL, 10); + printf("Version = %d\n", version); + } + while (flag!=-1){ + readNextSudoku(inp_fp,cur_sudoku); + printSudoku(cur_sudoku); + start = clock(); + gpu_sudoku_solver(cur_sudoku,version); + end = clock(); + time_taken = ((double)(end - start))/ CLOCKS_PER_SEC; + printf("Time taken for the function_call is %lf\n", time_taken); + printSudoku(cur_sudoku); + printf("-----------------------------------------\n"); + 
printSudokuToFile(cur_sudoku,out_fp); + fgetc( inp_fp ); + flag=fgetc( inp_fp ); + } + fclose(inp_fp); + fclose(out_fp); +}