@@ -911,6 +911,211 @@ struct EllpackMatrix
911
911
}
912
912
};
913
913
914
+ // /===----------------------------------------------------------------------===//
915
+ // / BCSR matrix type
916
+ // /===----------------------------------------------------------------------===//
917
+
918
+ template <typename T>
919
+ struct BCSRMatrix
920
+ {
921
+ uint64_t num_blocks;
922
+
923
+ uint64_t block_rows;
924
+ uint64_t block_cols;
925
+
926
+ uint64_t *colptr;
927
+ uint64_t *colidx;
928
+ uint64_t colptr_len;
929
+ uint64_t colidx_len;
930
+
931
+ T *Aval;
932
+ uint64_t value_len;
933
+
934
+ bool has_values (uint64_t i, uint64_t mi, uint64_t j, uint64_t mj, CooMatrix<T> *coo_matrix) {
935
+ for (uint64_t bi = i; bi<mi; bi++) {
936
+ for (uint64_t bj = j; bj<mj; bj++) {
937
+ for (uint64_t p = 0 ; p<coo_matrix->num_nonzeros ; p++) {
938
+ auto coord = coo_matrix->coo_tuples [p];
939
+ if (coord.row == bi && coord.col == bj) {
940
+ return true ;
941
+ }
942
+ }
943
+ }
944
+ }
945
+
946
+ return false ;
947
+ }
948
+
949
+ // / Initializer
950
+ void Init (CooMatrix<T> *coo_matrix, bool verbose = false )
951
+ {
952
+ // num_rows = coo_matrix->num_rows;
953
+ // num_cols = 0;
954
+ uint64_t num_nonzeros = coo_matrix->num_nonzeros ;
955
+
956
+ // / Sort by rows, then columns
957
+ if (verbose)
958
+ printf (" Ordering..." );
959
+ fflush (stdout);
960
+ std::stable_sort (coo_matrix->coo_tuples , coo_matrix->coo_tuples + num_nonzeros, CooComparatorRow ());
961
+ if (verbose)
962
+ printf (" done." );
963
+ fflush (stdout);
964
+
965
+ // / Calculate the column count
966
+ uint64_t max = 0 ;
967
+ uint64_t buffer = 0 ;
968
+ // int current = -1;
969
+ uint64_t current = num_nonzeros > 0 ? coo_matrix->coo_tuples [0 ].row : 0 ;
970
+ for (uint64_t i = 0 ; i < num_nonzeros; i++)
971
+ {
972
+ if (coo_matrix->coo_tuples [i].row == current)
973
+ {
974
+ ++buffer;
975
+ }
976
+ else
977
+ {
978
+ if (buffer > max)
979
+ max = buffer;
980
+ buffer = 1 ;
981
+ current = coo_matrix->coo_tuples [i].row ;
982
+ }
983
+ }
984
+ if (buffer > max)
985
+ max = buffer;
986
+ // num_cols = max;
987
+
988
+ // Temporary
989
+ num_blocks = 1 ;
990
+ block_rows = 1 ;
991
+ block_cols = 1 ;
992
+ colptr_len = 1 ;
993
+ colidx_len = 1 ;
994
+ value_len = 1 ;
995
+
996
+ // /////////////////////////////////////////////
997
+ // for (uint64_t p = 0; p<coo_matrix->num_nonzeros; p++) {
998
+ // auto coord = coo_matrix->coo_tuples[p];
999
+ // printf("(%d, %d, %.0f)\n", coord.row, coord.col, coord.val);
1000
+ // }
1001
+
1002
+ // /////////////////////////////////////////////
1003
+ uint64_t rows = coo_matrix->num_rows ;
1004
+ uint64_t cols = coo_matrix->num_cols ;
1005
+ // printf("Num_rows: %d | Num_cols: %d\n", rows, cols);
1006
+
1007
+ std::vector<int > A2pos_nc;
1008
+ std::vector<int > A2crd;
1009
+ std::vector<double > Aval_nc;
1010
+
1011
+ // Step 1: Determine block size
1012
+ // TODO: Let us think about this. For now, quick solution
1013
+ block_rows = rows/2 ;
1014
+ block_cols = cols/2 ;
1015
+ // printf("Block_rows: %d | Block_cols: %d\n", block_rows, block_cols);
1016
+
1017
+ // Step 2: Examine the blocks
1018
+ // We only want the blocks with values
1019
+ //
1020
+ // From here, we can start building the A2 dimension
1021
+ //
1022
+ for (uint64_t i=0 ; i<rows; i+=block_rows) {
1023
+ for (uint64_t j=0 ; j<cols; j+=block_cols) {
1024
+ // Note: for A2_crd, corresponds to j, divide by "block_cols"
1025
+ // to get the block position
1026
+
1027
+ // Check the block and see if it has values
1028
+ // If so, we use it
1029
+ if (has_values (i, i+block_rows, j, j+block_cols, coo_matrix)) {
1030
+ A2pos_nc.push_back (i/block_rows);
1031
+ A2crd.push_back (j/block_cols);
1032
+
1033
+ // Add all the values including the padded zeros
1034
+ //
1035
+ // To do this, we first search the COO array for a non-zero
1036
+ // value. If there is no such value, then we add the
1037
+ // index with a zero.
1038
+ //
1039
+ for (uint64_t bi = i; bi<(i+block_rows); bi++) {
1040
+ for (uint64_t bj = j; bj<(j+block_cols); bj++) {
1041
+ bool found = false ;
1042
+ for (uint64_t p = 0 ; p<coo_matrix->num_nonzeros ; p++) {
1043
+ auto coord = coo_matrix->coo_tuples [p];
1044
+ if (coord.row == bi && coord.col == bj) {
1045
+ Aval_nc.push_back (coord.val );
1046
+ found = true ;
1047
+ break ;
1048
+ }
1049
+ }
1050
+
1051
+ if (found == false ) {
1052
+ Aval_nc.push_back (0 );
1053
+ }
1054
+ }
1055
+ }
1056
+ }
1057
+ }
1058
+ }
1059
+
1060
+ // Compress the row coordinates
1061
+ std::vector<int > A2pos;
1062
+ A2pos.push_back (0 );
1063
+
1064
+ int curr = A2pos_nc[0 ];
1065
+ int curr_end = 1 ;
1066
+ for (uint64_t i = 1 ; i<A2pos_nc.size (); i++) {
1067
+ if (A2pos_nc[i] != curr) {
1068
+ A2pos.push_back (curr_end);
1069
+ curr = A2pos_nc[i];
1070
+ }
1071
+ curr_end += 1 ;
1072
+ }
1073
+ A2pos.push_back (curr_end);
1074
+
1075
+ int A1pos = A2pos.size () - 1 ;
1076
+
1077
+ // Copy all the elements over
1078
+ num_blocks = A1pos;
1079
+ colptr_len = A2pos.size ();
1080
+ colidx_len = A2crd.size ();
1081
+ value_len = Aval_nc.size ();
1082
+
1083
+ colptr = new uint64_t [A2pos.size ()];
1084
+ colidx = new uint64_t [A2crd.size ()];
1085
+ Aval = new T[Aval_nc.size ()];
1086
+
1087
+ for (uint64_t i = 0 ; i<A2pos.size (); i++) {
1088
+ colptr[i] = A2pos[i];
1089
+ }
1090
+
1091
+ for (uint64_t i = 0 ; i<A2crd.size (); i++) {
1092
+ colidx[i] = A2crd[i];
1093
+ }
1094
+
1095
+ for (uint64_t i = 0 ; i<Aval_nc.size (); i++) {
1096
+ Aval[i] = Aval_nc[i];
1097
+ }
1098
+ }
1099
+
1100
+ // / Clear matrix
1101
+ void Clear ()
1102
+ {
1103
+ delete[] Aval;
1104
+ }
1105
+
1106
+ // / The constructor- calls the initializer
1107
+ BCSRMatrix (CooMatrix<T> *coo_matrix, bool verbose = false )
1108
+ {
1109
+ Init (coo_matrix, verbose);
1110
+ }
1111
+
1112
+ // / Destructor
1113
+ ~BCSRMatrix ()
1114
+ {
1115
+ Clear ();
1116
+ }
1117
+ };
1118
+
914
1119
// ===----------------------------------------------------------------------===//
915
1120
// / COO tensor 3D type. A COO tensor is just a vector of edge tuples. Tuples are sorted
916
1121
// / first by first dim, then by second dim and so on.
@@ -1756,20 +1961,6 @@ void read_input_sizes_2D(int32_t fileID,
1756
1961
EllpackMatrix<T> ellpack_matrix (FileReader.coo_matrix );
1757
1962
int cols = ellpack_matrix.num_cols * ellpack_matrix.num_rows ;
1758
1963
1759
- /*
1760
- desc_sizes->data[0] = 1; // A1pos
1761
- desc_sizes->data[1] = 1; // A1crd
1762
- desc_sizes->data[2] = 1; // A2pos
1763
- desc_sizes->data[3] = cols; // A2crd
1764
- desc_sizes->data[4] = 1; // A1_tile_pos
1765
- desc_sizes->data[5] = 1; // A1_tile_crd
1766
- desc_sizes->data[6] = 0; // A2_tile_pos
1767
- desc_sizes->data[7] = 0; // A2_tile_crd
1768
- desc_sizes->data[8] = cols; // Controls count of value dimension
1769
- desc_sizes->data[9] = FileReader.coo_matrix->num_rows;
1770
- desc_sizes->data[10] = FileReader.coo_matrix->num_cols;
1771
- */
1772
-
1773
1964
desc_sizes->data [0 ] = 1 ; // / A1pos
1774
1965
desc_sizes->data [1 ] = 1 ; // / A1crd
1775
1966
desc_sizes->data [2 ] = 1 ; // / A1_tile_pos
@@ -1796,7 +1987,30 @@ void read_input_sizes_2D(int32_t fileID,
1796
1987
// / BCSR
1797
1988
else if (A1format == Dense && A2format == Compressed_nonunique && A1_tile_format == Dense && A2_tile_format == Dense)
1798
1989
{
1799
- puts (" BCSR" );
1990
+ BCSRMatrix<T> bcsr_matrix (FileReader.coo_matrix );
1991
+
1992
+ desc_sizes->data [0 ] = 1 ; // / A1pos
1993
+ desc_sizes->data [1 ] = 1 ; // / A1crd
1994
+ desc_sizes->data [2 ] = 1 ; // / A1_tile_pos
1995
+ desc_sizes->data [3 ] = 1 ; // / A1_tile_crd
1996
+ desc_sizes->data [4 ] = bcsr_matrix.colptr_len ; // / A2pos
1997
+ desc_sizes->data [5 ] = bcsr_matrix.colidx_len ; // / A2crd
1998
+ desc_sizes->data [6 ] = 1 ; // / A2_tile_pos
1999
+ desc_sizes->data [7 ] = 1 ; // / A2_tile_crd
2000
+ desc_sizes->data [8 ] = bcsr_matrix.value_len ;
2001
+ desc_sizes->data [9 ] = FileReader.coo_matrix ->num_rows ;
2002
+ desc_sizes->data [10 ] = FileReader.coo_matrix ->num_cols ;
2003
+
2004
+ /* ****************DEBUG******************/
2005
+ // std::cout << "BCSR detail: \n"
2006
+ // << "desc_sizes->data[0]: " << desc_sizes->data[0] << "\n"
2007
+ // << "desc_sizes->data[1]: " << desc_sizes->data[1] << "\n"
2008
+ // << "desc_sizes->data[2]: " << desc_sizes->data[2] << "\n"
2009
+ // << "desc_sizes->data[3]: " << desc_sizes->data[3] << "\n"
2010
+ // << "desc_sizes->data[4]: " << desc_sizes->data[4] << "\n"
2011
+ // << "desc_sizes->data[5]: " << desc_sizes->data[5] << "\n"
2012
+ // << "desc_sizes->data[6]: " << desc_sizes->data[6] << "\n";
2013
+ /* ****************DEBUG******************/
1800
2014
}
1801
2015
// / CSB
1802
2016
else if (A1format == Compressed_unique && A2format == singleton && A1_tile_format == Dense && A2_tile_format == Dense)
@@ -2061,12 +2275,29 @@ void read_input_2D(int32_t fileID,
2061
2275
}
2062
2276
}
2063
2277
// / BCSR
2064
- /* else if (A1format == Dense && A2format == Compressed_nonunique && A3format == Dense && A4format == Dense)
2278
+ else if (A1format == Dense && A1_tile_format == Dense && A2format == Compressed_nonunique && A2_tile_format == Dense)
2065
2279
{
2066
- puts("BCSR");
2280
+ BCSRMatrix<T> bcsr_matrix (FileReader.coo_matrix );
2281
+ FileReader.FileReaderWrapperFinalize ();
2282
+
2283
+ desc_A1pos->data [0 ] = bcsr_matrix.num_blocks ;
2284
+ desc_A1tile_pos->data [0 ] = bcsr_matrix.block_rows ;
2285
+ desc_A2tile_pos->data [0 ] = bcsr_matrix.block_cols ;
2286
+
2287
+ for (uint64_t i = 0 ; i<bcsr_matrix.colptr_len ; i++) {
2288
+ desc_A2pos->data [i] = bcsr_matrix.colptr [i];
2289
+ }
2290
+
2291
+ for (uint64_t i = 0 ; i<bcsr_matrix.colidx_len ; i++) {
2292
+ desc_A2crd->data [i] = bcsr_matrix.colidx [i];
2293
+ }
2294
+
2295
+ for (uint64_t i = 0 ; i<bcsr_matrix.value_len ; i++) {
2296
+ desc_Aval->data [i] = bcsr_matrix.Aval [i];
2297
+ }
2067
2298
}
2068
2299
// / CSB
2069
- else if (A1format == Compressed_unique && A2format == singleton && A3format == Dense && A4format == Dense)
2300
+ /* else if (A1format == Compressed_unique && A2format == singleton && A3format == Dense && A4format == Dense)
2070
2301
{
2071
2302
puts("CSB");
2072
2303
}*/
@@ -2505,4 +2736,4 @@ extern "C" void comet_sort_index(int64_t rank, void *ptr, int64_t index_first, i
2505
2736
{
2506
2737
UnrankedMemRefType<int64_t > descriptor = {rank, ptr};
2507
2738
_milr_ciface_comet_sort (&descriptor, index_first, index_last);
2508
- }
2739
+ }
0 commit comments