@@ -57,7 +57,7 @@ class ArgumentBuilder {
57
57
// ! Build an argument list where each argument has its own line
58
58
ArgumentBuilder (int indent_level, const char * tab) {
59
59
std::stringstream ss;
60
- for (const auto i : c10::irange (indent_level)) {
60
+ for (const auto i : arange (indent_level)) {
61
61
(void )i; // Suppress unused variable warning
62
62
ss << tab;
63
63
}
@@ -335,7 +335,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
335
335
// Generate parameter declarations
336
336
kernel_params_.reserve (kernel_->parameters ().size ());
337
337
unsigned int duplicate_counter = 0 ;
338
- for (auto i : c10::irange (kernel_->parameters ().size ())) {
338
+ for (auto i : arange (kernel_->parameters ().size ())) {
339
339
std::stringstream var_name_ss;
340
340
auto param = kernel_->parameters ().at (i);
341
341
kernel_params_.insert (param);
@@ -557,7 +557,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
557
557
}
558
558
559
559
std::ostream& indent () {
560
- for (const auto i : c10::irange (block_nest_level_)) {
560
+ for (const auto i : arange (block_nest_level_)) {
561
561
(void )i; // Suppress unused variable warning
562
562
code_ << kTab ;
563
563
}
@@ -817,7 +817,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
817
817
}
818
818
auto dtype = std::get<StructType>(sop->output (0 )->dtype ().type );
819
819
code_ << dtype.name << " { " ;
820
- for (auto i : c10::irange (sop->inputs ().size ())) {
820
+ for (auto i : arange (sop->inputs ().size ())) {
821
821
if (i > 0 ) {
822
822
code_ << " , " ;
823
823
}
@@ -906,7 +906,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
906
906
// Generate other datatypes in double
907
907
}
908
908
code_ << " (" << gen (rop->input (0 ));
909
- for (auto inp_i : c10::irange (1 , rop->inputs ().size ())) {
909
+ for (auto inp_i : arange (1 , rop->inputs ().size ())) {
910
910
code_ << " , " << gen (rop->input (inp_i));
911
911
}
912
912
code_ << " );\n " ;
@@ -2106,8 +2106,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
2106
2106
ArgumentBuilder func_args (block_nest_level_ + 1 , kTab );
2107
2107
2108
2108
// Append arguments for each reduction
2109
- for (const auto i :
2110
- c10::irange (grouped_grop->numHorizontallyGroupedExprs ())) {
2109
+ for (const auto i : arange (grouped_grop->numHorizontallyGroupedExprs ())) {
2111
2110
NVF_ERROR (
2112
2111
grouped_grop->reduction_buffers ().at (i)->buffer ()->isA <TensorView>());
2113
2112
const auto work_buffer =
@@ -2221,7 +2220,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
2221
2220
for (const auto & index_values : index_val_sets) {
2222
2221
NVF_ERROR (loop_indices.size () == index_values.size ());
2223
2222
std::unordered_map<const Val*, int64_t > index_val_map;
2224
- for (const auto i : c10::irange (loop_indices.size ())) {
2223
+ for (const auto i : arange (loop_indices.size ())) {
2225
2224
auto loop_index = loop_indices.at (i);
2226
2225
auto index_val = index_values.at (i);
2227
2226
index_val_map.emplace (loop_index, index_val);
@@ -2280,15 +2279,14 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
2280
2279
ArgumentBuilder write_preds;
2281
2280
2282
2281
for (const auto expr_index :
2283
- c10::irange (grouped_grop->numHorizontallyGroupedExprs ())) {
2282
+ arange (grouped_grop->numHorizontallyGroupedExprs ())) {
2284
2283
const auto data_type = grouped_grop->outputs ().at (expr_index)->dtype ();
2285
2284
NVF_ERROR (grouped_grop->reduction_buffers ()
2286
2285
.at (expr_index)
2287
2286
->buffer ()
2288
2287
->isA <TensorView>());
2289
2288
2290
- for (const auto & group_index :
2291
- c10::irange (index_replacement_maps.size ())) {
2289
+ for (const auto & group_index : arange (index_replacement_maps.size ())) {
2292
2290
// Set the index replacement map with the concrete values of
2293
2291
// indices of grouped loops.
2294
2292
index_replacement_map_ = index_replacement_maps.at (group_index);
@@ -2422,13 +2420,12 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
2422
2420
auto init_vals = grouped_gwop->initVals ();
2423
2421
2424
2422
for (const auto expr_index :
2425
- c10::irange (grouped_gwop->numHorizontallyGroupedExprs ())) {
2423
+ arange (grouped_gwop->numHorizontallyGroupedExprs ())) {
2426
2424
const auto & output = output_vals.at (expr_index);
2427
2425
const auto & input = input_vals.at (expr_index);
2428
2426
const auto & init = init_vals.at (expr_index);
2429
2427
2430
- for (const auto & group_index :
2431
- c10::irange (index_replacement_maps.size ())) {
2428
+ for (const auto & group_index : arange (index_replacement_maps.size ())) {
2432
2429
// Set the index replacement map with the concrete values of
2433
2430
// indices of grouped loops.
2434
2431
index_replacement_map_ = index_replacement_maps.at (group_index);
@@ -2442,7 +2439,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
2442
2439
std::to_string (group_index));
2443
2440
2444
2441
// Set up arguments for avg, var, and N
2445
- for (const auto i : c10::irange (3 )) {
2442
+ for (const auto i : arange (3 )) {
2446
2443
out_args[i].arg (gen (output.get (i)));
2447
2444
in_args[i].arg (gen (input.get (i)));
2448
2445
init_args[i].arg (gen (init.get (i)));
@@ -2589,7 +2586,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
2589
2586
func_args.arg (genComputeBlockDim ());
2590
2587
2591
2588
// global buf
2592
- for (const auto i : c10::irange (3 )) {
2589
+ for (const auto i : arange (3 )) {
2593
2590
const auto work_buffer = grouped_gwop->reduction_buffers ()[i]
2594
2591
.at (0 )
2595
2592
->buffer ()
@@ -3005,7 +3002,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
3005
3002
grouped_rop->writePredicate ());
3006
3003
}
3007
3004
3008
- for (const auto i : c10::irange (num_grouped_exprs)) {
3005
+ for (const auto i : arange (num_grouped_exprs)) {
3009
3006
NVF_ERROR (grouped_rop->output (i)->isA <kir::TensorIndex>());
3010
3007
3011
3008
const auto output = grouped_rop->output (i)->as <kir::TensorIndex>();
@@ -3267,7 +3264,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
3267
3264
// Indentation for the PTX code
3268
3265
int utility_block_nest_level = 1 ;
3269
3266
std::function<std::ostream&()> indent_utility = [&]() -> std::ostream& {
3270
- for (auto _ : c10::irange (utility_block_nest_level)) {
3267
+ for (auto _ : arange (utility_block_nest_level)) {
3271
3268
(void )_;
3272
3269
utilities << kTab ;
3273
3270
}
@@ -3294,7 +3291,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
3294
3291
if (!asm_->options ().immediate_inputs .empty ()) {
3295
3292
utilities << " template <" ;
3296
3293
bool first = true ;
3297
- for (auto in_i : c10::irange ((int64_t )inputs.size ())) {
3294
+ for (auto in_i : arange ((int64_t )inputs.size ())) {
3298
3295
if (asm_->options ().immediate_inputs .count (in_i)) {
3299
3296
if (!first) {
3300
3297
utilities << " , " ;
@@ -3306,7 +3303,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
3306
3303
utilities << " >\n " ;
3307
3304
}
3308
3305
utilities << " __device__ __inline__ void " << utility_name_no_ns << " (" ;
3309
- for (auto out_i : c10::irange (outputs.size ())) {
3306
+ for (auto out_i : arange (outputs.size ())) {
3310
3307
if (out_i > 0 ) {
3311
3308
utilities << " , " ;
3312
3309
}
@@ -3315,7 +3312,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
3315
3312
if (!outputs.empty ()) {
3316
3313
utilities << " , " ;
3317
3314
}
3318
- for (auto in_i : c10::irange ((int64_t )inputs.size ())) {
3315
+ for (auto in_i : arange ((int64_t )inputs.size ())) {
3319
3316
if (asm_->options ().immediate_inputs .count (in_i)) {
3320
3317
continue ;
3321
3318
}
@@ -3427,7 +3424,7 @@ class CudaKernelGenerator : private kir::ConstIrVisitor {
3427
3424
auto reg_dtype = get_type_or_index_type (register_);
3428
3425
if (std::holds_alternative<ArrayType>(reg_dtype.type )) {
3429
3426
for (auto i :
3430
- c10::irange (std::get<ArrayType>(reg_dtype.type ).size )) {
3427
+ arange (std::get<ArrayType>(reg_dtype.type ).size )) {
3431
3428
if (i > 0 ) {
3432
3429
next_line ();
3433
3430
}
0 commit comments