@@ -770,7 +770,7 @@ def ParallelOp : SCF_Op<"parallel",
770770 "getSingleLowerBound", "getSingleUpperBound", "getSingleStep"]>,
771771 RecursiveMemoryEffects,
772772 DeclareOpInterfaceMethods<RegionBranchOpInterface>,
773- SingleBlockImplicitTerminator<"scf::YieldOp ">]> {
773+ SingleBlockImplicitTerminator<"scf::ReduceOp ">]> {
774774 let summary = "parallel for operation";
775775 let description = [{
776776 The "scf.parallel" operation represents a loop nest taking 4 groups of SSA
@@ -791,27 +791,36 @@ def ParallelOp : SCF_Op<"parallel",
791791
792792 The parallel loop operation supports reduction of values produced by
793793 individual iterations into a single result. This is modeled using the
794- scf.reduce operation (see scf.reduce for details). Each result of a
795- scf.parallel operation is associated with an initial value operand and
796- reduce operation that is an immediate child. Reductions are matched to
797- result and initial values in order of their appearance in the body.
798- Consequently, we require that the body region has the same number of
799- results and initial values as it has reduce operations.
800-
801- The body region must contain exactly one block that terminates with
802- "scf.yield" without operands. Parsing ParallelOp will create such a region
803- and insert the terminator when it is absent from the custom format.
794+ "scf.reduce" terminator operation (see "scf.reduce" for details). The i-th
795+ result of an "scf.parallel" operation is associated with the i-th initial
796+ value operand, the i-th operand of the "scf.reduce" operation (the value to
797+ be reduced) and the i-th region of the "scf.reduce" operation (the reduction
798+ function). Consequently, we require that the number of results of an
799+ "scf.parallel" op matches the number of initial values and the number of
800+ reductions in the "scf.reduce" terminator.
801+
802+ The body region must contain exactly one block that terminates with a
803+ "scf.reduce" operation. If an "scf.parallel" op has no reductions, the
804+ terminator has no operands and no regions. The "scf.parallel" parser will
805+ automatically insert the terminator for ops that have no reductions if it is
806+ absent.
804807
805808 Example:
806809
807810 ```mlir
808811 %init = arith.constant 0.0 : f32
809- scf.parallel (%iv) = (%lb) to (%ub) step (%step) init (%init) -> f32 {
810- %elem_to_reduce = load %buffer[%iv] : memref<100xf32>
811- scf.reduce(%elem_to_reduce) : f32 {
812+ %r:2 = scf.parallel (%iv) = (%lb) to (%ub) step (%step) init (%init, %init)
813+ -> f32, f32 {
814+ %elem_to_reduce1 = load %buffer1[%iv] : memref<100xf32>
815+ %elem_to_reduce2 = load %buffer2[%iv] : memref<100xf32>
816+ scf.reduce(%elem_to_reduce1, %elem_to_reduce2 : f32, f32) {
812817 ^bb0(%lhs : f32, %rhs: f32):
813818 %res = arith.addf %lhs, %rhs : f32
814819 scf.reduce.return %res : f32
820+ }, {
821+ ^bb0(%lhs : f32, %rhs: f32):
822+ %res = arith.mulf %lhs, %rhs : f32
823+ scf.reduce.return %res : f32
815824 }
816825 }
817826 ```
@@ -853,36 +862,36 @@ def ParallelOp : SCF_Op<"parallel",
853862// ReduceOp
854863//===----------------------------------------------------------------------===//
855864
856- def ReduceOp : SCF_Op<"reduce", [HasParent<"ParallelOp">]> {
857- let summary = "reduce operation for parallel for";
865+ def ReduceOp : SCF_Op<"reduce", [
866+ Terminator, HasParent<"ParallelOp">, RecursiveMemoryEffects,
867+ DeclareOpInterfaceMethods<RegionBranchTerminatorOpInterface>]> {
868+ let summary = "reduce operation for scf.parallel";
858869 let description = [{
859- "scf.reduce" is an operation occurring inside "scf.parallel" operations.
860- It consists of one block with two arguments which have the same type as the
861- operand of "scf.reduce".
862-
863- "scf.reduce" is used to model the value for reduction computations of a
864- "scf.parallel" operation. It has to appear as an immediate child of a
865- "scf.parallel" and is associated with a result value of its parent
866- operation.
867-
868- Association is in the order of appearance in the body where the first
869- result of a parallel loop operation corresponds to the first "scf.reduce"
870- in the operation's body region. The reduce operation takes a single
871- operand, which is the value to be used in the reduction.
872-
873- The reduce operation contains a region whose entry block expects two
874- arguments of the same type as the operand. As the iteration order of the
875- parallel loop and hence reduction order is unspecified, the result of
876- reduction may be non-deterministic unless the operation is associative and
877- commutative.
878-
879- The result of the reduce operation's body must have the same type as the
880- operands and associated result value of the parallel loop operation.
870+ "scf.reduce" is the terminator for "scf.parallel" operations. It can model
871+ an arbitrary number of reductions. It has one region per reduction. Each
872+ region has one block with two arguments which have the same type as the
873+ corresponding operand of "scf.reduce". The operands of the op are the values
874+ that should be reduced; one value per reduction.
875+
876+ The i-th reduction (i.e., the i-th region and the i-th operand) corresponds
877+ to the i-th initial value and the i-th result of the enclosing
878+ "scf.parallel" op.
879+
880+ The "scf.reduce" operation contains regions whose entry blocks expect two
881+ arguments of the same type as the corresponding operand. As the iteration
882+ order of the enclosing parallel loop and hence reduction order is
883+ unspecified, the results of the reductions may be non-deterministic unless
884+ the reductions are associative and commutative.
885+
886+ The result of a reduction region ("scf.reduce.return" operand) must have the
887+ same type as the corresponding "scf.reduce" operand and the corresponding
888+ "scf.parallel" initial value.
889+
881890 Example:
882891
883892 ```mlir
884893 %operand = arith.constant 1.0 : f32
885- scf.reduce(%operand) : f32 {
894+ scf.reduce(%operand : f32) {
886895 ^bb0(%lhs : f32, %rhs: f32):
887896 %res = arith.addf %lhs, %rhs : f32
888897 scf.reduce.return %res : f32
@@ -892,14 +901,15 @@ def ReduceOp : SCF_Op<"reduce", [HasParent<"ParallelOp">]> {
892901
893902 let skipDefaultBuilders = 1;
894903 let builders = [
895- OpBuilder<(ins "Value":$operand,
896- CArg<"function_ref<void (OpBuilder &, Location, Value, Value)>",
897- "nullptr">:$bodyBuilderFn)>
904+ OpBuilder<(ins "ValueRange":$operands)>,
905+ OpBuilder<(ins)>
898906 ];
899907
900- let arguments = (ins AnyType:$operand);
901- let hasCustomAssemblyFormat = 1;
902- let regions = (region SizedRegion<1>:$reductionOperator);
908+ let arguments = (ins Variadic<AnyType>:$operands);
909+ let assemblyFormat = [{
910+ (`(` $operands^ `:` type($operands) `)`)? $reductions attr-dict
911+ }];
912+ let regions = (region VariadicRegion<SizedRegion<1>>:$reductions);
903913 let hasRegionVerifier = 1;
904914}
905915
@@ -908,13 +918,14 @@ def ReduceOp : SCF_Op<"reduce", [HasParent<"ParallelOp">]> {
908918//===----------------------------------------------------------------------===//
909919
910920def ReduceReturnOp :
911- SCF_Op<"reduce.return", [HasParent<"ReduceOp">, Pure,
912- Terminator]> {
921+ SCF_Op<"reduce.return", [HasParent<"ReduceOp">, Pure, Terminator]> {
913922 let summary = "terminator for reduce operation";
914923 let description = [{
915924 "scf.reduce.return" is a special terminator operation for the block inside
916- "scf.reduce". It terminates the region. It should have the same type as
917- the operand of "scf.reduce". Example for the custom format:
925+ "scf.reduce" regions. It terminates the region. It should have the same
926+ operand type as the corresponding operand of the enclosing "scf.reduce" op.
927+
928+ Example:
918929
919930 ```mlir
920931 scf.reduce.return %res : f32
@@ -1150,7 +1161,7 @@ def IndexSwitchOp : SCF_Op<"index_switch", [RecursiveMemoryEffects,
11501161
11511162def YieldOp : SCF_Op<"yield", [Pure, ReturnLike, Terminator,
11521163 ParentOneOf<["ExecuteRegionOp", "ForOp", "IfOp", "IndexSwitchOp",
1153- "ParallelOp", " WhileOp"]>]> {
1164+ "WhileOp"]>]> {
11541165 let summary = "loop yield and termination operation";
11551166 let description = [{
11561167 "scf.yield" yields an SSA value from the SCF dialect op region and
0 commit comments