-
Notifications
You must be signed in to change notification settings - Fork 299
Regulate reduction accumulator operations and Element-wise operations #274
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
25e3d0e
9d7870b
9237f1c
cdbbbbb
a752b11
8eb5387
053a212
61d01e7
d5bbe7d
aefaf5e
f9b8804
b271740
27dc13f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -39,16 +39,14 @@ using CLayout = ck::tensor_layout::gemm::RowMajor; | |
| using AElementOp = ck::tensor_operation::element_wise::PassThrough; | ||
| using BElementOp = ck::tensor_operation::element_wise::PassThrough; | ||
| using CElementOp = ck::tensor_operation::element_wise::PassThrough; | ||
| using D0ReduceOp = ck::reduce::Add<ReduceAccDataType>; | ||
| using D1ReduceOp = ck::reduce::Add<ReduceAccDataType>; | ||
| using D0ReduceOp = ck::reduce::Add; | ||
| using D1ReduceOp = ck::reduce::Add; | ||
| using DxsReduceOp = ck::Tuple<D0ReduceOp, D1ReduceOp>; | ||
|
|
||
| using UnaryIdenticElementOp = | ||
| ck::tensor_operation::element_wise::UnaryIdentic<ReduceAccDataType, ReduceAccDataType, false>; | ||
| using UnarySquareElementOp = | ||
| ck::tensor_operation::element_wise::UnarySquare<ReduceAccDataType, ReduceAccDataType, false>; | ||
| using DxsInElementOp = ck::Tuple<UnaryIdenticElementOp, UnarySquareElementOp>; | ||
| using DxsOutElementOp = ck::Tuple<UnaryIdenticElementOp, UnaryIdenticElementOp>; | ||
| using UnaryIdenticElementOp = ck::tensor_operation::element_wise::PassThrough; | ||
| using UnarySquareElementOp = ck::tensor_operation::element_wise::UnarySquare; | ||
| using DxsInElementOp = ck::Tuple<UnaryIdenticElementOp, UnarySquareElementOp>; | ||
| using DxsOutElementOp = ck::Tuple<UnaryIdenticElementOp, UnaryIdenticElementOp>; | ||
|
|
||
| using DGlobalMemOp = | ||
| ck::InMemoryDataOperationEnumSequence<ck::InMemoryDataOperationEnum::AtomicAdd, | ||
|
|
@@ -259,8 +257,8 @@ int main(int argc, char* argv[]) | |
| { | ||
| for(int m = 0; m < M; ++m) | ||
| { | ||
| float d0_acc = d0_reduce_op.GetIdentityValue(); | ||
| float d1_acc = d1_reduce_op.GetIdentityValue(); | ||
| float d0_acc = d0_reduce_op.GetIdentityValue<float>(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Corrected in ba0ce |
||
| float d1_acc = d1_reduce_op.GetIdentityValue<float>(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Corrected in ba0ce |
||
|
|
||
| for(int n = 0; n < N; ++n) | ||
| { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -45,17 +45,14 @@ using CLayout = ck::tensor_layout::gemm::RowMajor; | |
| using AElementOp = ck::tensor_operation::element_wise::PassThrough; | ||
| using BElementOp = ck::tensor_operation::element_wise::PassThrough; | ||
| using CElementOp = ck::tensor_operation::element_wise::PassThrough; | ||
| using ReduceSumOp = ck::reduce::Add<ReduceAccDataType>; | ||
| using ReduceSumOp = ck::reduce::Add; | ||
| using DxsReduceOp = ck::Tuple<ReduceSumOp, ReduceSumOp>; | ||
|
|
||
| using UnaryIdenticElementOp = | ||
| ck::tensor_operation::element_wise::UnaryIdentic<ReduceAccDataType, ReduceAccDataType, false>; | ||
| using UnaryDivElementOp = | ||
| ck::tensor_operation::element_wise::UnaryIdentic<ReduceAccDataType, ReduceAccDataType, true>; | ||
| using UnarySquareElementOp = | ||
| ck::tensor_operation::element_wise::UnarySquare<ReduceAccDataType, ReduceAccDataType, false>; | ||
| using DxsInElementOp = ck::Tuple<UnaryIdenticElementOp, UnarySquareElementOp>; | ||
| using DxsOutElementOp = ck::Tuple<UnaryDivElementOp, UnaryDivElementOp>; | ||
| using UnaryIdenticElementOp = ck::tensor_operation::element_wise::PassThrough; | ||
| using UnaryDivElementOp = ck::tensor_operation::element_wise::UnaryDivide; | ||
| using UnarySquareElementOp = ck::tensor_operation::element_wise::UnarySquare; | ||
| using DxsInElementOp = ck::Tuple<UnaryIdenticElementOp, UnarySquareElementOp>; | ||
| using DxsOutElementOp = ck::Tuple<UnaryDivElementOp, UnaryDivElementOp>; | ||
|
|
||
| using DxsGlobalMemOp = | ||
| ck::InMemoryDataOperationEnumSequence<ck::InMemoryDataOperationEnum::AtomicAdd, | ||
|
|
@@ -157,8 +154,8 @@ void host_gemm_layernorm(Tensor<LayerNormOutDataType>& out_m_n, | |
| auto reduceSumOpInst = ReduceSumOp{}; | ||
| for(int m = 0; m < M; ++m) | ||
| { | ||
| float mean_acc = reduceSumOpInst.GetIdentityValue(); | ||
| float square_mean_acc = reduceSumOpInst.GetIdentityValue(); | ||
| float mean_acc = reduceSumOpInst.GetIdentityValue<float>(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Corrected in ba0ce |
||
| float square_mean_acc = reduceSumOpInst.GetIdentityValue<float>(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Corrected in ba0ce |
||
|
|
||
| for(int n = 0; n < N; ++n) | ||
| { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Originally
`using D0ReduceOp = ck::reduce::Add<ReduceAccDataType>;`
So now it should be
`float d0_acc = d0_reduce_op.GetIdentityValue<ReduceAccDataType>();`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, this example needs a hack with regard to the use of "AccDataType".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Corrected in ba0ce