-
Notifications
You must be signed in to change notification settings - Fork 615
feat: Montgomery optimisation (partial) #12822
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
fc47469
2d62cb3
596070b
b30acd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -514,6 +514,36 @@ constexpr void field<T>::wasm_reduce(uint64_t& result_0, | |
| result_7 += k * wasm_modulus[7]; | ||
| result_8 += k * wasm_modulus[8]; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Perform 29-bit Montgomery reduction on 1 limb using Yuval's method | ||
| * @details https://hackmd.io/@Ingonyama/Barret-Montgomery | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Quite a nice document! |
||
| * | ||
| */ | ||
| template <class T> | ||
| constexpr void field<T>::wasm_reduce_yuval(uint64_t& result_0, | ||
| uint64_t& result_1, | ||
| uint64_t& result_2, | ||
| uint64_t& result_3, | ||
| uint64_t& result_4, | ||
| uint64_t& result_5, | ||
| uint64_t& result_6, | ||
| uint64_t& result_7, | ||
| uint64_t& result_8, | ||
| uint64_t& result_9) | ||
| { | ||
| constexpr uint64_t mask = 0x1fffffff; | ||
| const uint64_t result_0_masked = result_0 & mask; | ||
| result_1 += result_0_masked * wasm_r_inv[0] + (result_0 >> WASM_LIMB_BITS); | ||
| result_2 += result_0_masked * wasm_r_inv[1]; | ||
| result_3 += result_0_masked * wasm_r_inv[2]; | ||
| result_4 += result_0_masked * wasm_r_inv[3]; | ||
| result_5 += result_0_masked * wasm_r_inv[4]; | ||
| result_6 += result_0_masked * wasm_r_inv[5]; | ||
| result_7 += result_0_masked * wasm_r_inv[6]; | ||
| result_8 += result_0_masked * wasm_r_inv[7]; | ||
| result_9 += result_0_masked * wasm_r_inv[8]; | ||
| } | ||
| /** | ||
| * @brief Convert 4 64-bit limbs into 9 29-bit limbs | ||
| * | ||
|
|
@@ -617,14 +647,24 @@ template <class T> constexpr field<T> field<T>::montgomery_mul(const field& othe | |
| wasm_madd(left[6], right, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); | ||
| wasm_madd(left[7], right, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); | ||
| wasm_madd(left[8], right, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); | ||
| wasm_reduce(temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); | ||
| wasm_reduce(temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); | ||
| wasm_reduce(temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); | ||
| wasm_reduce(temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); | ||
| wasm_reduce(temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); | ||
| wasm_reduce(temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); | ||
| wasm_reduce(temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); | ||
| wasm_reduce(temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); | ||
|
|
||
| wasm_reduce_yuval(temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); | ||
| wasm_reduce_yuval(temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); | ||
| wasm_reduce_yuval(temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); | ||
| wasm_reduce_yuval(temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); | ||
| wasm_reduce_yuval(temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); | ||
| wasm_reduce_yuval(temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); | ||
| wasm_reduce_yuval(temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); | ||
| wasm_reduce_yuval(temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); | ||
|
|
||
| // wasm_reduce(temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Left these in, in case there is some unforeseen issue that is not picked up by CI. This will allow us to quickly switch back.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add some comments that clarify the presence of the commented-out code and what exactly would be needed to turn off the new thing and re-enable the old thing if that were to be needed? Maybe wrapping some of the logic in methods would be helpful. I'm assuming leaving the final calls to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
| // wasm_reduce(temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); | ||
| // wasm_reduce(temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); | ||
| // wasm_reduce(temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); | ||
| // wasm_reduce(temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); | ||
| // wasm_reduce(temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); | ||
| // wasm_reduce(temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); | ||
| // wasm_reduce(temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); | ||
| wasm_reduce(temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); | ||
|
|
||
| // Convert result to unrelaxed form (all limbs are 29 bits) | ||
|
|
@@ -804,14 +844,24 @@ template <class T> constexpr field<T> field<T>::montgomery_square() const noexce | |
| temp_16 += left[8] * left[8]; | ||
|
|
||
| // Perform reductions | ||
| wasm_reduce(temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); | ||
| wasm_reduce(temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); | ||
| wasm_reduce(temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); | ||
| wasm_reduce(temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); | ||
| wasm_reduce(temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); | ||
| wasm_reduce(temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); | ||
| wasm_reduce(temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); | ||
| wasm_reduce(temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); | ||
|
|
||
| wasm_reduce_yuval(temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); | ||
| wasm_reduce_yuval(temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); | ||
| wasm_reduce_yuval(temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); | ||
| wasm_reduce_yuval(temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); | ||
| wasm_reduce_yuval(temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); | ||
| wasm_reduce_yuval(temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); | ||
| wasm_reduce_yuval(temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); | ||
| wasm_reduce_yuval(temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); | ||
|
|
||
| // wasm_reduce(temp_0, temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8); | ||
| // wasm_reduce(temp_1, temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9); | ||
| // wasm_reduce(temp_2, temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10); | ||
| // wasm_reduce(temp_3, temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11); | ||
| // wasm_reduce(temp_4, temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12); | ||
| // wasm_reduce(temp_5, temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13); | ||
| // wasm_reduce(temp_6, temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14); | ||
| // wasm_reduce(temp_7, temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15); | ||
| wasm_reduce(temp_8, temp_9, temp_10, temp_11, temp_12, temp_13, temp_14, temp_15, temp_16); | ||
|
|
||
| // Convert to unrelaxed 29-bit form | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm sure you wanted to maintain the absolutely pristine absence of comments in this file, but would you mind adding some anyway :). E.g. a note about the significance of `r`. Also, it seems like many of these constants were computed using the Python script included in this PR? If so, can you make a note of that in the comments?