Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 68 additions & 7 deletions src/libraries/System.Private.CoreLib/src/System/Random.cs
Original file line number Diff line number Diff line change
Expand Up @@ -363,17 +363,78 @@ public void Shuffle<T>(T[] values)
/// </remarks>
public void Shuffle<T>(Span<T> values)
{
int n = values.Length;
if (_impl is XoshiroImpl xoshiro)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pattern is not so straight to embed implementation details of XoshiroImpl in base class. Let's see if others have better idea.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also seeing problems.
This implementation requires NextUInt64(), so currently XoshiroImpl is required.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather ImplBase have a virtual or abstract Shuffle method on it, which this public Shuffle would just delegate to on _impl. Then the XoshiroImpl-specific implementation can be an override.

{
ulong bound = 2432902008176640000; // 20!
int nextIndex = Math.Min(20, values.Length);

for (int i = 1; i < values.Length;)
{
ulong r = xoshiro.NextUInt64();

// Correct r to be unbiased.
// Based on https://github.com/swiftlang/swift/pull/39143
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may need to a link to the "original" idea for the algorithm, and potentially a brief description of it.

ulong rbound = r * bound;
if (rbound > 0 - bound)
{
ulong sum, carry;
do
{
ulong r2 = xoshiro.NextUInt64();
ulong lohi = Math.BigMul(r2, bound, out ulong lolo);
sum = rbound + lohi;
carry = sum < rbound ? 1ul : 0ul;
rbound = lolo;
} while (sum == ~0ul);
r += carry;
}

for (int i = 0; i < n - 1; i++)
// Shuffle the values ​​based on r
for (int m = i; m < nextIndex; m++)
{
int index = (int)Math.BigMul(r, (ulong)(m + 1), out r);

// Swap span[m] <-> span[index]
ref var head = ref MemoryMarshal.GetReference(values);
var t = Unsafe.Add(ref head, m);
Unsafe.Add(ref head, m) = Unsafe.Add(ref head, index);
Unsafe.Add(ref head, index) = t;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can the bound check be eliminated by JIT with idiomatic access? If not, consider to add assert about it.

Copy link
Member

@EgorBo EgorBo Jan 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even if the bound checks are not removed please provide some strong motivation for it to exist as it is not aligned with our strategy to reduce unsafe code in the BCL.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I tried it, the benchmark results were very slightly worse.
When I checked the JIT code implemented with swapping without using Unsafe, I found that the amount of code jumping to CORINFO_HELP_RNGCHKFAIL was increasing.

Method Mean Error StdDev
shuffleUnseededInt10_Unsafe 10.086 ns 0.0783 ns 0.0694 ns
shuffleUnseededInt10_Safe 10.372 ns 0.1657 ns 0.1550 ns
shuffleUnseededInt1000_Unsafe 1,437.318 ns 21.1372 ns 19.7718 ns
shuffleUnseededInt1000_Safe 1,468.840 ns 18.4242 ns 17.2340 ns
JIT asm

// with Unsafe swap
// The reason why CORINFO_HELP_RNGCHKFAIL is also in the "with Unsafe" code is because the fallback code contains a normal array reference.

.NET 10.0.0 (42.42.42.42424), X64 RyuJIT AVX2

; System.Tests.Perf_Random_Mine.shuffleUnseededInt10()
;         [Benchmark] public void shuffleUnseededInt10() => _randomUnseeded.Shuffle(_destinationInt.AsSpan(..10));
;                                                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
       7FFC77DD48D0 sub       rsp,38
       7FFC77DD48D4 xor       eax,eax
       7FFC77DD48D6 mov       [rsp+28],rax
       7FFC77DD48DB mov       rax,[rcx+8]
       7FFC77DD48DF mov       rdx,[rcx+18]
       7FFC77DD48E3 test      rdx,rdx
       7FFC77DD48E6 je        short M00_L01
       7FFC77DD48E8 mov       ecx,[rdx+8]
       7FFC77DD48EB cmp       ecx,0A
       7FFC77DD48EE jb        short M00_L00
       7FFC77DD48F0 add       rdx,10
       7FFC77DD48F4 mov       [rsp+28],rdx
       7FFC77DD48F9 mov       dword ptr [rsp+30],0A
       7FFC77DD4901 lea       rdx,[rsp+28]
       7FFC77DD4906 mov       rcx,rax
       7FFC77DD4909 cmp       [rcx],ecx
       7FFC77DD490B call      qword ptr [7FFC78247960]; System.Random.Shuffle[[System.Int32, System.Private.CoreLib]](System.Span`1<Int32>)
       7FFC77DD4911 nop
       7FFC77DD4912 add       rsp,38
       7FFC77DD4916 ret
M00_L00:
       7FFC77DD4917 call      qword ptr [7FFC7824D830]
       7FFC77DD491D int       3
M00_L01:
       7FFC77DD491E mov       ecx,2
       7FFC77DD4923 call      qword ptr [7FFC77D1FD38]
       7FFC77DD4929 int       3
; Total bytes of code 90
; System.Random.Shuffle[[System.Int32, System.Private.CoreLib]](System.Span`1<Int32>)
;             if (_impl is XoshiroImpl xoshiro)
;             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                 ulong bound = 2432902008176640000;        // 20!
;                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                 int nextIndex = Math.Min(20, values.Length);
;                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                 for (int i = 1; i < values.Length;)
;                      ^^^^^^^^^
;                     ulong r = xoshiro.NextUInt64();
;                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                     ulong rbound = r * bound;
;                     ^^^^^^^^^^^^^^^^^^^^^^^^^
;                     if (rbound > 0 - bound)
;                     ^^^^^^^^^^^^^^^^^^^^^^^
;                             ulong r2 = xoshiro.NextUInt64();
;                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                             sum = rbound + lohi;
;                             ^^^^^^^^^^^^^^^^^^^^
;                             carry = sum < rbound ? 1ul : 0ul;
;                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                             rbound = lolo;
;                             ^^^^^^^^^^^^^^
;                         } while (sum == ~0ul);
;                           ^^^^^^^^^^^^^^^^^^^^
;                         r += carry;
;                         ^^^^^^^^^^^
;                     for (int m = i; m < nextIndex; m++)
;                          ^^^^^^^^^
;                         int index = (int)Math.BigMul(r, (ulong)(m + 1), out r);
;                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                         ref var head = ref MemoryMarshal.GetReference(values);
;                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                         var t = Unsafe.Add(ref head, m);
;                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                         Unsafe.Add(ref head, m) = Unsafe.Add(ref head, index);
;                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                         Unsafe.Add(ref head, index) = t;
;                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                     i = nextIndex;
;                     ^^^^^^^^^^^^^^
;                     bound = (ulong)(i + 1);
;                     ^^^^^^^^^^^^^^^^^^^^^^^
;                     for (nextIndex = i + 1; nextIndex < values.Length; nextIndex++)
;                          ^^^^^^^^^^^^^^^^^
;                         if (Math.BigMul(bound, (ulong)(nextIndex + 1), out var newbound) == 0)
;                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                             bound = newbound;
;                             ^^^^^^^^^^^^^^^^^
;                 int n = values.Length;
;                 ^^^^^^^^^^^^^^^^^^^^^^
;                 for (int i = 0; i < n - 1; i++)
;                      ^^^^^^^^^
;                     int j = Next(i, n);
;                     ^^^^^^^^^^^^^^^^^^^
;                     if (j != i)
;                     ^^^^^^^^^^^
;                         T temp = values[i];
;                         ^^^^^^^^^^^^^^^^^^^
;                         values[i] = values[j];
;                         ^^^^^^^^^^^^^^^^^^^^^^
;                         values[j] = temp;
;                         ^^^^^^^^^^^^^^^^^
       7FFC77DD4AE0 push      r15
       7FFC77DD4AE2 push      r14
       7FFC77DD4AE4 push      r13
       7FFC77DD4AE6 push      rdi
       7FFC77DD4AE7 push      rsi
       7FFC77DD4AE8 push      rbp
       7FFC77DD4AE9 push      rbx
       7FFC77DD4AEA sub       rsp,40
       7FFC77DD4AEE mov       rbx,rcx
       7FFC77DD4AF1 mov       rsi,[rdx]
       7FFC77DD4AF4 mov       edi,[rdx+8]
       7FFC77DD4AF7 mov       rcx,[rbx+8]
       7FFC77DD4AFB test      rcx,rcx
       7FFC77DD4AFE je        near ptr M01_L09
       7FFC77DD4B04 mov       rdx,offset MT_System.Random+XoshiroImpl
       7FFC77DD4B0E cmp       [rcx],rdx
       7FFC77DD4B11 jne       near ptr M01_L09
       7FFC77DD4B17 mov       rdx,21C3677C82B40000
       7FFC77DD4B21 mov       r8d,14
       7FFC77DD4B27 cmp       edi,14
       7FFC77DD4B2A cmovl     r8d,edi
       7FFC77DD4B2E mov       eax,1
       7FFC77DD4B33 cmp       edi,1
       7FFC77DD4B36 jle       near ptr M01_L06
M01_L00:
       7FFC77DD4B3C mov       r10,[rcx+8]
       7FFC77DD4B40 mov       r9,[rcx+10]
       7FFC77DD4B44 mov       r11,[rcx+18]
       7FFC77DD4B48 mov       rbx,[rcx+20]
       7FFC77DD4B4C mov       rbp,r9
       7FFC77DD4B4F shl       rbp,11
       7FFC77DD4B53 xor       r11,r10
       7FFC77DD4B56 xor       rbx,r9
       7FFC77DD4B59 lea       r14,[r9+r9*4]
       7FFC77DD4B5D rol       r14,7
       7FFC77DD4B61 lea       r14,[r14+r14*8]
       7FFC77DD4B65 xor       r9,r11
       7FFC77DD4B68 xor       r10,rbx
       7FFC77DD4B6B xor       r11,rbp
       7FFC77DD4B6E rol       rbx,2D
       7FFC77DD4B72 mov       [rcx+8],r10
       7FFC77DD4B76 mov       [rcx+10],r9
       7FFC77DD4B7A mov       [rcx+18],r11
       7FFC77DD4B7E mov       [rcx+20],rbx
       7FFC77DD4B82 mov       r10,r14
       7FFC77DD4B85 imul      r10,rdx
       7FFC77DD4B89 mov       [rsp+38],rdx
       7FFC77DD4B8E mov       r9,rdx
       7FFC77DD4B91 neg       r9
       7FFC77DD4B94 cmp       r9,r10
       7FFC77DD4B97 jb        short M01_L04
M01_L01:
       7FFC77DD4B99 cmp       eax,r8d
       7FFC77DD4B9C jge       short M01_L03
       7FFC77DD4B9E xchg      ax,ax
M01_L02:
       7FFC77DD4BA0 lea       r11d,[rax+1]
       7FFC77DD4BA4 movsxd    r10,r11d
       7FFC77DD4BA7 lea       r9,[rsp+28]
       7FFC77DD4BAC mov       rdx,r14
       7FFC77DD4BAF mulx      rdx,rbx,r10
       7FFC77DD4BB4 mov       [r9],rbx
       7FFC77DD4BB7 mov       r14,[rsp+28]
       7FFC77DD4BBC cdqe
       7FFC77DD4BBE mov       r10d,[rsi+rax*4]
       7FFC77DD4BC2 movsxd    rdx,edx
       7FFC77DD4BC5 mov       r9d,[rsi+rdx*4]
       7FFC77DD4BC9 mov       [rsi+rax*4],r9d
       7FFC77DD4BCD mov       [rsi+rdx*4],r10d
       7FFC77DD4BD1 mov       eax,r11d
       7FFC77DD4BD4 cmp       eax,r8d
       7FFC77DD4BD7 jl        short M01_L02
M01_L03:
       7FFC77DD4BD9 mov       eax,r8d
       7FFC77DD4BDC lea       r8d,[rax+1]
       7FFC77DD4BE0 movsxd    r11,r8d
       7FFC77DD4BE3 cmp       r8d,edi
       7FFC77DD4BE6 jge       near ptr M01_L05
       7FFC77DD4BEC jmp       near ptr M01_L08
M01_L04:
       7FFC77DD4BF1 mov       r9,[rcx+8]
       7FFC77DD4BF5 mov       r11,[rcx+10]
       7FFC77DD4BF9 mov       rbx,[rcx+18]
       7FFC77DD4BFD mov       rbp,[rcx+20]
       7FFC77DD4C01 mov       r15,r11
       7FFC77DD4C04 shl       r15,11
       7FFC77DD4C08 xor       rbx,r9
       7FFC77DD4C0B xor       rbp,r11
       7FFC77DD4C0E lea       r13,[r11+r11*4]
       7FFC77DD4C12 rol       r13,7
       7FFC77DD4C16 lea       r13,[r13+r13*8]
       7FFC77DD4C1B xor       r11,rbx
       7FFC77DD4C1E xor       r9,rbp
       7FFC77DD4C21 xor       rbx,r15
       7FFC77DD4C24 rol       rbp,2D
       7FFC77DD4C28 mov       [rcx+8],r9
       7FFC77DD4C2C mov       [rcx+10],r11
       7FFC77DD4C30 mov       [rcx+18],rbx
       7FFC77DD4C34 mov       [rcx+20],rbp
       7FFC77DD4C38 lea       r9,[rsp+30]
       7FFC77DD4C3D mov       r11,[rsp+38]
       7FFC77DD4C42 mov       rdx,r13
       7FFC77DD4C45 mulx      rdx,rbx,r11
       7FFC77DD4C4A mov       [r9],rbx
       7FFC77DD4C4D mov       r9,[rsp+30]
       7FFC77DD4C52 add       rdx,r10
       7FFC77DD4C55 cmp       rdx,r10
       7FFC77DD4C58 setb      r10b
       7FFC77DD4C5C movzx     r10d,r10b
       7FFC77DD4C60 cmp       rdx,0FFFFFFFFFFFFFFFF
       7FFC77DD4C64 mov       [rsp+38],r11
       7FFC77DD4C69 je        short M01_L07
       7FFC77DD4C6B add       r14,r10
       7FFC77DD4C6E jmp       near ptr M01_L01
M01_L05:
       7FFC77DD4C73 cmp       eax,edi
       7FFC77DD4C75 mov       rdx,r11
       7FFC77DD4C78 jl        near ptr M01_L00
M01_L06:
       7FFC77DD4C7E add       rsp,40
       7FFC77DD4C82 pop       rbx
       7FFC77DD4C83 pop       rbp
       7FFC77DD4C84 pop       rsi
       7FFC77DD4C85 pop       rdi
       7FFC77DD4C86 pop       r13
       7FFC77DD4C88 pop       r14
       7FFC77DD4C8A pop       r15
       7FFC77DD4C8C ret
M01_L07:
       7FFC77DD4C8D mov       r10,r9
       7FFC77DD4C90 jmp       near ptr M01_L04
M01_L08:
       7FFC77DD4C95 lea       edx,[r8+1]
       7FFC77DD4C99 movsxd    r10,edx
       7FFC77DD4C9C lea       r9,[rsp+20]
       7FFC77DD4CA1 mov       rdx,r11
       7FFC77DD4CA4 mulx      rdx,rbx,r10
       7FFC77DD4CA9 mov       [r9],rbx
       7FFC77DD4CAC mov       r10,[rsp+20]
       7FFC77DD4CB1 test      rdx,rdx
       7FFC77DD4CB4 jne       short M01_L05
       7FFC77DD4CB6 mov       r11,r10
       7FFC77DD4CB9 inc       r8d
       7FFC77DD4CBC cmp       r8d,edi
       7FFC77DD4CBF jl        short M01_L08
       7FFC77DD4CC1 jmp       short M01_L05
M01_L09:
       7FFC77DD4CC3 mov       ebp,edi
       7FFC77DD4CC5 xor       r14d,r14d
M01_L10:
       7FFC77DD4CC8 lea       ecx,[rdi-1]
       7FFC77DD4CCB cmp       r14d,ecx
       7FFC77DD4CCE jge       short M01_L06
       7FFC77DD4CD0 mov       rcx,rbx
       7FFC77DD4CD3 mov       edx,r14d
       7FFC77DD4CD6 mov       r8d,edi
       7FFC77DD4CD9 mov       rax,[rbx]
       7FFC77DD4CDC mov       rax,[rax+40]
       7FFC77DD4CE0 call      qword ptr [rax+30]
       7FFC77DD4CE3 cmp       eax,r14d
       7FFC77DD4CE6 je        short M01_L11
       7FFC77DD4CE8 mov       ecx,[rsi+r14*4]
       7FFC77DD4CEC cmp       eax,ebp
       7FFC77DD4CEE jae       short M01_L12
       7FFC77DD4CF0 mov       edx,eax
       7FFC77DD4CF2 mov       edx,[rsi+rdx*4]
       7FFC77DD4CF5 mov       [rsi+r14*4],edx
       7FFC77DD4CF9 mov       eax,eax
       7FFC77DD4CFB mov       [rsi+rax*4],ecx
M01_L11:
       7FFC77DD4CFE inc       r14d
       7FFC77DD4D01 jmp       short M01_L10
M01_L12:
       7FFC77DD4D03 call      CORINFO_HELP_RNGCHKFAIL
       7FFC77DD4D08 int       3
; Total bytes of code 553

// without Unsafe swap
// The jump to CORINFO_HELP_RNGCHKFAIL has increased to three places, indicating that the range check has not been removed by JIT.

.NET 10.0.0 (42.42.42.42424), X64 RyuJIT AVX2

; System.Tests.Perf_Random_Mine.shuffleUnseededInt10()
;         [Benchmark] public void shuffleUnseededInt10() => _randomUnseeded.Shuffle(_destinationInt.AsSpan(..10));
;                                                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
       7FFC622C48D0 sub       rsp,38
       7FFC622C48D4 xor       eax,eax
       7FFC622C48D6 mov       [rsp+28],rax
       7FFC622C48DB mov       rax,[rcx+8]
       7FFC622C48DF mov       rdx,[rcx+18]
       7FFC622C48E3 test      rdx,rdx
       7FFC622C48E6 je        short M00_L01
       7FFC622C48E8 mov       ecx,[rdx+8]
       7FFC622C48EB cmp       ecx,0A
       7FFC622C48EE jb        short M00_L00
       7FFC622C48F0 add       rdx,10
       7FFC622C48F4 mov       [rsp+28],rdx
       7FFC622C48F9 mov       dword ptr [rsp+30],0A
       7FFC622C4901 lea       rdx,[rsp+28]
       7FFC622C4906 mov       rcx,rax
       7FFC622C4909 cmp       [rcx],ecx
       7FFC622C490B call      qword ptr [7FFC627379C0]; System.Random.Shuffle[[System.Int32, System.Private.CoreLib]](System.Span`1<Int32>)
       7FFC622C4911 nop
       7FFC622C4912 add       rsp,38
       7FFC622C4916 ret
M00_L00:
       7FFC622C4917 call      qword ptr [7FFC6273D878]
       7FFC622C491D int       3
M00_L01:
       7FFC622C491E mov       ecx,2
       7FFC622C4923 call      qword ptr [7FFC6220FD38]
       7FFC622C4929 int       3
; Total bytes of code 90
; System.Random.Shuffle[[System.Int32, System.Private.CoreLib]](System.Span`1<Int32>)
;             if (_impl is XoshiroImpl xoshiro)
;             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                 ulong bound = 2432902008176640000;        // 20!
;                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                 int nextIndex = Math.Min(20, values.Length);
;                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                 for (int i = 1; i < values.Length;)
;                      ^^^^^^^^^
;                     ulong r = xoshiro.NextUInt64();
;                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                     ulong rbound = r * bound;
;                     ^^^^^^^^^^^^^^^^^^^^^^^^^
;                     if (rbound > 0 - bound)
;                     ^^^^^^^^^^^^^^^^^^^^^^^
;                             ulong r2 = xoshiro.NextUInt64();
;                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                             sum = rbound + lohi;
;                             ^^^^^^^^^^^^^^^^^^^^
;                             carry = sum < rbound ? 1ul : 0ul;
;                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                             rbound = lolo;
;                             ^^^^^^^^^^^^^^
;                         } while (sum == ~0ul);
;                           ^^^^^^^^^^^^^^^^^^^^
;                         r += carry;
;                         ^^^^^^^^^^^
;                     for (int m = i; m < nextIndex; m++)
;                          ^^^^^^^^^
;                         int index = (int)Math.BigMul(r, (ulong)(m + 1), out r);
;                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                         var temp = values[m];
;                         ^^^^^^^^^^^^^^^^^^^^^
;                         values[m] = values[index];
;                         ^^^^^^^^^^^^^^^^^^^^^^^^^^
;                         values[index] = temp;
;                         ^^^^^^^^^^^^^^^^^^^^^
;                     i = nextIndex;
;                     ^^^^^^^^^^^^^^
;                     bound = (ulong)(i + 1);
;                     ^^^^^^^^^^^^^^^^^^^^^^^
;                     for (nextIndex = i + 1; nextIndex < values.Length; nextIndex++)
;                          ^^^^^^^^^^^^^^^^^
;                         if (Math.BigMul(bound, (ulong)(nextIndex + 1), out var newbound) == 0)
;                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;                             bound = newbound;
;                             ^^^^^^^^^^^^^^^^^
;                 int n = values.Length;
;                 ^^^^^^^^^^^^^^^^^^^^^^
;                 for (int i = 0; i < n - 1; i++)
;                      ^^^^^^^^^
;                     int j = Next(i, n);
;                     ^^^^^^^^^^^^^^^^^^^
;                     if (j != i)
;                     ^^^^^^^^^^^
;                         T temp = values[i];
;                         ^^^^^^^^^^^^^^^^^^^
;                         values[i] = values[j];
;                         ^^^^^^^^^^^^^^^^^^^^^^
;                         values[j] = temp;
;                         ^^^^^^^^^^^^^^^^^
       7FFC622C4AE0 push      r15
       7FFC622C4AE2 push      r14
       7FFC622C4AE4 push      r13
       7FFC622C4AE6 push      rdi
       7FFC622C4AE7 push      rsi
       7FFC622C4AE8 push      rbp
       7FFC622C4AE9 push      rbx
       7FFC622C4AEA sub       rsp,40
       7FFC622C4AEE mov       rbx,rcx
       7FFC622C4AF1 mov       rsi,[rdx]
       7FFC622C4AF4 mov       edi,[rdx+8]
       7FFC622C4AF7 mov       rcx,[rbx+8]
       7FFC622C4AFB test      rcx,rcx
       7FFC622C4AFE je        near ptr M01_L09
       7FFC622C4B04 mov       rdx,offset MT_System.Random+XoshiroImpl
       7FFC622C4B0E cmp       [rcx],rdx
       7FFC622C4B11 jne       near ptr M01_L09
       7FFC622C4B17 mov       rdx,21C3677C82B40000
       7FFC622C4B21 mov       r8d,14
       7FFC622C4B27 cmp       edi,14
       7FFC622C4B2A cmovl     r8d,edi
       7FFC622C4B2E mov       eax,1
       7FFC622C4B33 cmp       edi,1
       7FFC622C4B36 jle       near ptr M01_L06
M01_L00:
       7FFC622C4B3C mov       r10,[rcx+8]
       7FFC622C4B40 mov       r9,[rcx+10]
       7FFC622C4B44 mov       r11,[rcx+18]
       7FFC622C4B48 mov       rbx,[rcx+20]
       7FFC622C4B4C mov       rbp,r9
       7FFC622C4B4F shl       rbp,11
       7FFC622C4B53 xor       r11,r10
       7FFC622C4B56 xor       rbx,r9
       7FFC622C4B59 lea       r14,[r9+r9*4]
       7FFC622C4B5D rol       r14,7
       7FFC622C4B61 lea       r14,[r14+r14*8]
       7FFC622C4B65 xor       r9,r11
       7FFC622C4B68 xor       r10,rbx
       7FFC622C4B6B xor       r11,rbp
       7FFC622C4B6E rol       rbx,2D
       7FFC622C4B72 mov       [rcx+8],r10
       7FFC622C4B76 mov       [rcx+10],r9
       7FFC622C4B7A mov       [rcx+18],r11
       7FFC622C4B7E mov       [rcx+20],rbx
       7FFC622C4B82 mov       r10,r14
       7FFC622C4B85 imul      r10,rdx
       7FFC622C4B89 mov       [rsp+38],rdx
       7FFC622C4B8E mov       r9,rdx
       7FFC622C4B91 neg       r9
       7FFC622C4B94 cmp       r9,r10
       7FFC622C4B97 jb        short M01_L04
M01_L01:
       7FFC622C4B99 cmp       eax,r8d
       7FFC622C4B9C jge       short M01_L03
       7FFC622C4B9E xchg      ax,ax
M01_L02:
       7FFC622C4BA0 lea       r11d,[rax+1]
       7FFC622C4BA4 movsxd    r10,r11d
       7FFC622C4BA7 lea       r9,[rsp+28]
       7FFC622C4BAC mov       rdx,r14
       7FFC622C4BAF mulx      rdx,rbx,r10
       7FFC622C4BB4 mov       [r9],rbx
       7FFC622C4BB7 mov       r14,[rsp+28]
       7FFC622C4BBC cmp       eax,edi
       7FFC622C4BBE jae       near ptr M01_L12
       7FFC622C4BC4 mov       eax,eax
       7FFC622C4BC6 mov       r10d,[rsi+rax*4]
       7FFC622C4BCA cmp       edx,edi
       7FFC622C4BCC jae       near ptr M01_L12
       7FFC622C4BD2 mov       edx,edx
       7FFC622C4BD4 mov       r9d,[rsi+rdx*4]
       7FFC622C4BD8 mov       [rsi+rax*4],r9d
       7FFC622C4BDC mov       [rsi+rdx*4],r10d
       7FFC622C4BE0 mov       eax,r11d
       7FFC622C4BE3 cmp       eax,r8d
       7FFC622C4BE6 jl        short M01_L02
M01_L03:
       7FFC622C4BE8 mov       eax,r8d
       7FFC622C4BEB lea       r8d,[rax+1]
       7FFC622C4BEF movsxd    r11,r8d
       7FFC622C4BF2 cmp       r8d,edi
       7FFC622C4BF5 jge       near ptr M01_L05
       7FFC622C4BFB jmp       near ptr M01_L08
M01_L04:
       7FFC622C4C00 mov       r9,[rcx+8]
       7FFC622C4C04 mov       r11,[rcx+10]
       7FFC622C4C08 mov       rbx,[rcx+18]
       7FFC622C4C0C mov       rbp,[rcx+20]
       7FFC622C4C10 mov       r15,r11
       7FFC622C4C13 shl       r15,11
       7FFC622C4C17 xor       rbx,r9
       7FFC622C4C1A xor       rbp,r11
       7FFC622C4C1D lea       r13,[r11+r11*4]
       7FFC622C4C21 rol       r13,7
       7FFC622C4C25 lea       r13,[r13+r13*8]
       7FFC622C4C2A xor       r11,rbx
       7FFC622C4C2D xor       r9,rbp
       7FFC622C4C30 xor       rbx,r15
       7FFC622C4C33 rol       rbp,2D
       7FFC622C4C37 mov       [rcx+8],r9
       7FFC622C4C3B mov       [rcx+10],r11
       7FFC622C4C3F mov       [rcx+18],rbx
       7FFC622C4C43 mov       [rcx+20],rbp
       7FFC622C4C47 lea       r9,[rsp+30]
       7FFC622C4C4C mov       r11,[rsp+38]
       7FFC622C4C51 mov       rdx,r13
       7FFC622C4C54 mulx      rdx,rbx,r11
       7FFC622C4C59 mov       [r9],rbx
       7FFC622C4C5C mov       r9,[rsp+30]
       7FFC622C4C61 add       rdx,r10
       7FFC622C4C64 cmp       rdx,r10
       7FFC622C4C67 setb      r10b
       7FFC622C4C6B movzx     r10d,r10b
       7FFC622C4C6F cmp       rdx,0FFFFFFFFFFFFFFFF
       7FFC622C4C73 mov       [rsp+38],r11
       7FFC622C4C78 je        short M01_L07
       7FFC622C4C7A add       r14,r10
       7FFC622C4C7D jmp       near ptr M01_L01
M01_L05:
       7FFC622C4C82 cmp       eax,edi
       7FFC622C4C84 mov       rdx,r11
       7FFC622C4C87 jl        near ptr M01_L00
M01_L06:
       7FFC622C4C8D add       rsp,40
       7FFC622C4C91 pop       rbx
       7FFC622C4C92 pop       rbp
       7FFC622C4C93 pop       rsi
       7FFC622C4C94 pop       rdi
       7FFC622C4C95 pop       r13
       7FFC622C4C97 pop       r14
       7FFC622C4C99 pop       r15
       7FFC622C4C9B ret
M01_L07:
       7FFC622C4C9C mov       r10,r9
       7FFC622C4C9F jmp       near ptr M01_L04
M01_L08:
       7FFC622C4CA4 lea       edx,[r8+1]
       7FFC622C4CA8 movsxd    r10,edx
       7FFC622C4CAB lea       r9,[rsp+20]
       7FFC622C4CB0 mov       rdx,r11
       7FFC622C4CB3 mulx      rdx,rbx,r10
       7FFC622C4CB8 mov       [r9],rbx
       7FFC622C4CBB mov       r10,[rsp+20]
       7FFC622C4CC0 test      rdx,rdx
       7FFC622C4CC3 jne       short M01_L05
       7FFC622C4CC5 mov       r11,r10
       7FFC622C4CC8 inc       r8d
       7FFC622C4CCB cmp       r8d,edi
       7FFC622C4CCE jl        short M01_L08
       7FFC622C4CD0 jmp       short M01_L05
M01_L09:
       7FFC622C4CD2 mov       ebp,edi
       7FFC622C4CD4 xor       r14d,r14d
M01_L10:
       7FFC622C4CD7 lea       ecx,[rdi-1]
       7FFC622C4CDA cmp       r14d,ecx
       7FFC622C4CDD jge       short M01_L06
       7FFC622C4CDF mov       rcx,rbx
       7FFC622C4CE2 mov       edx,r14d
       7FFC622C4CE5 mov       r8d,edi
       7FFC622C4CE8 mov       rax,[rbx]
       7FFC622C4CEB mov       rax,[rax+40]
       7FFC622C4CEF call      qword ptr [rax+30]
       7FFC622C4CF2 cmp       eax,r14d
       7FFC622C4CF5 je        short M01_L11
       7FFC622C4CF7 mov       ecx,[rsi+r14*4]
       7FFC622C4CFB cmp       eax,ebp
       7FFC622C4CFD jae       short M01_L12
       7FFC622C4CFF mov       edx,eax
       7FFC622C4D01 mov       edx,[rsi+rdx*4]
       7FFC622C4D04 mov       [rsi+r14*4],edx
       7FFC622C4D08 mov       eax,eax
       7FFC622C4D0A mov       [rsi+rax*4],ecx
M01_L11:
       7FFC622C4D0D inc       r14d
       7FFC622C4D10 jmp       short M01_L10
M01_L12:
       7FFC622C4D12 call      CORINFO_HELP_RNGCHKFAIL
       7FFC622C4D17 int       3
; Total bytes of code 568

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andanteyk I guess the end results imply that the unsafe code is not worth it

}

i = nextIndex;

// Calculates bound.
// bound is (i + 1) * (i + 2) * ... * (nextIndex) < 2^64
bound = (ulong)(i + 1);
for (nextIndex = i + 1; nextIndex < values.Length; nextIndex++)
{
if (Math.BigMul(bound, (ulong)(nextIndex + 1), out var newbound) == 0)
{
bound = newbound;
}
else
{
break;
}
}
}

}
else
{
int j = Next(i, n);
// Fallback & compatibility implementation
int n = values.Length;

if (j != i)
for (int i = 0; i < n - 1; i++)
{
T temp = values[i];
values[i] = values[j];
values[j] = temp;
int j = Next(i, n);

if (j != i)
{
T temp = values[i];
values[i] = values[j];
values[j] = temp;
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;

namespace System.Security.Cryptography
Expand Down Expand Up @@ -288,17 +289,60 @@ public static string GetHexString(int stringLength, bool lowercase = false)
/// <typeparam name="T">The type of span.</typeparam>
public static void Shuffle<T>(Span<T> values)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changes for cryptographic RNG may subject to more careful review for security requirements.

{
int n = values.Length;
ulong bound = 2432902008176640000; // 20!
int nextIndex = Math.Min(20, values.Length);

for (int i = 0; i < n - 1; i++)
for (int i = 1; i < values.Length;)
{
int j = GetInt32(i, n);
ulong r = 0;
RandomNumberGeneratorImplementation.FillSpan(MemoryMarshal.AsBytes(new Span<ulong>(ref r)));

if (i != j)
// Correct r to be unbiased.
// Based on https://github.com/swiftlang/swift/pull/39143
ulong rbound = r * bound;
if (rbound > 0 - bound)
{
T temp = values[i];
values[i] = values[j];
values[j] = temp;
ulong sum, carry;
do
{
ulong r2 = 0;
RandomNumberGeneratorImplementation.FillSpan(MemoryMarshal.AsBytes(new Span<ulong>(ref r2)));

ulong lohi = Math.BigMul(r2, bound, out ulong lolo);
sum = rbound + lohi;
carry = sum < rbound ? 1ul : 0ul;
rbound = lolo;
} while (sum == ~0ul);
r += carry;
}

// Shuffle the values ​​based on r
for (int m = i; m < nextIndex; m++)
{
int index = (int)Math.BigMul(r, (ulong)(m + 1), out r);

// Swap span[m] <-> span[index]
ref var head = ref MemoryMarshal.GetReference(values);
var t = Unsafe.Add(ref head, m);
Unsafe.Add(ref head, m) = Unsafe.Add(ref head, index);
Unsafe.Add(ref head, index) = t;
}

i = nextIndex;

// Calculates bound.
// bound is (i + 1) * (i + 2) * ... * (nextIndex) < 2^64
bound = (ulong)(i + 1);
for (nextIndex = i + 1; nextIndex < values.Length; nextIndex++)
{
if (Math.BigMul(bound, (ulong)(nextIndex + 1), out var newbound) == 0)
{
bound = newbound;
}
else
{
break;
}
}
}
}
Expand Down
Loading