diff --git a/src/libraries/System.Threading.Tasks.Parallel/tests/ParallelForEachAsyncTests.cs b/src/libraries/System.Threading.Tasks.Parallel/tests/ParallelForEachAsyncTests.cs index 77c16ddd72c928..093d35bb55e0e9 100644 --- a/src/libraries/System.Threading.Tasks.Parallel/tests/ParallelForEachAsyncTests.cs +++ b/src/libraries/System.Threading.Tasks.Parallel/tests/ParallelForEachAsyncTests.cs @@ -1393,6 +1393,32 @@ static async IAsyncEnumerable Iterate() await t; } + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsThreadingSupported))] + public async Task GitHubIssue_114262_Async() + { + var options = new ParallelOptions + { + MaxDegreeOfParallelism = 5, + CancellationToken = new CancellationTokenSource().Token + }; + + var range = Enumerable.Range(1, 1000); + + for (int i = 0; i < 100; i++) + { + await Parallel.ForEachAsync(range, options, async (data, token) => + { + for (int i = 0; i < 5; i++) + { + await Task.Yield(); + var buffer = new byte[10_000]; + await Task.Run(() => {var _ = buffer[0];} ); + await Task.Yield(); + } + }); + } + } + private static async IAsyncEnumerable EnumerableRangeAsync(int start, int count, bool yield = true) { for (int i = start; i < start + count; i++) diff --git a/src/mono/mono/utils/atomic.h b/src/mono/mono/utils/atomic.h index 3f0a01e6beb428..9d4459d932fb1a 100644 --- a/src/mono/mono/utils/atomic.h +++ b/src/mono/mono/utils/atomic.h @@ -95,11 +95,25 @@ Apple targets have historically being problematic, xcode 4.6 would miscompile th #include +#if defined(HOST_ARM64) +// C11 atomics on ARM64 offers a weaker version of sequential consistent, not expected by mono atomics operations. +// C11 seq_cst on ARM64 corresponds to acquire/release semantics, but mono expects these functions to emit a full memory +// barrier preventing any kind of reordering around the atomic operation. GCC atomics on ARM64 had similar limitations, +// see comments on GCC atomics below and mono injected full memory barriers around GCC atomic functions to mitigate this. +// Since mono GCC atomics implementation ended up even stronger (full memory barrier before/after), the C11 atomics +// implementation is still a little weaker, but should correspond to the exact same semantics as implemented by JIT +// compiler for sequential consistent atomic load/store/add/exchange/cas op codes on ARM64. +#define C11_MEMORY_ORDER_SEQ_CST() atomic_thread_fence (memory_order_seq_cst) +#else +#define C11_MEMORY_ORDER_SEQ_CST() +#endif + static inline guint8 mono_atomic_cas_u8 (volatile guint8 *dest, guint8 exch, guint8 comp) { g_static_assert (sizeof (atomic_char) == sizeof (*dest) && ATOMIC_CHAR_LOCK_FREE == 2); (void)atomic_compare_exchange_strong ((volatile atomic_char *)dest, (char*)&comp, exch); + C11_MEMORY_ORDER_SEQ_CST (); return comp; } @@ -108,6 +122,7 @@ mono_atomic_cas_u16 (volatile guint16 *dest, guint16 exch, guint16 comp) { g_static_assert (sizeof (atomic_short) == sizeof (*dest) && ATOMIC_SHORT_LOCK_FREE == 2); (void)atomic_compare_exchange_strong ((volatile atomic_short *)dest, (short*)&comp, exch); + C11_MEMORY_ORDER_SEQ_CST (); return comp; } @@ -116,6 +131,7 @@ mono_atomic_cas_i32 (volatile gint32 *dest, gint32 exch, gint32 comp) { g_static_assert (sizeof (atomic_int) == sizeof (*dest) && ATOMIC_INT_LOCK_FREE == 2); (void)atomic_compare_exchange_strong ((volatile atomic_int *)dest, &comp, exch); + C11_MEMORY_ORDER_SEQ_CST (); return comp; } @@ -125,14 +141,14 @@ mono_atomic_cas_i64 (volatile gint64 *dest, gint64 exch, gint64 comp) #if SIZEOF_LONG == 8 g_static_assert (sizeof (atomic_long) == sizeof (*dest) && ATOMIC_LONG_LOCK_FREE == 2); (void)atomic_compare_exchange_strong ((volatile atomic_long *)dest, (long*)&comp, exch); - return comp; #elif SIZEOF_LONG_LONG == 8 g_static_assert (sizeof (atomic_llong) == sizeof (*dest) && ATOMIC_LLONG_LOCK_FREE == 2); (void)atomic_compare_exchange_strong ((volatile atomic_llong *)dest, (long long*)&comp, exch); - return comp; #else #error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC" #endif + C11_MEMORY_ORDER_SEQ_CST (); + return comp; } static inline gpointer @@ -140,6 +156,7 @@ mono_atomic_cas_ptr (volatile gpointer *dest, gpointer exch, gpointer comp) { g_static_assert(ATOMIC_POINTER_LOCK_FREE == 2); (void)atomic_compare_exchange_strong ((volatile _Atomic(gpointer) *)dest, &comp, exch); + C11_MEMORY_ORDER_SEQ_CST (); return comp; } @@ -191,21 +208,27 @@ static inline guint8 mono_atomic_xchg_u8 (volatile guint8 *dest, guint8 exch) { g_static_assert (sizeof (atomic_char) == sizeof (*dest) && ATOMIC_CHAR_LOCK_FREE == 2); - return atomic_exchange ((volatile atomic_char *)dest, exch); + guint8 old = atomic_exchange ((volatile atomic_char *)dest, exch); + C11_MEMORY_ORDER_SEQ_CST (); + return old; } static inline guint16 mono_atomic_xchg_u16 (volatile guint16 *dest, guint16 exch) { g_static_assert (sizeof (atomic_short) == sizeof (*dest) && ATOMIC_SHORT_LOCK_FREE == 2); - return atomic_exchange ((volatile atomic_short *)dest, exch); + guint16 old = atomic_exchange ((volatile atomic_short *)dest, exch); + C11_MEMORY_ORDER_SEQ_CST (); + return old; } static inline gint32 mono_atomic_xchg_i32 (volatile gint32 *dest, gint32 exch) { g_static_assert (sizeof (atomic_int) == sizeof (*dest) && ATOMIC_INT_LOCK_FREE == 2); - return atomic_exchange ((volatile atomic_int *)dest, exch); + gint32 old = atomic_exchange ((volatile atomic_int *)dest, exch); + C11_MEMORY_ORDER_SEQ_CST (); + return old; } static inline gint64 @@ -213,27 +236,33 @@ mono_atomic_xchg_i64 (volatile gint64 *dest, gint64 exch) { #if SIZEOF_LONG == 8 g_static_assert (sizeof (atomic_long) == sizeof (*dest) && ATOMIC_LONG_LOCK_FREE == 2); - return atomic_exchange ((volatile atomic_long *)dest, exch); + gint64 old = atomic_exchange ((volatile atomic_long *)dest, exch); #elif SIZEOF_LONG_LONG == 8 g_static_assert (sizeof (atomic_llong) == sizeof (*dest) && ATOMIC_LLONG_LOCK_FREE == 2); - return atomic_exchange ((volatile atomic_llong *)dest, exch); + gint64 old = atomic_exchange ((volatile atomic_llong *)dest, exch); #else #error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC" #endif + C11_MEMORY_ORDER_SEQ_CST (); + return old; } static inline gpointer mono_atomic_xchg_ptr (volatile gpointer *dest, gpointer exch) { g_static_assert (ATOMIC_POINTER_LOCK_FREE == 2); - return atomic_exchange ((volatile _Atomic(gpointer) *)dest, exch); + gpointer old = atomic_exchange ((volatile _Atomic(gpointer) *)dest, exch); + C11_MEMORY_ORDER_SEQ_CST (); + return old; } static inline gint32 mono_atomic_fetch_add_i32 (volatile gint32 *dest, gint32 add) { g_static_assert (sizeof (atomic_int) == sizeof (*dest) && ATOMIC_INT_LOCK_FREE == 2); - return atomic_fetch_add ((volatile atomic_int *)dest, add); + gint32 old = atomic_fetch_add ((volatile atomic_int *)dest, add); + C11_MEMORY_ORDER_SEQ_CST (); + return old; } static inline gint64 @@ -241,33 +270,41 @@ mono_atomic_fetch_add_i64 (volatile gint64 *dest, gint64 add) { #if SIZEOF_LONG == 8 g_static_assert (sizeof (atomic_long) == sizeof (*dest) && ATOMIC_LONG_LOCK_FREE == 2); - return atomic_fetch_add ((volatile atomic_long *)dest, add); + gint64 old = atomic_fetch_add ((volatile atomic_long *)dest, add); #elif SIZEOF_LONG_LONG == 8 g_static_assert (sizeof (atomic_llong) == sizeof (*dest) && ATOMIC_LLONG_LOCK_FREE == 2); - return atomic_fetch_add ((volatile atomic_llong *)dest, add); + gint64 old = atomic_fetch_add ((volatile atomic_llong *)dest, add); #else #error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC" #endif + C11_MEMORY_ORDER_SEQ_CST (); + return old; } static inline gint8 mono_atomic_load_i8 (volatile gint8 *src) { g_static_assert (sizeof (atomic_char) == sizeof (*src) && ATOMIC_CHAR_LOCK_FREE == 2); - return atomic_load ((volatile atomic_char *)src); + C11_MEMORY_ORDER_SEQ_CST (); + gint8 val = atomic_load ((volatile atomic_char *)src); + return val; } static inline gint16 mono_atomic_load_i16 (volatile gint16 *src) { g_static_assert (sizeof (atomic_short) == sizeof (*src) && ATOMIC_SHORT_LOCK_FREE == 2); - return atomic_load ((volatile atomic_short *)src); + C11_MEMORY_ORDER_SEQ_CST (); + gint16 val = atomic_load ((volatile atomic_short *)src); + return val; } static inline gint32 mono_atomic_load_i32 (volatile gint32 *src) { g_static_assert (sizeof (atomic_int) == sizeof (*src) && ATOMIC_INT_LOCK_FREE == 2); - return atomic_load ((volatile atomic_int *)src); + C11_MEMORY_ORDER_SEQ_CST (); + gint32 val = atomic_load ((volatile atomic_int *)src); + return val; } static inline gint64 @@ -275,20 +312,25 @@ mono_atomic_load_i64 (volatile gint64 *src) { #if SIZEOF_LONG == 8 g_static_assert (sizeof (atomic_long) == sizeof (*src) && ATOMIC_LONG_LOCK_FREE == 2); - return atomic_load ((volatile atomic_long *)src); + C11_MEMORY_ORDER_SEQ_CST (); + gint64 val = atomic_load ((volatile atomic_long *)src); #elif SIZEOF_LONG_LONG == 8 g_static_assert (sizeof (atomic_llong) == sizeof (*src) && ATOMIC_LLONG_LOCK_FREE == 2); - return atomic_load ((volatile atomic_llong *)src); + C11_MEMORY_ORDER_SEQ_CST (); + gint64 val = atomic_load ((volatile atomic_llong *)src); #else #error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC" #endif + return val; } static inline gpointer mono_atomic_load_ptr (volatile gpointer *src) { g_static_assert (ATOMIC_POINTER_LOCK_FREE == 2); - return atomic_load ((volatile _Atomic(gpointer) *)src); + C11_MEMORY_ORDER_SEQ_CST (); + gpointer val = atomic_load ((volatile _Atomic(gpointer) *)src); + return val; } static inline void @@ -296,6 +338,7 @@ mono_atomic_store_i8 (volatile gint8 *dst, gint8 val) { g_static_assert (sizeof (atomic_char) == sizeof (*dst) && ATOMIC_CHAR_LOCK_FREE == 2); atomic_store ((volatile atomic_char *)dst, val); + C11_MEMORY_ORDER_SEQ_CST (); } static inline void @@ -303,6 +346,7 @@ mono_atomic_store_i16 (volatile gint16 *dst, gint16 val) { g_static_assert (sizeof (atomic_short) == sizeof (*dst) && ATOMIC_SHORT_LOCK_FREE == 2); atomic_store ((volatile atomic_short *)dst, val); + C11_MEMORY_ORDER_SEQ_CST (); } static inline void @@ -310,6 +354,7 @@ mono_atomic_store_i32 (volatile gint32 *dst, gint32 val) { g_static_assert (sizeof (atomic_int) == sizeof (*dst) && ATOMIC_INT_LOCK_FREE == 2); atomic_store ((atomic_int *)dst, val); + C11_MEMORY_ORDER_SEQ_CST (); } static inline void @@ -324,6 +369,7 @@ mono_atomic_store_i64 (volatile gint64 *dst, gint64 val) #else #error "gint64 not same size atomic_llong or atomic_long, don't define MONO_USE_STDATOMIC" #endif + C11_MEMORY_ORDER_SEQ_CST (); } static inline void @@ -331,6 +377,7 @@ mono_atomic_store_ptr (volatile gpointer *dst, gpointer val) { g_static_assert (ATOMIC_POINTER_LOCK_FREE == 2); atomic_store ((volatile _Atomic(gpointer) *)dst, val); + C11_MEMORY_ORDER_SEQ_CST (); } #elif defined(MONO_USE_WIN32_ATOMIC) diff --git a/src/tests/FunctionalTests/iOS/Device/ParallelForEachAsync/Program.cs b/src/tests/FunctionalTests/iOS/Device/ParallelForEachAsync/Program.cs new file mode 100644 index 00000000000000..493efaae51f91a --- /dev/null +++ b/src/tests/FunctionalTests/iOS/Device/ParallelForEachAsync/Program.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Threading; +using System.Threading.Tasks; +using System.Runtime.InteropServices; +using System.Linq; + +public static class Program +{ + [DllImport("__Internal")] + public static extern void mono_ios_set_summary (string value); + + private static async Task GitHubIssue_114262_Async() + { + var options = new ParallelOptions + { + MaxDegreeOfParallelism = 5, + CancellationToken = new CancellationTokenSource().Token + }; + + var range = Enumerable.Range(1, 1000); + + for (int i = 0; i < 100; i++) + { + await Parallel.ForEachAsync(range, options, async (data, token) => + { + for (int i = 0; i < 5; i++) + { + await Task.Yield(); + var buffer = new byte[10_000]; + await Task.Run(() => {var _ = buffer[0];} ); + await Task.Yield(); + } + }); + } + } + + public static async Task Main(string[] args) + { + mono_ios_set_summary($"Starting functional test"); + + await GitHubIssue_114262_Async(); + + Console.WriteLine("Done!"); + + return 42; + } +} diff --git a/src/tests/FunctionalTests/iOS/Device/ParallelForEachAsync/iOS.Device.ParallelForEachAsync.Test.csproj b/src/tests/FunctionalTests/iOS/Device/ParallelForEachAsync/iOS.Device.ParallelForEachAsync.Test.csproj new file mode 100644 index 00000000000000..ae748949616933 --- /dev/null +++ b/src/tests/FunctionalTests/iOS/Device/ParallelForEachAsync/iOS.Device.ParallelForEachAsync.Test.csproj @@ -0,0 +1,22 @@ + + + Exe + true + $(NetCoreAppCurrent) + ios + arm64 + false + 42 + true + + + + true + false + iOS.Device.ParallelForEachAsync.Test.dll + + + + + +