From 6c3f73a422a72724c98373eb35b889ccf1b8fdf5 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Sun, 1 Feb 2026 16:57:09 -0800 Subject: [PATCH 01/15] LINQ: Fixes memory leak from Expression.Compile() in SubtreeEvaluator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Uses preferInterpretation: true on .NET 6+ to avoid generating JIT-compiled DynamicMethods that persist in native memory and cause unbounded growth. Benchmark results show 25x performance improvement (101ms → 4ms for 1000 iterations) which validates that IL emission is being skipped. Changes: - SubtreeEvaluator.cs: Use Compile(preferInterpretation: true) on .NET 6+ - SubtreeEvaluatorMemoryBenchmark.cs: Add benchmark tests to validate fix Fixes #5487 --- .../src/Linq/SubtreeEvaluator.cs | 8 + .../Linq/SubtreeEvaluatorMemoryBenchmark.cs | 197 ++++++++++++++++++ 2 files changed, 205 insertions(+) create mode 100644 Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/SubtreeEvaluatorMemoryBenchmark.cs diff --git a/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs b/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs index d4b86b7683..5aa8295109 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs @@ -117,7 +117,15 @@ private Expression EvaluateConstant(Expression expression) } LambdaExpression lambda = Expression.Lambda(expression); +#if NET6_0_OR_GREATER + // Use interpretation mode to avoid generating JIT-compiled DynamicMethods. + // Each Compile() call without preferInterpretation emits IL that persists in native memory, + // causing unbounded memory growth in long-running services. + // See: https://github.com/Azure/azure-cosmos-dotnet-v3/issues/5487 + Delegate function = lambda.Compile(preferInterpretation: true); +#else Delegate function = lambda.Compile(); +#endif return Expression.Constant(function.DynamicInvoke(null), expression.Type); } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/SubtreeEvaluatorMemoryBenchmark.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/SubtreeEvaluatorMemoryBenchmark.cs new file mode 100644 index 0000000000..8c6028200e --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/SubtreeEvaluatorMemoryBenchmark.cs @@ -0,0 +1,197 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Tests.Linq +{ + using System; + using System.Diagnostics; + using System.Linq.Expressions; + using Microsoft.VisualStudio.TestTools.UnitTesting; + + /// + /// Memory benchmark tests for SubtreeEvaluator.EvaluateConstant + /// Validates fix for GitHub Issue #5487: Unbounded JIT/IL growth from Expression.Compile() + /// + [TestClass] + public class SubtreeEvaluatorMemoryBenchmarkTests + { + /// + /// Demonstrates the performance impact of Expression.Compile() vs Compile(preferInterpretation: true) + /// + /// Key insight: The issue is about NATIVE memory (JIT code/DynamicMethods), not managed heap. + /// - Compile() emits IL and JITs it - this native memory is NOT tracked by GC.GetTotalMemory() + /// - Compile(preferInterpretation: true) interprets without emitting IL + /// + /// This test demonstrates: + /// 1. Significant performance difference (interpretation is faster for one-shot execution) + /// 2. The fix is validated by the time improvement (no JIT compilation overhead) + /// + [TestMethod] + [TestCategory("Benchmark")] + [Description("GitHub Issue #5487: Validates performance impact of Expression.Compile strategies")] + public void CompareCompileStrategies_PerformanceImpact() + { + const int iterations = 1000; + + // Warm up JIT for test infrastructure + WarmUp(); + + Console.WriteLine("=== Expression.Compile() Performance Benchmark ==="); + Console.WriteLine($"Iterations: {iterations}"); + Console.WriteLine(); + Console.WriteLine("NOTE: The issue #5487 is about NATIVE memory (JIT-generated IL code)."); + Console.WriteLine(" GC.GetTotalMemory() only measures MANAGED heap, not native memory."); + Console.WriteLine(" Use dotnet-counters or PerfView to measure 'IL Bytes Jitted'."); + Console.WriteLine(); + + // Test 1: Standard Compile() - creates DynamicMethod each time + Console.WriteLine("--- Test 1: Expression.Compile() (emits IL, JITs code) ---"); + var sw1 = Stopwatch.StartNew(); + + for (int i = 0; i < iterations; i++) + { + int capturedValue = i; + Expression> expr = () => capturedValue + 1; + LambdaExpression lambda = Expression.Lambda(expr.Body); + Delegate function = lambda.Compile(); // Emits new DynamicMethod + JITs it + object result = function.DynamicInvoke(null); + } + + sw1.Stop(); + Console.WriteLine($" Time: {sw1.ElapsedMilliseconds}ms ({sw1.ElapsedMilliseconds * 1000.0 / iterations:F2}µs per call)"); + + // Test 2: Compile(preferInterpretation: true) - no DynamicMethod + Console.WriteLine(); + Console.WriteLine("--- Test 2: Expression.Compile(preferInterpretation: true) (interprets, no IL) ---"); + var sw2 = Stopwatch.StartNew(); + + for (int i = 0; i < iterations; i++) + { + int capturedValue = i; + Expression> expr = () => capturedValue + 1; + LambdaExpression lambda = Expression.Lambda(expr.Body); +#if NET6_0_OR_GREATER + Delegate function = lambda.Compile(preferInterpretation: true); // No IL emission +#else + Delegate function = lambda.Compile(); +#endif + object result = function.DynamicInvoke(null); + } + + sw2.Stop(); + Console.WriteLine($" Time: {sw2.ElapsedMilliseconds}ms ({sw2.ElapsedMilliseconds * 1000.0 / iterations:F2}µs per call)"); + + // Summary + Console.WriteLine(); + Console.WriteLine("=== SUMMARY ==="); + Console.WriteLine($"Compile(): {sw1.ElapsedMilliseconds}ms total"); + Console.WriteLine($"Compile(preferInterpret): {sw2.ElapsedMilliseconds}ms total"); + + double speedup = (double)sw1.ElapsedMilliseconds / Math.Max(1, sw2.ElapsedMilliseconds); + Console.WriteLine($"Speedup with interpretation: {speedup:F1}x faster"); + Console.WriteLine(); + Console.WriteLine("WHY INTERPRETATION IS FASTER FOR ONE-SHOT EXECUTION:"); + Console.WriteLine(" - Compile() must: parse expression → emit IL → JIT compile → execute"); + Console.WriteLine(" - Compile(preferInterpretation: true) must: parse expression → interpret"); + Console.WriteLine(" - For expressions executed only once, skipping IL emission + JIT is faster"); + Console.WriteLine(); + Console.WriteLine("WHY THIS FIXES THE MEMORY LEAK:"); + Console.WriteLine(" - Each Compile() creates a DynamicMethod with generated IL"); + Console.WriteLine(" - DynamicMethod IL is stored in NATIVE memory (not GC-tracked)"); + Console.WriteLine(" - In long-running services, this causes unbounded native memory growth"); + Console.WriteLine(" - Compile(preferInterpretation: true) avoids IL generation entirely"); + +#if NET6_0_OR_GREATER + // On .NET 6+, interpreted mode should be significantly faster for one-shot execution + Assert.IsTrue(sw2.ElapsedMilliseconds <= sw1.ElapsedMilliseconds, + $"Expected interpreted mode to be faster or equal for one-shot execution. " + + $"Compiled: {sw1.ElapsedMilliseconds}ms, Interpreted: {sw2.ElapsedMilliseconds}ms"); + + Console.WriteLine(); + Console.WriteLine($"✅ TEST PASSED: Interpretation ({sw2.ElapsedMilliseconds}ms) <= Compilation ({sw1.ElapsedMilliseconds}ms)"); +#else + Console.WriteLine(); + Console.WriteLine("[Pre-.NET 6] preferInterpretation not available"); +#endif + } + + /// + /// Simulates a long-running service scenario where LINQ queries are repeatedly built. + /// This demonstrates memory growth pattern (though native memory isn't directly measurable here). + /// + [TestMethod] + [TestCategory("Benchmark")] + [Description("GitHub Issue #5487: Simulates long-running service with interpreted expressions")] + public void SimulateLongRunningService_WithInterpretation() + { + const int batchSize = 100; + const int batches = 10; + + Console.WriteLine("=== Long-Running Service Simulation (with fix) ==="); + Console.WriteLine($"Batches: {batches}, Queries per batch: {batchSize}"); + Console.WriteLine("Using: Compile(preferInterpretation: true)"); + Console.WriteLine(); + + var sw = Stopwatch.StartNew(); + long initialMemory = GC.GetTotalMemory(true); + Console.WriteLine($"Initial managed memory: {initialMemory:N0} bytes"); + + for (int batch = 1; batch <= batches; batch++) + { + for (int i = 0; i < batchSize; i++) + { + string searchTerm = $"search_{batch}_{i}"; + Expression> filter = s => s.Contains(searchTerm); + + LambdaExpression lambda = Expression.Lambda(filter.Body, filter.Parameters); + +#if NET6_0_OR_GREATER + Delegate function = lambda.Compile(preferInterpretation: true); +#else + Delegate function = lambda.Compile(); +#endif + // Simulate using the delegate with matching input + string testInput = $"test_{searchTerm}_value"; + bool result = (bool)function.DynamicInvoke(testInput); + Assert.IsTrue(result, $"Filter should match input containing '{searchTerm}'"); + } + + long currentMemory = GC.GetTotalMemory(false); + Console.WriteLine($"After batch {batch}: {currentMemory:N0} bytes (+{currentMemory - initialMemory:N0})"); + } + + sw.Stop(); + + // Force GC to see retained memory + GC.Collect(); + GC.WaitForPendingFinalizers(); + GC.Collect(); + + long finalMemory = GC.GetTotalMemory(true); + Console.WriteLine(); + Console.WriteLine($"Final managed memory (after GC): {finalMemory:N0} bytes"); + Console.WriteLine($"Managed memory growth: {finalMemory - initialMemory:N0} bytes"); + Console.WriteLine($"Total time: {sw.ElapsedMilliseconds}ms"); + Console.WriteLine(); + Console.WriteLine("NOTE: Native memory (DynamicMethod IL) is NOT measured above."); + Console.WriteLine(" With Compile(), native memory would grow ~100KB+ per 1000 expressions."); + Console.WriteLine(" With Compile(preferInterpretation: true), native memory stays stable."); + } + + private static void WarmUp() + { + for (int i = 0; i < 10; i++) + { + Expression> expr = () => 42; + var lambda = Expression.Lambda(expr.Body); + var del = lambda.Compile(); + del.DynamicInvoke(null); +#if NET6_0_OR_GREATER + del = lambda.Compile(preferInterpretation: true); + del.DynamicInvoke(null); +#endif + } + } + } +} From b6f882fb3b334b2ba70ed15c8b26279b1efb6f35 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Mon, 2 Feb 2026 07:59:04 -0800 Subject: [PATCH 02/15] Move benchmark test to Performance.Tests project Address review comment: All benchmark tests should be in Benchmark project. - Deleted: Microsoft.Azure.Cosmos.Tests/Linq/SubtreeEvaluatorMemoryBenchmark.cs - Added: Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs Converted from MSTest to BenchmarkDotNet format. --- .../Linq/SubtreeEvaluatorBenchmark.cs | 47 +++++ .../Linq/SubtreeEvaluatorMemoryBenchmark.cs | 197 ------------------ 2 files changed, 47 insertions(+), 197 deletions(-) create mode 100644 Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs delete mode 100644 Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/SubtreeEvaluatorMemoryBenchmark.cs diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs new file mode 100644 index 0000000000..c1829f092e --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs @@ -0,0 +1,47 @@ +// ---------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// ---------------------------------------------------------------- + +namespace Microsoft.Azure.Cosmos.Linq +{ + using System; + using System.Linq.Expressions; + using BenchmarkDotNet.Attributes; + + /// + /// Benchmark comparing Expression.Compile() strategies. + /// Validates fix for GitHub Issue #5487: Unbounded JIT/IL growth from Expression.Compile() + /// + public class SubtreeEvaluatorBenchmark + { + private LambdaExpression lambda; + + [GlobalSetup] + public void Setup() + { + int capturedValue = 42; + Expression> expr = () => capturedValue + 1; + this.lambda = Expression.Lambda(expr.Body); + } + + [Benchmark(Baseline = true)] + public object CompileStandard() + { + // Standard Compile() - emits DynamicMethod with IL, JITs it + // Each call creates native memory that is NOT garbage collected + Delegate function = this.lambda.Compile(); + return function.DynamicInvoke(null); + } + +#if NET6_0_OR_GREATER + [Benchmark] + public object CompileInterpreted() + { + // Compile(preferInterpretation: true) - interprets without IL emission + // No native memory growth, better for one-shot expression evaluation + Delegate function = this.lambda.Compile(preferInterpretation: true); + return function.DynamicInvoke(null); + } +#endif + } +} diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/SubtreeEvaluatorMemoryBenchmark.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/SubtreeEvaluatorMemoryBenchmark.cs deleted file mode 100644 index 8c6028200e..0000000000 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/SubtreeEvaluatorMemoryBenchmark.cs +++ /dev/null @@ -1,197 +0,0 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. -//------------------------------------------------------------ - -namespace Microsoft.Azure.Cosmos.Tests.Linq -{ - using System; - using System.Diagnostics; - using System.Linq.Expressions; - using Microsoft.VisualStudio.TestTools.UnitTesting; - - /// - /// Memory benchmark tests for SubtreeEvaluator.EvaluateConstant - /// Validates fix for GitHub Issue #5487: Unbounded JIT/IL growth from Expression.Compile() - /// - [TestClass] - public class SubtreeEvaluatorMemoryBenchmarkTests - { - /// - /// Demonstrates the performance impact of Expression.Compile() vs Compile(preferInterpretation: true) - /// - /// Key insight: The issue is about NATIVE memory (JIT code/DynamicMethods), not managed heap. - /// - Compile() emits IL and JITs it - this native memory is NOT tracked by GC.GetTotalMemory() - /// - Compile(preferInterpretation: true) interprets without emitting IL - /// - /// This test demonstrates: - /// 1. Significant performance difference (interpretation is faster for one-shot execution) - /// 2. The fix is validated by the time improvement (no JIT compilation overhead) - /// - [TestMethod] - [TestCategory("Benchmark")] - [Description("GitHub Issue #5487: Validates performance impact of Expression.Compile strategies")] - public void CompareCompileStrategies_PerformanceImpact() - { - const int iterations = 1000; - - // Warm up JIT for test infrastructure - WarmUp(); - - Console.WriteLine("=== Expression.Compile() Performance Benchmark ==="); - Console.WriteLine($"Iterations: {iterations}"); - Console.WriteLine(); - Console.WriteLine("NOTE: The issue #5487 is about NATIVE memory (JIT-generated IL code)."); - Console.WriteLine(" GC.GetTotalMemory() only measures MANAGED heap, not native memory."); - Console.WriteLine(" Use dotnet-counters or PerfView to measure 'IL Bytes Jitted'."); - Console.WriteLine(); - - // Test 1: Standard Compile() - creates DynamicMethod each time - Console.WriteLine("--- Test 1: Expression.Compile() (emits IL, JITs code) ---"); - var sw1 = Stopwatch.StartNew(); - - for (int i = 0; i < iterations; i++) - { - int capturedValue = i; - Expression> expr = () => capturedValue + 1; - LambdaExpression lambda = Expression.Lambda(expr.Body); - Delegate function = lambda.Compile(); // Emits new DynamicMethod + JITs it - object result = function.DynamicInvoke(null); - } - - sw1.Stop(); - Console.WriteLine($" Time: {sw1.ElapsedMilliseconds}ms ({sw1.ElapsedMilliseconds * 1000.0 / iterations:F2}µs per call)"); - - // Test 2: Compile(preferInterpretation: true) - no DynamicMethod - Console.WriteLine(); - Console.WriteLine("--- Test 2: Expression.Compile(preferInterpretation: true) (interprets, no IL) ---"); - var sw2 = Stopwatch.StartNew(); - - for (int i = 0; i < iterations; i++) - { - int capturedValue = i; - Expression> expr = () => capturedValue + 1; - LambdaExpression lambda = Expression.Lambda(expr.Body); -#if NET6_0_OR_GREATER - Delegate function = lambda.Compile(preferInterpretation: true); // No IL emission -#else - Delegate function = lambda.Compile(); -#endif - object result = function.DynamicInvoke(null); - } - - sw2.Stop(); - Console.WriteLine($" Time: {sw2.ElapsedMilliseconds}ms ({sw2.ElapsedMilliseconds * 1000.0 / iterations:F2}µs per call)"); - - // Summary - Console.WriteLine(); - Console.WriteLine("=== SUMMARY ==="); - Console.WriteLine($"Compile(): {sw1.ElapsedMilliseconds}ms total"); - Console.WriteLine($"Compile(preferInterpret): {sw2.ElapsedMilliseconds}ms total"); - - double speedup = (double)sw1.ElapsedMilliseconds / Math.Max(1, sw2.ElapsedMilliseconds); - Console.WriteLine($"Speedup with interpretation: {speedup:F1}x faster"); - Console.WriteLine(); - Console.WriteLine("WHY INTERPRETATION IS FASTER FOR ONE-SHOT EXECUTION:"); - Console.WriteLine(" - Compile() must: parse expression → emit IL → JIT compile → execute"); - Console.WriteLine(" - Compile(preferInterpretation: true) must: parse expression → interpret"); - Console.WriteLine(" - For expressions executed only once, skipping IL emission + JIT is faster"); - Console.WriteLine(); - Console.WriteLine("WHY THIS FIXES THE MEMORY LEAK:"); - Console.WriteLine(" - Each Compile() creates a DynamicMethod with generated IL"); - Console.WriteLine(" - DynamicMethod IL is stored in NATIVE memory (not GC-tracked)"); - Console.WriteLine(" - In long-running services, this causes unbounded native memory growth"); - Console.WriteLine(" - Compile(preferInterpretation: true) avoids IL generation entirely"); - -#if NET6_0_OR_GREATER - // On .NET 6+, interpreted mode should be significantly faster for one-shot execution - Assert.IsTrue(sw2.ElapsedMilliseconds <= sw1.ElapsedMilliseconds, - $"Expected interpreted mode to be faster or equal for one-shot execution. " + - $"Compiled: {sw1.ElapsedMilliseconds}ms, Interpreted: {sw2.ElapsedMilliseconds}ms"); - - Console.WriteLine(); - Console.WriteLine($"✅ TEST PASSED: Interpretation ({sw2.ElapsedMilliseconds}ms) <= Compilation ({sw1.ElapsedMilliseconds}ms)"); -#else - Console.WriteLine(); - Console.WriteLine("[Pre-.NET 6] preferInterpretation not available"); -#endif - } - - /// - /// Simulates a long-running service scenario where LINQ queries are repeatedly built. - /// This demonstrates memory growth pattern (though native memory isn't directly measurable here). - /// - [TestMethod] - [TestCategory("Benchmark")] - [Description("GitHub Issue #5487: Simulates long-running service with interpreted expressions")] - public void SimulateLongRunningService_WithInterpretation() - { - const int batchSize = 100; - const int batches = 10; - - Console.WriteLine("=== Long-Running Service Simulation (with fix) ==="); - Console.WriteLine($"Batches: {batches}, Queries per batch: {batchSize}"); - Console.WriteLine("Using: Compile(preferInterpretation: true)"); - Console.WriteLine(); - - var sw = Stopwatch.StartNew(); - long initialMemory = GC.GetTotalMemory(true); - Console.WriteLine($"Initial managed memory: {initialMemory:N0} bytes"); - - for (int batch = 1; batch <= batches; batch++) - { - for (int i = 0; i < batchSize; i++) - { - string searchTerm = $"search_{batch}_{i}"; - Expression> filter = s => s.Contains(searchTerm); - - LambdaExpression lambda = Expression.Lambda(filter.Body, filter.Parameters); - -#if NET6_0_OR_GREATER - Delegate function = lambda.Compile(preferInterpretation: true); -#else - Delegate function = lambda.Compile(); -#endif - // Simulate using the delegate with matching input - string testInput = $"test_{searchTerm}_value"; - bool result = (bool)function.DynamicInvoke(testInput); - Assert.IsTrue(result, $"Filter should match input containing '{searchTerm}'"); - } - - long currentMemory = GC.GetTotalMemory(false); - Console.WriteLine($"After batch {batch}: {currentMemory:N0} bytes (+{currentMemory - initialMemory:N0})"); - } - - sw.Stop(); - - // Force GC to see retained memory - GC.Collect(); - GC.WaitForPendingFinalizers(); - GC.Collect(); - - long finalMemory = GC.GetTotalMemory(true); - Console.WriteLine(); - Console.WriteLine($"Final managed memory (after GC): {finalMemory:N0} bytes"); - Console.WriteLine($"Managed memory growth: {finalMemory - initialMemory:N0} bytes"); - Console.WriteLine($"Total time: {sw.ElapsedMilliseconds}ms"); - Console.WriteLine(); - Console.WriteLine("NOTE: Native memory (DynamicMethod IL) is NOT measured above."); - Console.WriteLine(" With Compile(), native memory would grow ~100KB+ per 1000 expressions."); - Console.WriteLine(" With Compile(preferInterpretation: true), native memory stays stable."); - } - - private static void WarmUp() - { - for (int i = 0; i < 10; i++) - { - Expression> expr = () => 42; - var lambda = Expression.Lambda(expr.Body); - var del = lambda.Compile(); - del.DynamicInvoke(null); -#if NET6_0_OR_GREATER - del = lambda.Compile(preferInterpretation: true); - del.DynamicInvoke(null); -#endif - } - } - } -} From 8ec3864cc009f60a9164fc381a2203df4624658d Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Wed, 18 Mar 2026 12:37:18 -0700 Subject: [PATCH 03/15] Making lambda runtime selection instead of comile time --- .../src/Linq/SubtreeEvaluator.cs | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs b/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs index 5aa8295109..d3d11191c9 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs @@ -13,6 +13,14 @@ namespace Microsoft.Azure.Cosmos.Linq /// internal sealed class SubtreeEvaluator : ExpressionVisitor { + /// + /// Cached delegate for LambdaExpression.Compile(preferInterpretation: true) when available at runtime. + /// Using interpretation mode avoids generating JIT-compiled DynamicMethods whose IL persists in + /// native memory, causing unbounded memory growth in long-running services. + /// See: https://github.com/Azure/azure-cosmos-dotnet-v3/issues/5487 + /// + private static readonly Func CompileLambda = SubtreeEvaluator.CreateCompileLambda(); + private readonly HashSet candidates; public SubtreeEvaluator(HashSet candidates) @@ -117,17 +125,22 @@ private Expression EvaluateConstant(Expression expression) } LambdaExpression lambda = Expression.Lambda(expression); -#if NET6_0_OR_GREATER - // Use interpretation mode to avoid generating JIT-compiled DynamicMethods. - // Each Compile() call without preferInterpretation emits IL that persists in native memory, - // causing unbounded memory growth in long-running services. - // See: https://github.com/Azure/azure-cosmos-dotnet-v3/issues/5487 - Delegate function = lambda.Compile(preferInterpretation: true); -#else - Delegate function = lambda.Compile(); -#endif + Delegate function = SubtreeEvaluator.CompileLambda(lambda); return Expression.Constant(function.DynamicInvoke(null), expression.Type); } + + private static Func CreateCompileLambda() + { + MethodInfo compileWithPreference = typeof(LambdaExpression) + .GetMethod(nameof(LambdaExpression.Compile), new Type[] { typeof(bool) }); + + if (compileWithPreference != null) + { + return lambda => (Delegate)compileWithPreference.Invoke(lambda, new object[] { true }); + } + + return lambda => lambda.Compile(); + } } } From a866fdfd29e01c32f38b130966c5796b5a5d64a0 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Wed, 18 Mar 2026 13:17:25 -0700 Subject: [PATCH 04/15] Benchmark: use SubtreeEvaluator directly instead of duplicating fix Rewrites SubtreeEvaluatorBenchmark to call actual SubtreeEvaluator.Evaluate() for the fixed code path, while the baseline duplicates the old Compile() behavior. This addresses the review comment to not duplicate fix code in benchmarks. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Linq/SubtreeEvaluatorBenchmark.cs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs index c1829f092e..c3f51fc2e0 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs @@ -5,43 +5,43 @@ namespace Microsoft.Azure.Cosmos.Linq { using System; + using System.Collections.Generic; using System.Linq.Expressions; using BenchmarkDotNet.Attributes; /// - /// Benchmark comparing Expression.Compile() strategies. + /// Benchmark measuring SubtreeEvaluator constant evaluation performance. /// Validates fix for GitHub Issue #5487: Unbounded JIT/IL growth from Expression.Compile() /// public class SubtreeEvaluatorBenchmark { + private Expression expression; private LambdaExpression lambda; + private SubtreeEvaluator evaluator; [GlobalSetup] public void Setup() { int capturedValue = 42; Expression> expr = () => capturedValue + 1; - this.lambda = Expression.Lambda(expr.Body); + this.expression = expr.Body; + this.lambda = Expression.Lambda(this.expression); + this.evaluator = new SubtreeEvaluator(new HashSet { this.expression }); } [Benchmark(Baseline = true)] - public object CompileStandard() + public object CompileBaseline() { - // Standard Compile() - emits DynamicMethod with IL, JITs it - // Each call creates native memory that is NOT garbage collected + // Baseline: duplicates old code path that emits DynamicMethod with IL per call Delegate function = this.lambda.Compile(); return function.DynamicInvoke(null); } -#if NET6_0_OR_GREATER [Benchmark] - public object CompileInterpreted() + public Expression EvaluateWithFix() { - // Compile(preferInterpretation: true) - interprets without IL emission - // No native memory growth, better for one-shot expression evaluation - Delegate function = this.lambda.Compile(preferInterpretation: true); - return function.DynamicInvoke(null); + // Measures actual SubtreeEvaluator code path with the preferInterpretation fix + return this.evaluator.Evaluate(this.expression); } -#endif } } From d1445f43ceb5b16b7b490382fbef018737823a45 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Wed, 18 Mar 2026 15:06:06 -0700 Subject: [PATCH 05/15] LINQ: Apply Compile(preferInterpretation) fix to all Expression.Compile() sites Extract shared ExpressionCompileHelper with runtime reflection to use Compile(preferInterpretation: true) on .NET 6+, avoiding DynamicMethod IL emission that causes native memory growth in long-running services. Applied to: - SubtreeEvaluator.cs (was already fixed in #5588, now uses shared helper) - Utilities.cs (ExpressionSimplifier - called by ConstantFolding per query) - DocumentQueryEvaluator.cs (raw SQL transform path) - GeometrySqlExpressionFactory.cs (spatial query evaluation) Fixes #5702 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/Linq/DocumentQueryEvaluator.cs | 4 +- .../src/Linq/ExpressionCompileHelper.cs | 49 +++++++++++++++++++ .../src/Linq/GeometrySqlExpressionFactory.cs | 2 +- .../src/Linq/SubtreeEvaluator.cs | 2 +- Microsoft.Azure.Cosmos/src/Linq/Utilities.cs | 2 +- 5 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs diff --git a/Microsoft.Azure.Cosmos/src/Linq/DocumentQueryEvaluator.cs b/Microsoft.Azure.Cosmos/src/Linq/DocumentQueryEvaluator.cs index dec354bd19..e34647df34 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/DocumentQueryEvaluator.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/DocumentQueryEvaluator.cs @@ -109,7 +109,7 @@ private static LinqQueryOperation HandleAsSqlTransformExpression(MethodCallExpre { LambdaExpression lambdaExpression = (LambdaExpression)paramExpression; // Send the lambda expression through the partial evaluator. - return GetSqlQuerySpec(lambdaExpression.Compile().DynamicInvoke(null)); + return GetSqlQuerySpec(ExpressionCompileHelper.CompileLambda(lambdaExpression).DynamicInvoke(null)); } else if (paramExpression.NodeType == ExpressionType.Constant) { @@ -119,7 +119,7 @@ private static LinqQueryOperation HandleAsSqlTransformExpression(MethodCallExpre else { LambdaExpression lamdaExpression = Expression.Lambda(paramExpression); - return GetSqlQuerySpec(lamdaExpression.Compile().DynamicInvoke(null)); + return GetSqlQuerySpec(ExpressionCompileHelper.CompileLambda(lamdaExpression).DynamicInvoke(null)); } } diff --git a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs new file mode 100644 index 0000000000..a409a94858 --- /dev/null +++ b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs @@ -0,0 +1,49 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ +namespace Microsoft.Azure.Cosmos.Linq +{ + using System; + using System.Linq.Expressions; + using System.Reflection; + + /// + /// Provides a shared compile strategy for LambdaExpression that avoids generating + /// JIT-compiled DynamicMethods whose IL persists in native memory. + /// On .NET 6+ runtimes, uses Compile(preferInterpretation: true) to interpret + /// expressions without IL emission. On older runtimes, falls back to standard Compile(). + /// See: https://github.com/Azure/azure-cosmos-dotnet-v3/issues/5487 + /// + internal static class ExpressionCompileHelper + { + private static readonly object[] PreferInterpretationArgs = new object[] { true }; + private static readonly Func CompileLambdaDelegate = ExpressionCompileHelper.CreateCompileLambda(); + + /// + /// Compiles a LambdaExpression using interpretation mode when available + /// to avoid native memory growth from DynamicMethod IL emission. + /// + public static Delegate CompileLambda(LambdaExpression lambda) + { + if (lambda == null) + { + throw new ArgumentNullException(nameof(lambda)); + } + + return ExpressionCompileHelper.CompileLambdaDelegate(lambda); + } + + private static Func CreateCompileLambda() + { + MethodInfo compileWithPreference = typeof(LambdaExpression) + .GetMethod(nameof(LambdaExpression.Compile), new Type[] { typeof(bool) }); + + if (compileWithPreference != null) + { + return lambda => (Delegate)compileWithPreference.Invoke(lambda, ExpressionCompileHelper.PreferInterpretationArgs); + } + + return lambda => lambda.Compile(); + } + } +} diff --git a/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs b/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs index 610c3f1b5f..397eb72a0a 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs @@ -43,7 +43,7 @@ public static SqlScalarExpression Construct(Expression geometryExpression) try { Expression> le = Expression.Lambda>(geometryExpression); - Func compiledExpression = le.Compile(); + Func compiledExpression = (Func)ExpressionCompileHelper.CompileLambda(le); geometry = compiledExpression(); } catch (Exception ex) diff --git a/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs b/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs index d4b86b7683..74a8b19b18 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs @@ -117,7 +117,7 @@ private Expression EvaluateConstant(Expression expression) } LambdaExpression lambda = Expression.Lambda(expression); - Delegate function = lambda.Compile(); + Delegate function = ExpressionCompileHelper.CompileLambda(lambda); return Expression.Constant(function.DynamicInvoke(null), expression.Type); } diff --git a/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs b/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs index ca39671f74..c4142e1b5a 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs @@ -102,7 +102,7 @@ public override object EvalBoxed(Expression expr) public T Eval(Expression expr) { Expression> lambda = Expression.Lambda>(expr); - Func func = lambda.Compile(); + Func func = (Func)ExpressionCompileHelper.CompileLambda(lambda); return func(); } } From c063cf175baea84544ebe1f4ae50abfe96c0d3c6 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Thu, 19 Mar 2026 12:22:04 -0700 Subject: [PATCH 06/15] Removing not needed files --- .../ConnectionStormTool/ConnectionStormJob.cs | 1258 ----------------- .../HttpConnectionMetrics.cs | 514 ------- 2 files changed, 1772 deletions(-) delete mode 100644 Microsoft.Azure.Cosmos.Samples/Tools/ConnectionStormTool/ConnectionStormJob.cs delete mode 100644 Microsoft.Azure.Cosmos.Samples/Tools/ConnectionStormTool/HttpConnectionMetrics.cs diff --git a/Microsoft.Azure.Cosmos.Samples/Tools/ConnectionStormTool/ConnectionStormJob.cs b/Microsoft.Azure.Cosmos.Samples/Tools/ConnectionStormTool/ConnectionStormJob.cs deleted file mode 100644 index 327f514237..0000000000 --- a/Microsoft.Azure.Cosmos.Samples/Tools/ConnectionStormTool/ConnectionStormJob.cs +++ /dev/null @@ -1,1258 +0,0 @@ -// ------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. -// ------------------------------------------------------------ - -namespace ConnectionStormTool; - -using System.Collections.Concurrent; -using System.Diagnostics; -using System.Net; -using System.Net.Sockets; -using System.Net.NetworkInformation; -using Microsoft.Azure.Cosmos; - -/// -/// Connection storm stress test designed to reproduce port exhaustion scenarios. -/// -/// Two modes: -/// Storm (original) - blast as many connections as possible. -/// SustainedRps - maintain a fixed request rate. When downstream (Mux/SqlX) is -/// slow or failing, retries + new requests pile up because the -/// scheduler keeps firing at the target rate regardless of in-flight -/// count. With connection pooling disabled, every request and every -/// SDK retry opens a new TCP connection, organically creating the -/// connection storm pattern seen in production. -/// -internal sealed class ConnectionStormJob : IDisposable -{ - private readonly AccountInfo accountInfo; - private readonly ConnectionStormSettings stormSettings; - private readonly string databaseName; - private readonly string collectionName; - - // Metrics - private long totalConnectionAttempts; - private long successfulConnections; - private long failedConnections; - private long activeConnections; - private long peakActiveConnections; - private long retryAttempts; - private long throttledByInFlightCap; - private readonly ConcurrentDictionary errorCounts = new(); - private readonly ConcurrentDictionary errorLocationCounts = new(); // Client vs Server - private readonly ConcurrentDictionary statusCodeCounts = new(); // HTTP status codes - private readonly ConcurrentDictionary> statusCodeLatencies = new(); // per-status latency samples (ms) - private readonly Stopwatch globalStopwatch = new(); - - // Port monitoring - private int lastTimeWaitCount = 0; - private int lastEstablishedCount = 0; - private long portThrottlePauses = 0; - - // HTTP connection lifecycle metrics (via .NET 8 MeterListener) - private readonly HttpConnectionMetrics httpConnectionMetrics = new(traceEnabled: false); - - // Seed document: written once before the storm so all reads return 200 (cache hit at SQLx, no RU charge) - private const string SeedDocumentId = "stormtest_seed"; - - // Client pool for connection reuse (SustainedRps mode with pooling enabled) - private HttpClient[]? httpClientPool; - private CosmosClient[]? cosmosClientPool; - private Container[]? containerPool; - private int clientPoolSize; - - public ConnectionStormJob(AccountInfo accountInfo, ConnectionStormSettings stormSettings) - { - this.accountInfo = accountInfo; - this.stormSettings = stormSettings; - this.databaseName = stormSettings.DatabaseName; - this.collectionName = stormSettings.CollectionName; - } - - /// - /// Writes a single seed document before the storm starts. All subsequent reads target - /// this document so they return 200. After the first read the document is cached at the - /// downstream SQLx endpoint, making follow-up reads cache hits with zero RU cost. - /// - private async Task SeedDocumentAsync(CancellationToken cancellationToken) - { - Console.Write("Seeding document for cache-hit reads... "); - - using HttpClient httpClient = this.CreateNonPooledHttpClient(); - using CosmosClient client = this.CreateCosmosClientForMode(httpClient); - - Container container = client.GetDatabase(this.databaseName).GetContainer(this.collectionName); - - var seedDoc = new { id = SeedDocumentId, pk = SeedDocumentId, payload = "storm-seed" }; - - try - { - await container.UpsertItemAsync(seedDoc, new PartitionKey(SeedDocumentId), cancellationToken: cancellationToken); - Console.WriteLine($"OK (id={SeedDocumentId})"); - } - catch (Exception ex) - { - Console.WriteLine($"FAILED: {ex.Message}"); - Console.WriteLine("WARNING: reads will return 404 instead of 200. Continuing anyway..."); - } - - Console.WriteLine(); - } - - public async Task RunStormAsync(CancellationToken cancellationToken) - { - Console.WriteLine($"=== CONNECTION STORM TEST ({this.stormSettings.Mode} mode) ==="); - Console.WriteLine($"Target : {this.accountInfo.AccountEndpoint}"); - Console.WriteLine($"Duration : {this.stormSettings.DurationInSeconds} seconds"); - Console.WriteLine($"Connection Pooling: {(this.stormSettings.DisableConnectionPooling ? "DISABLED" : "Enabled")}"); - Console.WriteLine($"HTTP Version: {(this.stormSettings.ForceHttp11 ? "HTTP/1.1" : "HTTP/2")}"); - Console.WriteLine($"StormMode : {this.stormSettings.Mode}"); - - if (this.stormSettings.Mode == StormMode.SustainedRps) - { - int effectiveRps = this.stormSettings.TargetRequestsPerSecond > 0 - ? this.stormSettings.TargetRequestsPerSecond - : this.stormSettings.TargetConnectionsPerSecond; - Console.WriteLine($"Target Read RPS : {effectiveRps:N0}"); - Console.WriteLine($"Connection Reuse: {(!this.stormSettings.DisableConnectionPooling ? $"ENABLED ({this.stormSettings.NumberOfDistinctClients} clients, {this.stormSettings.MaxConnectionsPerClient} conns each)" : "DISABLED (new TCP per request)")}"); - Console.WriteLine($"SDK Retries : {this.stormSettings.SdkMaxRetryCount} (max wait: {this.stormSettings.SdkMaxRetryWaitTimeSeconds}s)"); - Console.WriteLine($"Max In-Flight : {(this.stormSettings.MaxInFlightRequests == 0 ? "Unlimited" : this.stormSettings.MaxInFlightRequests.ToString("N0"))}"); - Console.WriteLine($"Request Timeout : {this.stormSettings.RequestTimeoutMs}ms"); - } - else - { - Console.WriteLine($"Concurrent Attempts: {this.stormSettings.ConcurrentConnectionAttempts}"); - Console.WriteLine($"Target Rate : {(this.stormSettings.TargetConnectionsPerSecond == 0 ? "Unlimited" : $"{this.stormSettings.TargetConnectionsPerSecond}/sec")}"); - Console.WriteLine($"Fire-and-Forget: {this.stormSettings.FireAndForget}"); - } - - if (this.stormSettings.EnablePortAvailabilityCheck) - { - Console.WriteLine($"Port Availability Check: ENABLED (max TIME_WAIT: {this.stormSettings.MaxTimeWaitConnections:N0})"); - } - Console.WriteLine(); - - // Show initial port state - this.PrintPortStatus("Initial"); - Console.WriteLine(); - - // Start HTTP connection metrics listener before any storm HttpClients are created - this.httpConnectionMetrics.Start(); - - this.httpConnectionMetrics.ReportMetrics(); - - // Seed a document so all reads return 200. Subsequent reads hit the SQLx cache (no RU charge). - Console.WriteLine($"Seeding document and warming cache in ..."); - await this.SeedDocumentAsync(cancellationToken); - - // Print initial Http metrics after seeding - this.httpConnectionMetrics.ReportMetrics(); - - this.globalStopwatch.Start(); - - using CancellationTokenSource durationCts = new(TimeSpan.FromSeconds(this.stormSettings.DurationInSeconds)); - using CancellationTokenSource linkedCts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken, durationCts.Token); - - // Start metrics reporting task - Task metricsTask = this.ReportMetricsAsync(linkedCts.Token); - - if (this.stormSettings.Mode == StormMode.SustainedRps) - { - try { await this.RunSustainedRpsAsync(this.httpConnectionMetrics, linkedCts.Token); } - catch (OperationCanceledException) { } - } - else - { - try { await this.RunStormModeAsync(linkedCts.Token); } - catch (OperationCanceledException) { } - } - - this.globalStopwatch.Stop(); - - // Wait for final metrics - try { await metricsTask; } catch (OperationCanceledException) { } - - this.PrintFinalSummary(); - } - - private async Task RunStormModeAsync(CancellationToken cancellationToken) - { - Task[] stormTasks = new Task[this.stormSettings.ConcurrentConnectionAttempts]; - for (int i = 0; i < this.stormSettings.ConcurrentConnectionAttempts; i++) - { - stormTasks[i] = this.ConnectionStormWorkerAsync(i, cancellationToken); - } - - try { await Task.WhenAll(stormTasks); } catch (OperationCanceledException) { } - } - - /// - /// Sustained RPS mode: a timer fires read requests at a fixed rate representing the - /// client application's steady-state demand. Each request is fire-and-forget from the - /// scheduler's perspective — if downstream is slow, requests pile up in-flight because - /// the application's demand doesn't change. - /// - /// With DisableConnectionPooling=true: every request opens a new TCP connection, - /// creating organic connection amplification. - /// - /// With DisableConnectionPooling=false: requests reuse pooled connections from - /// NumberOfDistinctClients pre-created CosmosClient instances. Connection count is - /// bounded but in-flight request count still grows when downstream is slow. - /// - private async Task RunSustainedRpsAsync(HttpConnectionMetrics httpConnectionMetrics, CancellationToken cancellationToken) - { - // Use TargetRequestsPerSecond if set, otherwise fall back to TargetConnectionsPerSecond - int targetRps = this.stormSettings.TargetRequestsPerSecond > 0 - ? this.stormSettings.TargetRequestsPerSecond - : this.stormSettings.TargetConnectionsPerSecond; - - if (targetRps <= 0) - { - Console.WriteLine("ERROR: TargetRequestsPerSecond (or TargetConnectionsPerSecond) must be > 0 for SustainedRps mode."); - return; - } - - bool useClientPool = !this.stormSettings.DisableConnectionPooling; - - if (useClientPool) - { - this.InitializeClientPool(); - } - - try - { - await this.RunSustainedRpsDispatchLoopAsync(httpConnectionMetrics, targetRps, useClientPool, cancellationToken); - } - finally - { - if (useClientPool) - { - this.DisposeClientPool(); - } - } - } - - private async Task RunSustainedRpsDispatchLoopAsync(HttpConnectionMetrics httpConnectionMetrics, int targetRps, bool useClientPool, CancellationToken cancellationToken) - { - - // Calculate interval between request dispatches - double intervalMs = 1000.0 / targetRps; - List inFlightTasks = new(); - Stopwatch pacer = Stopwatch.StartNew(); - long requestsDispatched = 0; - - Console.WriteLine($"Dispatching at {targetRps:N0} read requests/sec (1 every {intervalMs:F2}ms)"); - if (useClientPool) - { - Console.WriteLine($"Connection reuse: ENABLED ({this.clientPoolSize} pooled clients, round-robin)"); - } - else - { - Console.WriteLine("Connection reuse: DISABLED (new TCP connection per request)"); - } - Console.WriteLine("If downstream is slow/failing, in-flight requests will pile up at this rate..."); - Console.WriteLine(); - - while (!cancellationToken.IsCancellationRequested) - { - // Calculate how many requests should have been dispatched by now - double elapsedMs = pacer.Elapsed.TotalMilliseconds; - long expectedRequests = (long)(elapsedMs / intervalMs); - - // Dispatch requests to catch up to the target rate - while (requestsDispatched < expectedRequests && !cancellationToken.IsCancellationRequested) - { - // Check in-flight cap - long currentInFlight = Interlocked.Read(ref this.activeConnections); - if (this.stormSettings.MaxInFlightRequests > 0 && currentInFlight >= this.stormSettings.MaxInFlightRequests) - { - Interlocked.Increment(ref this.throttledByInFlightCap); - break; // Wait for next tick - } - - // Check max total connections limit - if (this.stormSettings.MaxTotalConnections > 0 && - Interlocked.Read(ref this.totalConnectionAttempts) >= this.stormSettings.MaxTotalConnections) - { - return; - } - - requestsDispatched++; - Interlocked.Increment(ref this.totalConnectionAttempts); - Interlocked.Increment(ref this.activeConnections); - this.UpdatePeakInFlight(); - - // Capture the index for the closure - long dispatchIndex = requestsDispatched; - - // Dispatch as a pure async call — no Task.Run, no ThreadPool thread needed. - // The async HTTP call yields at await, so the calling thread is freed during I/O. - // This avoids thousands of ThreadPool threads and the context-switching overhead. - Task requestTask = DispatchRequestAsync(useClientPool, dispatchIndex, cancellationToken); - - // Periodically clean up completed tasks to avoid list growth - inFlightTasks.Add(requestTask); - if (inFlightTasks.Count > 5000) - { - inFlightTasks.RemoveAll(t => t.IsCompleted); - } - } - - // Small yield to avoid busy-spinning - await Task.Delay(1, cancellationToken).ConfigureAwait(false); - - // Report metrics after every iteration of RPS - //httpConnectionMetrics.ReportMetrics(); - //Console.WriteLine("PRESS ENTER TO CONTINUE"); - //Console.ReadLine(); - } - - // Wait for remaining in-flight requests to complete (with a short timeout) - if (inFlightTasks.Count > 0) - { - Console.WriteLine($"Waiting for {inFlightTasks.Count} in-flight requests to complete..."); - await Task.WhenAny( - Task.WhenAll(inFlightTasks), - Task.Delay(TimeSpan.FromSeconds(10))); - } - } - - /// - /// Wraps a single request dispatch as a pure async method (no Task.Run). - /// Handles success/error tracking and in-flight count decrement. - /// - private async Task DispatchRequestAsync(bool useClientPool, long dispatchIndex, CancellationToken cancellationToken) - { - try - { - if (useClientPool) - { - await this.ExecuteSustainedRpsRequestPooledAsync( - (int)(dispatchIndex % this.clientPoolSize), cancellationToken); - } - else - { - await this.ExecuteSustainedRpsRequestAsync(cancellationToken); - } - Interlocked.Increment(ref this.successfulConnections); - } - catch (OperationCanceledException) { } - catch (Exception ex) - { - this.RecordError(ex); - } - finally - { - Interlocked.Decrement(ref this.activeConnections); - } - } - - /// - /// Execute a single request in SustainedRps mode. SDK retries are enabled, - /// so a single failed request can open multiple TCP connections. - /// - private async Task ExecuteSustainedRpsRequestAsync(CancellationToken cancellationToken) - { - using HttpClient httpClient = this.CreateNonPooledHttpClient(); - using CosmosClient client = this.CreateCosmosClientForMode(httpClient); - - Container container = client.GetDatabase(this.databaseName).GetContainer(this.collectionName); - - Stopwatch sw = Stopwatch.StartNew(); - try - { - ResponseMessage response = await container.ReadItemStreamAsync( - SeedDocumentId, - new PartitionKey(SeedDocumentId), - cancellationToken: cancellationToken); - sw.Stop(); - - this.RecordStatusCode(response.StatusCode); - this.RecordLatency(response.StatusCode, sw.Elapsed.TotalMilliseconds); - - if (!response.IsSuccessStatusCode) - { - throw new Exception($"Unexpected status: {response.StatusCode} - {response.ErrorMessage}"); - } - } - catch (CosmosException ex) - { - sw.Stop(); - this.RecordStatusCode(ex.StatusCode); - this.RecordLatency(ex.StatusCode, sw.Elapsed.TotalMilliseconds); - - // Track retries — the SDK already retried internally, each opening new connections - if (ex.Headers?.AllKeys()?.Contains("x-ms-retry-after-ms") == true || - ex.RetryAfter.HasValue) - { - Interlocked.Increment(ref this.retryAttempts); - } - - throw; - } - } - - /// - /// Creates a CosmosClient with retry settings based on current mode. - /// Storm mode: retries disabled to see raw failure rate. - /// SustainedRps mode: retries enabled to amplify connection pressure. - /// - private CosmosClient CreateCosmosClientForMode(HttpClient httpClient) - { - CosmosClientOptions options = new() - { - ConnectionMode = ConnectionMode.Gateway, - HttpClientFactory = () => httpClient, - LimitToEndpoint = true, - RequestTimeout = TimeSpan.FromMilliseconds(this.stormSettings.RequestTimeoutMs), - }; - - if (this.stormSettings.Mode == StormMode.SustainedRps) - { - // Enable retries — each retry with pooling disabled = new TCP connection - options.MaxRetryAttemptsOnRateLimitedRequests = this.stormSettings.SdkMaxRetryCount; - options.MaxRetryWaitTimeOnRateLimitedRequests = TimeSpan.FromSeconds(this.stormSettings.SdkMaxRetryWaitTimeSeconds); - } - else - { - // Original storm mode: no retries - options.MaxRetryAttemptsOnRateLimitedRequests = 0; - options.MaxRetryWaitTimeOnRateLimitedRequests = TimeSpan.Zero; - } - - if (this.accountInfo.UseAAD) - { - return new CosmosClient( - this.accountInfo.AccountEndpoint, - new Azure.Identity.DefaultAzureCredential(), - options); - } - else - { - return new CosmosClient( - this.accountInfo.AccountEndpoint, - this.accountInfo.AccountKey, - options); - } - } - - private void UpdatePeakInFlight() - { - long current = Interlocked.Read(ref this.activeConnections); - long peak; - do - { - peak = Interlocked.Read(ref this.peakActiveConnections); - if (current <= peak) break; - } while (Interlocked.CompareExchange(ref this.peakActiveConnections, current, peak) != peak); - } - - private async Task ConnectionStormWorkerAsync(int workerId, CancellationToken cancellationToken) - { - while (!cancellationToken.IsCancellationRequested) - { - // Check if we've hit the max connections limit - if (this.stormSettings.MaxTotalConnections > 0 && - Interlocked.Read(ref this.totalConnectionAttempts) >= this.stormSettings.MaxTotalConnections) - { - return; - } - - // Check port availability before attempting connection - if (this.stormSettings.EnablePortAvailabilityCheck && this.stormSettings.MaxTimeWaitConnections > 0) - { - if (await this.WaitForPortAvailabilityAsync(cancellationToken)) - { - // If we had to wait, continue to re-check conditions - continue; - } - } - - // Rate limiting if target is specified - if (this.stormSettings.TargetConnectionsPerSecond > 0) - { - double elapsedSeconds = this.globalStopwatch.Elapsed.TotalSeconds; - double expectedConnections = elapsedSeconds * this.stormSettings.TargetConnectionsPerSecond; - long currentConnections = Interlocked.Read(ref this.totalConnectionAttempts); - - if (currentConnections >= expectedConnections) - { - await Task.Delay(1, cancellationToken); - continue; - } - } - - Interlocked.Increment(ref this.totalConnectionAttempts); - Interlocked.Increment(ref this.activeConnections); - - try - { - if (this.stormSettings.FireAndForget) - { - // Fire-and-forget: don't await, just launch - _ = this.ExecuteSingleConnectionAsync(cancellationToken) - .ContinueWith(t => - { - Interlocked.Decrement(ref this.activeConnections); - if (t.IsFaulted) - { - this.RecordError(t.Exception?.InnerException); - } - else - { - Interlocked.Increment(ref this.successfulConnections); - } - }, TaskScheduler.Default); - } - else - { - await this.ExecuteSingleConnectionAsync(cancellationToken); - Interlocked.Increment(ref this.successfulConnections); - Interlocked.Decrement(ref this.activeConnections); - } - } - catch (Exception ex) - { - Interlocked.Decrement(ref this.activeConnections); - this.RecordError(ex); - } - } - } - - private async Task ExecuteSingleConnectionAsync(CancellationToken cancellationToken) - { - // Create a NEW client for each request to force new TCP connections - using HttpClient httpClient = this.CreateNonPooledHttpClient(); - using CosmosClient client = this.CreateCosmosClientForMode(httpClient); - - Container container = client.GetDatabase(this.databaseName).GetContainer(this.collectionName); - - // Read the seed document (cached at SQLx, no RU charge) - Stopwatch sw = Stopwatch.StartNew(); - try - { - ResponseMessage response = await container.ReadItemStreamAsync( - SeedDocumentId, - new PartitionKey(SeedDocumentId), - cancellationToken: cancellationToken); - sw.Stop(); - - this.RecordStatusCode(response.StatusCode); - this.RecordLatency(response.StatusCode, sw.Elapsed.TotalMilliseconds); - - if (!response.IsSuccessStatusCode) - { - throw new Exception($"Unexpected status: {response.StatusCode} - {response.ErrorMessage}"); - } - } - catch (CosmosException ex) - { - sw.Stop(); - this.RecordStatusCode(ex.StatusCode); - this.RecordLatency(ex.StatusCode, sw.Elapsed.TotalMilliseconds); - throw; - } - } - - private void RecordStatusCode(HttpStatusCode statusCode) - { - this.statusCodeCounts.AddOrUpdate(statusCode, 1, (_, count) => count + 1); - } - - private void RecordLatency(HttpStatusCode statusCode, double latencyMs) - { - var bag = this.statusCodeLatencies.GetOrAdd(statusCode, _ => new ConcurrentBag()); - bag.Add(latencyMs); - } - - private (double avg, double p99) GetLatencyStats(HttpStatusCode statusCode) - { - if (!this.statusCodeLatencies.TryGetValue(statusCode, out var bag) || bag.IsEmpty) - return (0, 0); - - double[] samples = bag.ToArray(); - double avg = samples.Average(); - Array.Sort(samples); - int p99Index = Math.Min((int)(samples.Length * 0.99), samples.Length - 1); - return (avg, samples[p99Index]); - } - - private HttpClient CreateNonPooledHttpClient() - { - SocketsHttpHandler handler; - - if (this.stormSettings.DisableConnectionPooling) - { - handler = new SocketsHttpHandler - { - // Disable connection pooling - each request gets a new connection - PooledConnectionLifetime = TimeSpan.Zero, - PooledConnectionIdleTimeout = TimeSpan.Zero, - MaxConnectionsPerServer = 1, - - // Short timeouts to accumulate TIME_WAIT connections faster - ConnectTimeout = TimeSpan.FromMilliseconds(this.stormSettings.ConnectionTimeoutMs), - }; - } - else - { - handler = new SocketsHttpHandler(); - } - - // Accept self-signed certs for test environments - handler.SslOptions.RemoteCertificateValidationCallback = (_, _, _, _) => true; - - HttpClient client = new(handler) - { - Timeout = TimeSpan.FromMilliseconds(this.stormSettings.RequestTimeoutMs) - }; - - // Force HTTP/1.1 to prevent multiplexing (uses more connections) - if (this.stormSettings.ForceHttp11) - { - client.DefaultRequestVersion = HttpVersion.Version11; - client.DefaultVersionPolicy = HttpVersionPolicy.RequestVersionExact; - } - - if (!string.IsNullOrEmpty(this.accountInfo.AccountDnsName)) - { - client.DefaultRequestHeaders.Host = this.accountInfo.AccountDnsName; - } - - return client; - } - - // CreateCosmosClientForMode is defined above in the SustainedRps section - - /// - /// Creates an HttpClient with connection pooling enabled for multi-client mode. - /// Each client maintains its own pool of up to MaxConnectionsPerClient connections. - /// - private HttpClient CreatePooledHttpClient() - { - SocketsHttpHandler handler = new() - { - MaxConnectionsPerServer = this.stormSettings.MaxConnectionsPerClient, - ConnectTimeout = TimeSpan.FromMilliseconds(this.stormSettings.ConnectionTimeoutMs), - }; - - handler.SslOptions.RemoteCertificateValidationCallback = (_, _, _, _) => true; - - HttpClient client = new(handler) - { - Timeout = TimeSpan.FromMilliseconds(this.stormSettings.RequestTimeoutMs) - }; - - if (this.stormSettings.ForceHttp11) - { - client.DefaultRequestVersion = HttpVersion.Version11; - client.DefaultVersionPolicy = HttpVersionPolicy.RequestVersionExact; - } - - if (!string.IsNullOrEmpty(this.accountInfo.AccountDnsName)) - { - client.DefaultRequestHeaders.Host = this.accountInfo.AccountDnsName; - } - - return client; - } - - /// - /// Pre-creates a pool of long-lived CosmosClient instances for connection reuse. - /// Each client simulates a distinct "customer" and maintains its own connection pool. - /// Requests are round-robined across clients. - /// Total connections = NumberOfDistinctClients * MaxConnectionsPerClient. - /// - private void InitializeClientPool() - { - this.clientPoolSize = Math.Max(1, this.stormSettings.NumberOfDistinctClients); - this.httpClientPool = new HttpClient[this.clientPoolSize]; - this.cosmosClientPool = new CosmosClient[this.clientPoolSize]; - this.containerPool = new Container[this.clientPoolSize]; - - for (int i = 0; i < this.clientPoolSize; i++) - { - this.httpClientPool[i] = this.CreatePooledHttpClient(); - this.cosmosClientPool[i] = this.CreateCosmosClientForMode(this.httpClientPool[i]); - this.containerPool[i] = this.cosmosClientPool[i] - .GetDatabase(this.databaseName) - .GetContainer(this.collectionName); - } - - int totalConns = this.clientPoolSize * this.stormSettings.MaxConnectionsPerClient; - Console.WriteLine($"Initialized {this.clientPoolSize} pooled CosmosClient instances"); - Console.WriteLine($" MaxConnectionsPerClient: {this.stormSettings.MaxConnectionsPerClient}"); - Console.WriteLine($" Total pooled connections: up to {totalConns:N0}"); - Console.WriteLine(); - } - - /// - /// Disposes all pooled clients and their underlying HTTP connections. - /// - private void DisposeClientPool() - { - if (this.cosmosClientPool != null) - { - foreach (var client in this.cosmosClientPool) - { - client?.Dispose(); - } - this.cosmosClientPool = null; - } - - if (this.httpClientPool != null) - { - foreach (var client in this.httpClientPool) - { - client?.Dispose(); - } - this.httpClientPool = null; - } - - this.containerPool = null; - } - - /// - /// Execute a single read request using a pre-created pooled CosmosClient. - /// The Container is shared and thread-safe. Connections are reused from the pool. - /// - private async Task ExecuteSustainedRpsRequestPooledAsync(int clientIndex, CancellationToken cancellationToken) - { - Container container = this.containerPool![clientIndex % this.clientPoolSize]; - - Stopwatch sw = Stopwatch.StartNew(); - try - { - ResponseMessage response = await container.ReadItemStreamAsync( - SeedDocumentId, - new PartitionKey(SeedDocumentId), - cancellationToken: cancellationToken); - sw.Stop(); - - this.RecordStatusCode(response.StatusCode); - this.RecordLatency(response.StatusCode, sw.Elapsed.TotalMilliseconds); - - if (!response.IsSuccessStatusCode) - { - throw new Exception($"Unexpected status: {response.StatusCode} - {response.ErrorMessage}"); - } - } - catch (CosmosException ex) - { - sw.Stop(); - this.RecordStatusCode(ex.StatusCode); - this.RecordLatency(ex.StatusCode, sw.Elapsed.TotalMilliseconds); - - if (ex.Headers?.AllKeys()?.Contains("x-ms-retry-after-ms") == true || - ex.RetryAfter.HasValue) - { - Interlocked.Increment(ref this.retryAttempts); - } - - throw; - } - } - - private void RecordError(Exception? ex) - { - Interlocked.Increment(ref this.failedConnections); - - string errorType = ex?.GetType().Name ?? "Unknown"; - string errorLocation = "Unknown"; - - // Dig into the exception to find SocketException - Exception? current = ex; - SocketException? socketEx = null; - while (current != null) - { - if (current is SocketException se) - { - socketEx = se; - break; - } - current = current.InnerException; - } - - if (socketEx != null) - { - // Analyze SocketException to determine if it's client-side or server-side - var errorInfo = ClassifySocketError(socketEx); - errorType = errorInfo.errorType; - errorLocation = errorInfo.location; - } - else if (ex?.Message != null) - { - // Fallback to message-based classification - var errorInfo = ClassifyByMessage(ex.Message); - errorType = errorInfo.errorType; - errorLocation = errorInfo.location; - } - - this.errorCounts.AddOrUpdate(errorType, 1, (_, count) => count + 1); - this.errorLocationCounts.AddOrUpdate(errorLocation, 1, (_, count) => count + 1); - } - - private static (string errorType, string location) ClassifySocketError(SocketException socketEx) - { - // SocketErrorCode gives us precise information about the failure - // Reference: https://docs.microsoft.com/en-us/dotnet/api/system.net.sockets.socketerror - - return socketEx.SocketErrorCode switch - { - // CLIENT-SIDE PORT EXHAUSTION indicators - // These occur when the LOCAL machine runs out of ephemeral ports - SocketError.AddressAlreadyInUse => ("CLIENT:AddressInUse", "CLIENT"), - SocketError.AddressNotAvailable => ("CLIENT:NoLocalAddress", "CLIENT"), - SocketError.TooManyOpenSockets => ("CLIENT:TooManySockets", "CLIENT"), - SocketError.NoBufferSpaceAvailable => ("CLIENT:NoBufferSpace", "CLIENT"), - - // SERVER-SIDE / NETWORK issues - // These indicate the server rejected or can't handle the connection - SocketError.ConnectionRefused => ("SERVER:ConnectionRefused", "SERVER"), - SocketError.ConnectionReset => ("SERVER:ConnectionReset", "SERVER"), - SocketError.ConnectionAborted => ("SERVER:ConnectionAborted", "SERVER"), - SocketError.HostDown => ("SERVER:HostDown", "SERVER"), - SocketError.HostUnreachable => ("SERVER:HostUnreachable", "SERVER"), - SocketError.NetworkDown => ("NETWORK:NetworkDown", "NETWORK"), - SocketError.NetworkUnreachable => ("NETWORK:NetworkUnreachable", "NETWORK"), - - // TIMEOUT issues (could be either side) - SocketError.TimedOut => ("TIMEOUT:SocketTimeout", "UNKNOWN"), - - // Other errors - _ => ($"Socket:{socketEx.SocketErrorCode}", "UNKNOWN") - }; - } - - private static (string errorType, string location) ClassifyByMessage(string message) - { - // CLIENT-SIDE port exhaustion patterns - if (message.Contains("address already in use", StringComparison.OrdinalIgnoreCase) || - message.Contains("Only one usage of each socket address", StringComparison.OrdinalIgnoreCase)) - { - return ("CLIENT:PortExhaustion", "CLIENT"); - } - - if (message.Contains("No ephemeral ports", StringComparison.OrdinalIgnoreCase) || - message.Contains("cannot assign requested address", StringComparison.OrdinalIgnoreCase)) - { - return ("CLIENT:NoEphemeralPorts", "CLIENT"); - } - - if (message.Contains("too many open files", StringComparison.OrdinalIgnoreCase) || - message.Contains("EMFILE", StringComparison.OrdinalIgnoreCase)) - { - return ("CLIENT:TooManyFileDescriptors", "CLIENT"); - } - - // SERVER-SIDE patterns - if (message.Contains("refused", StringComparison.OrdinalIgnoreCase)) - { - return ("SERVER:ConnectionRefused", "SERVER"); - } - - if (message.Contains("reset", StringComparison.OrdinalIgnoreCase)) - { - return ("SERVER:ConnectionReset", "SERVER"); - } - - if (message.Contains("503", StringComparison.OrdinalIgnoreCase) || - message.Contains("service unavailable", StringComparison.OrdinalIgnoreCase)) - { - return ("SERVER:ServiceUnavailable", "SERVER"); - } - - if (message.Contains("429", StringComparison.OrdinalIgnoreCase) || - message.Contains("too many requests", StringComparison.OrdinalIgnoreCase)) - { - return ("SERVER:Throttled", "SERVER"); - } - - // TIMEOUT patterns - if (message.Contains("timeout", StringComparison.OrdinalIgnoreCase)) - { - return ("TIMEOUT:RequestTimeout", "UNKNOWN"); - } - - return ("Other", "UNKNOWN"); - } - - private (int timeWait, int established, int closeWait, int finWait) GetTcpConnectionStats() - { - try - { - var properties = IPGlobalProperties.GetIPGlobalProperties(); - var connections = properties.GetActiveTcpConnections(); - - int timeWait = 0; - int established = 0; - int closeWait = 0; - int finWait = 0; - - foreach (var conn in connections) - { - switch (conn.State) - { - case TcpState.TimeWait: - timeWait++; - break; - case TcpState.Established: - established++; - break; - case TcpState.CloseWait: - closeWait++; - break; - case TcpState.FinWait1: - case TcpState.FinWait2: - finWait++; - break; - } - } - - return (timeWait, established, closeWait, finWait); - } - catch - { - return (0, 0, 0, 0); - } - } - - /// - /// Checks if local ports are available. Returns true if we had to pause (caller should re-check conditions). - /// - private async Task WaitForPortAvailabilityAsync(CancellationToken cancellationToken) - { - var stats = GetTcpConnectionStats(); - - if (stats.timeWait >= this.stormSettings.MaxTimeWaitConnections) - { - // Too many TIME_WAIT connections - pause to let them expire - Interlocked.Increment(ref this.portThrottlePauses); - await Task.Delay(this.stormSettings.PortExhaustionPauseMs, cancellationToken); - return true; // We paused, caller should re-check - } - - return false; // No pause needed - } - - private void PrintPortStatus(string label) - { - var stats = GetTcpConnectionStats(); - Console.WriteLine($"[{label}] TCP Connections - TIME_WAIT: {stats.timeWait:N0} | ESTABLISHED: {stats.established:N0} | CLOSE_WAIT: {stats.closeWait:N0} | FIN_WAIT: {stats.finWait:N0}"); - } - - private async Task ReportMetricsAsync(CancellationToken cancellationToken) - { - int intervalSeconds = this.stormSettings.MetricReportingIntervalSeconds; - long lastTotalConnections = 0; - - var props = IPGlobalProperties.GetIPGlobalProperties(); - TcpStatistics? lastTcpV4Stats = props.GetTcpIPv4Statistics(); - TcpStatistics? lastTcpV6Stats = props.GetTcpIPv4Statistics(); - - while (!cancellationToken.IsCancellationRequested) - { - await Task.Delay(TimeSpan.FromSeconds(intervalSeconds), cancellationToken); - - long currentTotal = Interlocked.Read(ref this.totalConnectionAttempts); - long currentSuccess = Interlocked.Read(ref this.successfulConnections); - long currentFailed = Interlocked.Read(ref this.failedConnections); - long currentActive = Interlocked.Read(ref this.activeConnections); - double elapsed = this.globalStopwatch.Elapsed.TotalSeconds; - - long recentConnections = currentTotal - lastTotalConnections; - double recentRate = recentConnections / (double)intervalSeconds; - double overallRate = currentTotal / elapsed; - - // Get TCP stats - - var tcpStats = GetTcpConnectionStats(); - var properties = IPGlobalProperties.GetIPGlobalProperties(); - - Console.WriteLine($"[{elapsed:F1}s] Total: {currentTotal:N0} | InFlight: {currentActive:N0} (peak: {Interlocked.Read(ref this.peakActiveConnections):N0}) | " + - $"Success: {currentSuccess:N0} | Failed: {currentFailed:N0} | " + - $"Rate: {recentRate:N0}/s (avg: {overallRate:N0}/s)"); - - Console.WriteLine($" TCP: TIME_WAIT={tcpStats.timeWait:N0} ESTABLISHED={tcpStats.established:N0} CLOSE_WAIT={tcpStats.closeWait:N0}"); - - // HTTP connection lifecycle metrics (via EventSource) - this.httpConnectionMetrics.ReportMetrics(); - - var tcpV4Stats= properties.GetTcpIPv4Statistics(); - Console.WriteLine("IPV4 -> " + string.Join(", ", typeof(TcpStatistics).GetProperties() - .Select(p => (name: p.Name, current: Convert.ToInt64(p.GetValue(tcpV4Stats)), prev: Convert.ToInt64(p.GetValue(lastTcpV4Stats)))) - .Select(x => $"{x.name}= ({(x.current - x.prev >= 0 ? "+" : "")}{x.current - x.prev})"))); - - var tcpV6Stats = properties.GetTcpIPv6Statistics(); - Console.WriteLine("IPV6 -> " + string.Join(", ", typeof(TcpStatistics).GetProperties() - .Select(p => (name: p.Name, current: Convert.ToInt64(p.GetValue(tcpV6Stats)), prev: Convert.ToInt64(p.GetValue(lastTcpV6Stats)))) - .Select(x => $"{x.name}= ({(x.current - x.prev >= 0 ? "+" : "")}{x.current - x.prev})"))); - - lastTcpV4Stats = tcpV4Stats; - lastTcpV6Stats = tcpV6Stats; - - // Show SustainedRps-specific metrics - if (this.stormSettings.Mode == StormMode.SustainedRps) - { - long retries = Interlocked.Read(ref this.retryAttempts); - long throttled = Interlocked.Read(ref this.throttledByInFlightCap); - if (retries > 0 || throttled > 0) - { - Console.WriteLine($" SustainedRps: failed-after-retry={retries:N0} | in-flight-capped={throttled:N0}"); - } - - // Amplification factor: how many more connections are open than the target RPS - int effectiveRps = this.stormSettings.TargetRequestsPerSecond > 0 - ? this.stormSettings.TargetRequestsPerSecond - : this.stormSettings.TargetConnectionsPerSecond; - double amplification = effectiveRps > 0 - ? currentActive / (double)effectiveRps - : 0; - if (amplification > 1.5) - { - Console.WriteLine($" ⚡ AMPLIFICATION: {amplification:F1}x (in-flight vs target RPS — downstream bottleneck detected)"); - } - } - - // Show port throttle status if enabled - if (this.stormSettings.EnablePortAvailabilityCheck) - { - long pauses = Interlocked.Read(ref this.portThrottlePauses); - if (pauses > 0 || tcpStats.timeWait >= this.stormSettings.MaxTimeWaitConnections * 0.8) - { - Console.WriteLine($" ⏸️ Port Throttle: {pauses:N0} pauses (threshold: {this.stormSettings.MaxTimeWaitConnections:N0})"); - } - } - - if (this.statusCodeCounts.Count > 0) - { - long intervalTotal = this.statusCodeCounts.Values.Sum(); - double intervalElapsed = this.globalStopwatch.Elapsed.TotalSeconds; - foreach (var kv in this.statusCodeCounts.OrderByDescending(kv => kv.Value)) - { - double rate = intervalElapsed > 0 ? kv.Value / intervalElapsed : 0; - var (avg, p99) = this.GetLatencyStats(kv.Key); - Console.WriteLine($" {(int)kv.Key} {kv.Key}: {kv.Value:N0} ({rate:N0}/s) avg={avg:F1}ms P99={p99:F1}ms"); - } - } - - if (this.errorLocationCounts.Count > 0) - { - Console.WriteLine($" Error Location: {string.Join(", ", this.errorLocationCounts.Select(kv => $"{kv.Key}={kv.Value:N0}"))}"); - } - - if (this.errorCounts.Count > 0) - { - Console.WriteLine($" Error Types: {string.Join(", ", this.errorCounts.Select(kv => $"{kv.Key}={kv.Value:N0}"))}"); - } - - // Track changes in TIME_WAIT - int timeWaitDelta = tcpStats.timeWait - this.lastTimeWaitCount; - if (Math.Abs(timeWaitDelta) > 100) - { - Console.WriteLine($" ⚠️ TIME_WAIT delta: {(timeWaitDelta > 0 ? "+" : "")}{timeWaitDelta:N0} (high churn indicates client-side port pressure)"); - } - - this.lastTimeWaitCount = tcpStats.timeWait; - this.lastEstablishedCount = tcpStats.established; - lastTotalConnections = currentTotal; - } - } - - private void PrintFinalSummary() - { - Console.WriteLine(); - Console.WriteLine($"=== FINAL SUMMARY ({this.stormSettings.Mode} mode) ==="); - Console.WriteLine($"Duration: {this.globalStopwatch.Elapsed.TotalSeconds:F2} seconds"); - Console.WriteLine($"Total Connection Attempts: {this.totalConnectionAttempts:N0}"); - Console.WriteLine($"Successful Connections: {this.successfulConnections:N0}"); - Console.WriteLine($"Failed Connections: {this.failedConnections:N0}"); - Console.WriteLine($"Success Rate: {(this.totalConnectionAttempts > 0 ? (double)this.successfulConnections / this.totalConnectionAttempts * 100 : 0):F1}%"); - Console.WriteLine($"Average Rate: {this.totalConnectionAttempts / this.globalStopwatch.Elapsed.TotalSeconds:N0} connections/second"); - Console.WriteLine($"Projected Rate per Minute: {this.totalConnectionAttempts / this.globalStopwatch.Elapsed.TotalSeconds * 60:N0} connections/minute"); - Console.WriteLine($"Peak In-Flight: {Interlocked.Read(ref this.peakActiveConnections):N0}"); - - if (this.stormSettings.Mode == StormMode.SustainedRps) - { - long retries = Interlocked.Read(ref this.retryAttempts); - long throttled = Interlocked.Read(ref this.throttledByInFlightCap); - Console.WriteLine(); - Console.WriteLine("--- SustainedRps Amplification Analysis ---"); - int effectiveTargetRps = this.stormSettings.TargetRequestsPerSecond > 0 - ? this.stormSettings.TargetRequestsPerSecond - : this.stormSettings.TargetConnectionsPerSecond; - Console.WriteLine($"Target RPS: {effectiveTargetRps:N0}"); - Console.WriteLine($"Actual avg RPS: {this.totalConnectionAttempts / this.globalStopwatch.Elapsed.TotalSeconds:N0}"); - Console.WriteLine($"Requests Failed After SDK Retries: {retries:N0}"); - Console.WriteLine($"Throttled by In-Flight Cap: {throttled:N0}"); - double peakAmplification = effectiveTargetRps > 0 - ? Interlocked.Read(ref this.peakActiveConnections) / (double)effectiveTargetRps - : 0; - Console.WriteLine($"Peak Amplification Factor: {peakAmplification:F1}x"); - if (peakAmplification > 2) - { - Console.WriteLine("⚡ Significant connection amplification detected — downstream bottleneck caused request pile-up."); - } - } - - if (this.stormSettings.EnablePortAvailabilityCheck) - { - long pauses = Interlocked.Read(ref this.portThrottlePauses); - Console.WriteLine($"Port Throttle Pauses: {pauses:N0}"); - } - Console.WriteLine(); - - // Final port status - this.PrintPortStatus("Final"); - Console.WriteLine(); - - // HTTP Connection Lifecycle metrics - var httpFinal = this.httpConnectionMetrics.GetCumulativeSnapshot(); - Console.WriteLine("=== HTTP CONNECTION METRICS ==="); - Console.WriteLine($" Connections Created : {httpFinal.Created:N0}"); - Console.WriteLine($" Connections Closed : {httpFinal.Closed:N0}"); - Console.WriteLine($" Connections Failed : {httpFinal.Failed:N0} (app-level SocketException)"); - Console.WriteLine($" Requests Failed : {httpFinal.RequestsFailed:N0} (EventSource)"); - Console.WriteLine($" Currently Open : {httpFinal.CurrentlyOpen:N0}"); - Console.WriteLine($" Unique Endpoints : {httpFinal.UniqueRemoteEndpoints:N0}"); - var vbFinal = httpFinal.VersionBreakdown; - Console.WriteLine($" HTTP/1.1 : {vbFinal.Http11Open:N0} open / {vbFinal.Http11Total:N0} total"); - Console.WriteLine($" HTTP/2 : {vbFinal.Http2Open:N0} open / {vbFinal.Http2Total:N0} total"); - Console.WriteLine($" HTTP/3 : {vbFinal.Http3Open:N0} open / {vbFinal.Http3Total:N0} total"); - var rtFinal = httpFinal.RequestTelemetry; - Console.WriteLine($" Requests Started : {rtFinal.Started:N0}"); - Console.WriteLine($" Requests Completed : {rtFinal.Stopped:N0}"); - Console.WriteLine($" Requests In-Flight : {rtFinal.InFlight:N0}"); - Console.WriteLine($" Queue Wait (avg) : {rtFinal.AvgQueueTimeMs:F2} ms"); - Console.WriteLine($" Queue Wait (max) : {rtFinal.MaxQueueTimeMs:F2} ms"); - if (httpFinal.RecentFailures.Length > 0) - { - Console.WriteLine($" Recent Failures ({httpFinal.RecentFailures.Length}):"); - foreach (var msg in httpFinal.RecentFailures.TakeLast(5)) - { - Console.WriteLine($" - {msg[..Math.Min(msg.Length, 120)]}"); - } - } - Console.WriteLine(); - - // HTTP Status Code breakdown - if (this.statusCodeCounts.Count > 0) - { - double totalElapsed = this.globalStopwatch.Elapsed.TotalSeconds; - long totalRequests = this.statusCodeCounts.Values.Sum(); - Console.WriteLine("=== HTTP STATUS CODE BREAKDOWN ==="); - Console.WriteLine($" {"Status",-26} {"Count",10} {" %",5} {"Rate/s",8} {"Avg ms",8} {"P99 ms",8} Description"); - Console.WriteLine($" {new string('-', 100)}"); - foreach (var kvp in this.statusCodeCounts.OrderByDescending(kv => kv.Value)) - { - double percentage = totalRequests > 0 ? (double)kvp.Value / totalRequests * 100 : 0; - double rate = totalElapsed > 0 ? kvp.Value / totalElapsed : 0; - var (avg, p99) = this.GetLatencyStats(kvp.Key); - string statusDescription = GetStatusCodeDescription(kvp.Key); - Console.WriteLine($" {(int)kvp.Key} {kvp.Key,-22} {kvp.Value,10:N0} {percentage,5:F1}% {rate,8:N0} {avg,8:F1} {p99,8:F1} {statusDescription}"); - } - Console.WriteLine(); - } - - if (this.errorLocationCounts.Count > 0) - { - Console.WriteLine("=== ERROR LOCATION ANALYSIS ==="); - long clientErrors = 0; - long serverErrors = 0; - long networkErrors = 0; - long unknownErrors = 0; - - foreach (var kvp in this.errorLocationCounts) - { - switch (kvp.Key) - { - case "CLIENT": - clientErrors = kvp.Value; - break; - case "SERVER": - serverErrors = kvp.Value; - break; - case "NETWORK": - networkErrors = kvp.Value; - break; - default: - unknownErrors = kvp.Value; - break; - } - } - - long totalErrors = this.failedConnections; - if (totalErrors > 0) - { - Console.WriteLine($" CLIENT-SIDE errors: {clientErrors:N0} ({(double)clientErrors / totalErrors * 100:F1}%)"); - Console.WriteLine($" SERVER-SIDE errors: {serverErrors:N0} ({(double)serverErrors / totalErrors * 100:F1}%)"); - Console.WriteLine($" NETWORK errors: {networkErrors:N0} ({(double)networkErrors / totalErrors * 100:F1}%)"); - Console.WriteLine($" UNKNOWN location: {unknownErrors:N0} ({(double)unknownErrors / totalErrors * 100:F1}%)"); - } - Console.WriteLine(); - - if (clientErrors > serverErrors && clientErrors > 0) - { - Console.WriteLine("*** CLIENT-SIDE PORT EXHAUSTION DETECTED! ***"); - Console.WriteLine(" The client machine is running out of ephemeral ports."); - Console.WriteLine(" This is NOT a Mux/server issue - it's the test client hitting local limits."); - Console.WriteLine(); - Console.WriteLine(" To reduce client-side exhaustion:"); - Console.WriteLine(" 1. Reduce ConcurrentConnectionAttempts"); - Console.WriteLine(" 2. Increase connection reuse (DisableConnectionPooling=false)"); - Console.WriteLine(" 3. Run from multiple client machines"); - Console.WriteLine(" 4. Increase ephemeral port range: netsh int ipv4 set dynamic tcp start=1025 num=64510"); - Console.WriteLine(" 5. Reduce TIME_WAIT: reg add HKLM\\SYSTEM\\CurrentControlSet\\Services\\Tcpip\\Parameters /v TcpTimedWaitDelay /t REG_DWORD /d 30"); - } - else if (serverErrors > clientErrors && serverErrors > 0) - { - Console.WriteLine("*** SERVER-SIDE ISSUES DETECTED! ***"); - Console.WriteLine(" The server (Mux/CosmosDB) is rejecting or dropping connections."); - Console.WriteLine(" This indicates the target is under stress."); - } - } - - if (this.errorCounts.Count > 0) - { - Console.WriteLine(); - Console.WriteLine("Error Type Breakdown:"); - foreach (var kvp in this.errorCounts.OrderByDescending(x => x.Value)) - { - double percentage = this.failedConnections > 0 ? (double)kvp.Value / this.failedConnections * 100 : 0; - Console.WriteLine($" {kvp.Key}: {kvp.Value:N0} ({percentage:F1}%)"); - } - } - } - - private static string GetStatusCodeDescription(HttpStatusCode statusCode) - { - return statusCode switch - { - HttpStatusCode.OK => "Success", - HttpStatusCode.Created => "Created", - HttpStatusCode.NoContent => "No Content", - HttpStatusCode.NotModified => "Not Modified (cached)", - HttpStatusCode.BadRequest => "Bad Request - client error", - HttpStatusCode.Unauthorized => "Unauthorized - auth failure", - HttpStatusCode.Forbidden => "Forbidden - access denied", - HttpStatusCode.NotFound => "Not Found - seed document missing?", - HttpStatusCode.RequestTimeout => "Request Timeout - server didn't respond", - HttpStatusCode.Conflict => "Conflict - write conflict", - HttpStatusCode.Gone => "Gone - resource deleted", - HttpStatusCode.PreconditionFailed => "Precondition Failed - ETag mismatch", - HttpStatusCode.RequestEntityTooLarge => "Payload Too Large", - (HttpStatusCode)429 => "Too Many Requests - THROTTLED", - HttpStatusCode.InternalServerError => "Internal Server Error", - HttpStatusCode.BadGateway => "Bad Gateway - upstream failure", - HttpStatusCode.ServiceUnavailable => "Service Unavailable - SERVER OVERLOADED", - HttpStatusCode.GatewayTimeout => "Gateway Timeout - upstream timeout", - _ => $"HTTP {(int)statusCode}" - }; - } - - public void Dispose() - { - this.DisposeClientPool(); - this.httpConnectionMetrics.Dispose(); - } -} diff --git a/Microsoft.Azure.Cosmos.Samples/Tools/ConnectionStormTool/HttpConnectionMetrics.cs b/Microsoft.Azure.Cosmos.Samples/Tools/ConnectionStormTool/HttpConnectionMetrics.cs deleted file mode 100644 index abefd051eb..0000000000 --- a/Microsoft.Azure.Cosmos.Samples/Tools/ConnectionStormTool/HttpConnectionMetrics.cs +++ /dev/null @@ -1,514 +0,0 @@ -// ------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. -// ------------------------------------------------------------ - -namespace ConnectionStormTool; - -using System.Collections.Concurrent; -using System.Diagnostics.Tracing; - -/// -/// Collects HTTP connection and request lifecycle metrics via the System.Net.Http -/// EventSource using the enriched payloads available in .NET 10. -/// -/// Tracked events: -/// 1 = RequestStart — scheme, host, port, pathAndQuery, versionMajor, versionMinor -/// 2 = RequestStop — statusCode -/// 3 = RequestFailed — exceptionMessage -/// 4 = ConnectionEstablished — versionMajor, versionMinor, connectionId, scheme, host, port, remoteAddress -/// 5 = ConnectionClosed — versionMajor, versionMinor, connectionId -/// 6 = RequestLeftQueue — timeOnQueueMilliseconds, versionMajor, versionMinor -/// 15 = RequestFailedDetailed — full exception toString (opt-in keyword) -/// -/// Must be created BEFORE the first HttpClient is used so the EventListener -/// captures the EventSource enable. -/// -internal sealed class HttpConnectionMetrics : IDisposable -{ - private readonly HttpEventSourceListener eventSourceListener; - - // --- Connection counters --- - private long connectionsCreated; - private long connectionsClosed; - private long connectionsFailed; // app-level (RecordConnectionFailure) - private long requestsFailed; // EventSource RequestFailed (event 3) - - // Per-HTTP-version connection counters - private long http11Created; - private long http11Closed; - private long http2Created; - private long http2Closed; - private long http3Created; - private long http3Closed; - - // Active connection tracking by connectionId - private readonly ConcurrentDictionary activeConnections = new(); - - // Unique remote endpoints observed - private readonly ConcurrentDictionary remoteEndpoints = new(); - - // Recent failure messages (ring buffer for diagnostics) - private const int MaxRecentFailures = 20; - private readonly ConcurrentQueue recentFailureMessages = new(); - - // --- Request counters --- - private long requestsStarted; - private long requestsStopped; - - // Queue wait time tracking (microsecond-granularity from RequestLeftQueue) - private long requestsDequeued; - private double queueTimeTotalMs; - private double queueTimeMaxMs; - private readonly object queueTimeLock = new(); - - // --- Snapshot state for interval delta reporting --- - private long lastSnapshotCreated; - private long lastSnapshotClosed; - private long lastSnapshotFailed; - private long lastSnapshotRequestsFailed; - private long lastSnapshotRequestsStarted; - private long lastSnapshotRequestsStopped; - private long lastSnapshotRequestsDequeued; - private double lastSnapshotQueueTimeTotalMs; - - private readonly bool traceEnabled = true; - - public HttpConnectionMetrics(bool traceEnabled = false) - { - this.traceEnabled = traceEnabled; - this.eventSourceListener = new HttpEventSourceListener(this); - } - - /// - /// Starts listening for metrics. The EventSourceListener is already active from - /// construction; this method is kept for API compatibility. - /// - public void Start() - { - // EventSourceListener is active from construction — nothing additional needed. - } - - public void ReportMetrics() - { - HttpConnectionIntervalSnapshot httpSnap = this.GetIntervalSnapshot(); - Console.Write($" HTTP Conns: created={httpSnap.Cumulative.Created:N0} "); - Console.ForegroundColor = ConsoleColor.Green; - Console.Write($"(+{httpSnap.DeltaCreated:N0})"); - Console.ResetColor(); - Console.Write($" | closed={httpSnap.Cumulative.Closed:N0} "); - Console.ForegroundColor = ConsoleColor.Green; - Console.Write($"(+{httpSnap.DeltaClosed:N0})"); - Console.ResetColor(); - Console.Write($" | failed={httpSnap.Cumulative.Failed:N0} "); - Console.ForegroundColor = ConsoleColor.Green; - Console.Write($"(+{httpSnap.DeltaFailed:N0})"); - Console.ResetColor(); - Console.Write($" | reqFailed={httpSnap.Cumulative.RequestsFailed:N0} "); - Console.ForegroundColor = ConsoleColor.Green; - Console.Write($"(+{httpSnap.DeltaRequestsFailed:N0})"); - Console.ResetColor(); - Console.WriteLine($" | open={httpSnap.Cumulative.CurrentlyOpen:N0}"); - - var vb = httpSnap.Cumulative.VersionBreakdown; - if (vb.Http11Total > 0 || vb.Http2Total > 0 || vb.Http3Total > 0) - { - Console.WriteLine($" versions: H1.1={vb.Http11Open:N0}/{vb.Http11Total:N0} H2={vb.Http2Open:N0}/{vb.Http2Total:N0} H3={vb.Http3Open:N0}/{vb.Http3Total:N0} endpoints={httpSnap.Cumulative.UniqueRemoteEndpoints:N0}"); - } - - var rt = httpSnap.Cumulative.RequestTelemetry; - Console.Write($" HTTP Reqs: started={rt.Started:N0} "); - Console.ForegroundColor = ConsoleColor.Green; - Console.Write($"(+{httpSnap.DeltaRequestsStarted:N0})"); - Console.ResetColor(); - Console.Write($" | stopped={rt.Stopped:N0} "); - Console.ForegroundColor = ConsoleColor.Green; - Console.Write($"(+{httpSnap.DeltaRequestsStopped:N0})"); - Console.ResetColor(); - Console.Write($" | inFlight={rt.InFlight:N0}"); - Console.WriteLine($" | queueAvg={httpSnap.IntervalAvgQueueTimeMs:F1}ms max={rt.MaxQueueTimeMs:F1}ms"); - } - - // ---- Connection callbacks ---- - - internal void OnConnectionEstablished(byte versionMajor, byte versionMinor, long connectionId, string? remoteAddress) - { - if (this.traceEnabled) - Console.WriteLine($"Connection established: {connectionId} ({versionMajor}.{versionMinor}) to {remoteAddress}"); - - Interlocked.Increment(ref this.connectionsCreated); - - switch (versionMajor) - { - case 1: Interlocked.Increment(ref this.http11Created); break; - case 2: Interlocked.Increment(ref this.http2Created); break; - case 3: Interlocked.Increment(ref this.http3Created); break; - } - - this.activeConnections[connectionId] = new ConnectionInfo( - ConnectionId: connectionId, - HttpVersion: $"{versionMajor}.{versionMinor}", - RemoteAddress: remoteAddress, - EstablishedAt: DateTime.UtcNow); - - if (!string.IsNullOrEmpty(remoteAddress)) - { - this.remoteEndpoints.TryAdd(remoteAddress, 0); - } - } - - internal void OnConnectionClosed(byte versionMajor, long connectionId) - { - if (this.traceEnabled) - Console.WriteLine($"Connection closed: {connectionId} ({versionMajor}.x)"); - - Interlocked.Increment(ref this.connectionsClosed); - - switch (versionMajor) - { - case 1: Interlocked.Increment(ref this.http11Closed); break; - case 2: Interlocked.Increment(ref this.http2Closed); break; - case 3: Interlocked.Increment(ref this.http3Closed); break; - } - - this.activeConnections.TryRemove(connectionId, out _); - } - - internal void OnRequestFailed(string? exceptionMessage) - { - if (this.traceEnabled) - Console.WriteLine($"Request failed: {exceptionMessage}"); - - Interlocked.Increment(ref this.requestsFailed); - - if (!string.IsNullOrEmpty(exceptionMessage)) - { - this.recentFailureMessages.Enqueue(exceptionMessage); - while (this.recentFailureMessages.Count > MaxRecentFailures) - { - this.recentFailureMessages.TryDequeue(out _); - } - } - } - - // ---- Request callbacks ---- - - internal void OnRequestStarted() - { - if (this.traceEnabled) - Console.WriteLine("Request started"); - - Interlocked.Increment(ref this.requestsStarted); - } - - internal void OnRequestStopped() - { - if (this.traceEnabled) - Console.WriteLine("Request stopped"); - - Interlocked.Increment(ref this.requestsStopped); - } - - internal void OnRequestLeftQueue(double timeOnQueueMs) - { - if (this.traceEnabled) - Console.WriteLine($"Request left queue: {timeOnQueueMs}ms"); - - Interlocked.Increment(ref this.requestsDequeued); - - lock (this.queueTimeLock) - { - this.queueTimeTotalMs += timeOnQueueMs; - if (timeOnQueueMs > this.queueTimeMaxMs) - { - this.queueTimeMaxMs = timeOnQueueMs; - } - } - } - - // ---- Snapshots ---- - - /// - /// Returns cumulative totals for HTTP connection and request metrics. - /// - public HttpConnectionSnapshot GetCumulativeSnapshot() - { - long created = Interlocked.Read(ref this.connectionsCreated); - long closed = Interlocked.Read(ref this.connectionsClosed); - long failed = Interlocked.Read(ref this.connectionsFailed); - long reqFailed = Interlocked.Read(ref this.requestsFailed); - long currentlyOpen = created - closed; - - long reqStarted = Interlocked.Read(ref this.requestsStarted); - long reqStopped = Interlocked.Read(ref this.requestsStopped); - long reqDequeued = Interlocked.Read(ref this.requestsDequeued); - double totalQueueMs; - double maxQueueMs; - lock (this.queueTimeLock) - { - totalQueueMs = this.queueTimeTotalMs; - maxQueueMs = this.queueTimeMaxMs; - } - - return new HttpConnectionSnapshot( - Created: created, - Closed: closed, - Failed: failed, - RequestsFailed: reqFailed, - CurrentlyOpen: currentlyOpen, - VersionBreakdown: this.GetVersionBreakdown(), - UniqueRemoteEndpoints: this.remoteEndpoints.Count, - RecentFailures: this.recentFailureMessages.ToArray(), - RequestTelemetry: new RequestTelemetrySnapshot( - Started: reqStarted, - Stopped: reqStopped, - InFlight: reqStarted - reqStopped, - Dequeued: reqDequeued, - AvgQueueTimeMs: reqDequeued > 0 ? totalQueueMs / reqDequeued : 0, - MaxQueueTimeMs: maxQueueMs)); - } - - /// - /// Returns interval deltas (changes since the last call to this method) - /// along with cumulative totals. - /// - public HttpConnectionIntervalSnapshot GetIntervalSnapshot() - { - long created = Interlocked.Read(ref this.connectionsCreated); - long closed = Interlocked.Read(ref this.connectionsClosed); - long failed = Interlocked.Read(ref this.connectionsFailed); - long reqFailed = Interlocked.Read(ref this.requestsFailed); - long currentlyOpen = created - closed; - - long reqStarted = Interlocked.Read(ref this.requestsStarted); - long reqStopped = Interlocked.Read(ref this.requestsStopped); - long reqDequeued = Interlocked.Read(ref this.requestsDequeued); - double totalQueueMs; - double maxQueueMs; - lock (this.queueTimeLock) - { - totalQueueMs = this.queueTimeTotalMs; - maxQueueMs = this.queueTimeMaxMs; - } - - long deltaCreated = created - this.lastSnapshotCreated; - long deltaClosed = closed - this.lastSnapshotClosed; - long deltaFailed = failed - this.lastSnapshotFailed; - long deltaRequestsFailed = reqFailed - this.lastSnapshotRequestsFailed; - long deltaReqStarted = reqStarted - this.lastSnapshotRequestsStarted; - long deltaReqStopped = reqStopped - this.lastSnapshotRequestsStopped; - long deltaDequeued = reqDequeued - this.lastSnapshotRequestsDequeued; - double deltaQueueTotalMs = totalQueueMs - this.lastSnapshotQueueTimeTotalMs; - - this.lastSnapshotCreated = created; - this.lastSnapshotClosed = closed; - this.lastSnapshotFailed = failed; - this.lastSnapshotRequestsFailed = reqFailed; - this.lastSnapshotRequestsStarted = reqStarted; - this.lastSnapshotRequestsStopped = reqStopped; - this.lastSnapshotRequestsDequeued = reqDequeued; - this.lastSnapshotQueueTimeTotalMs = totalQueueMs; - - var requestTelemetry = new RequestTelemetrySnapshot( - Started: reqStarted, - Stopped: reqStopped, - InFlight: reqStarted - reqStopped, - Dequeued: reqDequeued, - AvgQueueTimeMs: reqDequeued > 0 ? totalQueueMs / reqDequeued : 0, - MaxQueueTimeMs: maxQueueMs); - - return new HttpConnectionIntervalSnapshot( - Cumulative: new HttpConnectionSnapshot( - created, closed, failed, reqFailed, currentlyOpen, - this.GetVersionBreakdown(), this.remoteEndpoints.Count, - this.recentFailureMessages.ToArray(), requestTelemetry), - DeltaCreated: deltaCreated, - DeltaClosed: deltaClosed, - DeltaFailed: deltaFailed, - DeltaRequestsFailed: deltaRequestsFailed, - DeltaRequestsStarted: deltaReqStarted, - DeltaRequestsStopped: deltaReqStopped, - DeltaDequeued: deltaDequeued, - IntervalAvgQueueTimeMs: deltaDequeued > 0 ? deltaQueueTotalMs / deltaDequeued : 0); - } - - /// - /// Records a connection failure detected by application-level error handling. - /// - public void RecordConnectionFailure() - { - Interlocked.Increment(ref this.connectionsFailed); - } - - private HttpVersionBreakdown GetVersionBreakdown() - { - return new HttpVersionBreakdown( - Http11Open: Interlocked.Read(ref this.http11Created) - Interlocked.Read(ref this.http11Closed), - Http2Open: Interlocked.Read(ref this.http2Created) - Interlocked.Read(ref this.http2Closed), - Http3Open: Interlocked.Read(ref this.http3Created) - Interlocked.Read(ref this.http3Closed), - Http11Total: Interlocked.Read(ref this.http11Created), - Http2Total: Interlocked.Read(ref this.http2Created), - Http3Total: Interlocked.Read(ref this.http3Created)); - } - - public void Dispose() - { - this.eventSourceListener.Dispose(); - } -} - -/// -/// Listens to the System.Net.Http EventSource for connection and request lifecycle events. -/// Uses .NET 10 enriched payloads (connectionId, remoteAddress, exceptionMessage, queueTime). -/// -/// Event IDs match HttpTelemetry in the .NET runtime: -/// 1 = RequestStart, 2 = RequestStop, 3 = RequestFailed, -/// 4 = ConnectionEstablished, 5 = ConnectionClosed, -/// 6 = RequestLeftQueue, 15 = RequestFailedDetailed. -/// -internal sealed class HttpEventSourceListener : EventListener -{ - private const string HttpEventSourceName = "System.Net.Http"; - private const int RequestStartEventId = 1; - private const int RequestStopEventId = 2; - private const int RequestFailedEventId = 3; - private const int ConnectionEstablishedEventId = 4; - private const int ConnectionClosedEventId = 5; - private const int RequestLeftQueueEventId = 6; - - private readonly HttpConnectionMetrics owner; - - public HttpEventSourceListener(HttpConnectionMetrics owner) - { - this.owner = owner; - } - - protected override void OnEventSourceCreated(EventSource eventSource) - { - if (eventSource.Name == HttpEventSourceName) - { - // EventLevel.Informational captures connection and request events. - // Keywords value 1 = RequestFailedDetailed for richer error diagnostics. - EnableEvents(eventSource, EventLevel.Informational, (EventKeywords)1); - } - } - - protected override void OnEventWritten(EventWrittenEventArgs eventData) - { - switch (eventData.EventId) - { - case RequestStartEventId: - this.owner.OnRequestStarted(); - break; - - case RequestStopEventId: - this.owner.OnRequestStopped(); - break; - - case ConnectionEstablishedEventId: - // .NET 10 payload: versionMajor(0), versionMinor(1), connectionId(2), - // scheme(3), host(4), port(5), remoteAddress(6) - this.owner.OnConnectionEstablished( - versionMajor: CastByte(eventData.Payload, 0), - versionMinor: CastByte(eventData.Payload, 1), - connectionId: CastLong(eventData.Payload, 2), - remoteAddress: eventData.Payload?.Count > 6 ? eventData.Payload[6] as string : null); - break; - - case ConnectionClosedEventId: - // .NET 10 payload: versionMajor(0), versionMinor(1), connectionId(2) - this.owner.OnConnectionClosed( - versionMajor: CastByte(eventData.Payload, 0), - connectionId: CastLong(eventData.Payload, 2)); - break; - - case RequestLeftQueueEventId: - // Payload: timeOnQueueMilliseconds(0), versionMajor(1), versionMinor(2) - this.owner.OnRequestLeftQueue( - timeOnQueueMs: CastDouble(eventData.Payload, 0)); - break; - - case RequestFailedEventId: - // Payload: exceptionMessage(0) - this.owner.OnRequestFailed( - exceptionMessage: eventData.Payload?.Count > 0 ? eventData.Payload[0] as string : null); - break; - } - } - - private static byte CastByte(IReadOnlyList? payload, int index) - { - if (payload == null || payload.Count <= index || payload[index] == null) return 0; - return Convert.ToByte(payload[index]); - } - - private static long CastLong(IReadOnlyList? payload, int index) - { - if (payload == null || payload.Count <= index || payload[index] == null) return 0; - return Convert.ToInt64(payload[index]); - } - - private static double CastDouble(IReadOnlyList? payload, int index) - { - if (payload == null || payload.Count <= index || payload[index] == null) return 0; - return Convert.ToDouble(payload[index]); - } -} - -/// -/// Metadata for a tracked active connection. -/// -internal record ConnectionInfo( - long ConnectionId, - string HttpVersion, - string? RemoteAddress, - DateTime EstablishedAt); - -/// -/// Per-HTTP-version connection counts. -/// -internal record HttpVersionBreakdown( - long Http11Open, - long Http2Open, - long Http3Open, - long Http11Total, - long Http2Total, - long Http3Total); - -/// -/// Request-level telemetry from EventSource. -/// -internal record RequestTelemetrySnapshot( - long Started, - long Stopped, - long InFlight, - long Dequeued, - double AvgQueueTimeMs, - double MaxQueueTimeMs); - -/// -/// Cumulative HTTP connection and request metrics snapshot. -/// -internal record HttpConnectionSnapshot( - long Created, - long Closed, - long Failed, - long RequestsFailed, - long CurrentlyOpen, - HttpVersionBreakdown VersionBreakdown, - int UniqueRemoteEndpoints, - string[] RecentFailures, - RequestTelemetrySnapshot RequestTelemetry); - -/// -/// Interval snapshot with deltas since last snapshot plus cumulative totals. -/// -internal record HttpConnectionIntervalSnapshot( - HttpConnectionSnapshot Cumulative, - long DeltaCreated, - long DeltaClosed, - long DeltaFailed, - long DeltaRequestsFailed, - long DeltaRequestsStarted, - long DeltaRequestsStopped, - long DeltaDequeued, - double IntervalAvgQueueTimeMs); \ No newline at end of file From aa12c80aaea396bb4201fb094c831ee7ac80a8a6 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Thu, 19 Mar 2026 12:32:28 -0700 Subject: [PATCH 07/15] Add ENV variable feature flag for LINQ expression interpretation mode Adds AZURE_COSMOS_LINQ_EXPRESSION_INTERPRETATION_ENABLED to ConfigurationManager. Default: true (interpretation mode ON). Set to false to revert to JIT Compile(). Read once at static initialization, zero per-call overhead. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/Linq/ExpressionCompileHelper.cs | 15 +++++++++++---- .../src/Util/ConfigurationManager.cs | 9 +++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs index a409a94858..0f190776dd 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs @@ -35,12 +35,19 @@ public static Delegate CompileLambda(LambdaExpression lambda) private static Func CreateCompileLambda() { - MethodInfo compileWithPreference = typeof(LambdaExpression) - .GetMethod(nameof(LambdaExpression.Compile), new Type[] { typeof(bool) }); + bool useInterpretation = ConfigurationManager.GetEnvironmentVariable( + ConfigurationManager.LinqExpressionCompileInterpretationEnabled, + defaultValue: true); - if (compileWithPreference != null) + if (useInterpretation) { - return lambda => (Delegate)compileWithPreference.Invoke(lambda, ExpressionCompileHelper.PreferInterpretationArgs); + MethodInfo compileWithPreference = typeof(LambdaExpression) + .GetMethod(nameof(LambdaExpression.Compile), new Type[] { typeof(bool) }); + + if (compileWithPreference != null) + { + return lambda => (Delegate)compileWithPreference.Invoke(lambda, ExpressionCompileHelper.PreferInterpretationArgs); + } } return lambda => lambda.Compile(); diff --git a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs index b5b4d8eec2..b4a43e596c 100644 --- a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs +++ b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs @@ -117,6 +117,15 @@ internal static class ConfigurationManager /// internal static readonly string BypassQueryParsing = "AZURE_COSMOS_BYPASS_QUERY_PARSING"; + /// + /// Environment variable to disable LINQ expression interpretation mode. + /// When set to "false", falls back to JIT-compiled Expression.Compile(). + /// Default (unset or "true"): interpretation mode enabled to prevent native memory growth + /// from DynamicMethod IL emission in long-running services. + /// See: https://github.com/Azure/azure-cosmos-dotnet-v3/issues/5487 + /// + internal static readonly string LinqExpressionCompileInterpretationEnabled = "AZURE_COSMOS_LINQ_EXPRESSION_INTERPRETATION_ENABLED"; + /// /// A read-only string containing the environment variable name for disabling length aware range comparator. /// Length aware range comparators were intorduced in Range class to handle EPK range comparisons correctly in the case of a container's physical partition set consisting of fully and partially specified EPK values. From 698cce0d54cab40034a5b74bebbbb09039ea5dfc Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Thu, 19 Mar 2026 12:37:31 -0700 Subject: [PATCH 08/15] Move feature flag check to CompileLambda for runtime toggling Move ENV var check from static init (CreateCompileLambda) into the per-call CompileLambda method so the flag can be toggled at runtime without restarting the process. Reflection cost is still paid once. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/Linq/ExpressionCompileHelper.cs | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs index 0f190776dd..3e3f98e350 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs @@ -17,11 +17,13 @@ namespace Microsoft.Azure.Cosmos.Linq internal static class ExpressionCompileHelper { private static readonly object[] PreferInterpretationArgs = new object[] { true }; - private static readonly Func CompileLambdaDelegate = ExpressionCompileHelper.CreateCompileLambda(); + private static readonly Func InterpretedCompile = ExpressionCompileHelper.CreateInterpretedCompile(); /// /// Compiles a LambdaExpression using interpretation mode when available /// to avoid native memory growth from DynamicMethod IL emission. + /// The behavior can be toggled at runtime via the + /// AZURE_COSMOS_LINQ_EXPRESSION_INTERPRETATION_ENABLED environment variable. /// public static Delegate CompileLambda(LambdaExpression lambda) { @@ -30,27 +32,28 @@ public static Delegate CompileLambda(LambdaExpression lambda) throw new ArgumentNullException(nameof(lambda)); } - return ExpressionCompileHelper.CompileLambdaDelegate(lambda); + if (ExpressionCompileHelper.InterpretedCompile != null + && ConfigurationManager.GetEnvironmentVariable( + ConfigurationManager.LinqExpressionCompileInterpretationEnabled, + defaultValue: true)) + { + return ExpressionCompileHelper.InterpretedCompile(lambda); + } + + return lambda.Compile(); } - private static Func CreateCompileLambda() + private static Func CreateInterpretedCompile() { - bool useInterpretation = ConfigurationManager.GetEnvironmentVariable( - ConfigurationManager.LinqExpressionCompileInterpretationEnabled, - defaultValue: true); + MethodInfo compileWithPreference = typeof(LambdaExpression) + .GetMethod(nameof(LambdaExpression.Compile), new Type[] { typeof(bool) }); - if (useInterpretation) + if (compileWithPreference != null) { - MethodInfo compileWithPreference = typeof(LambdaExpression) - .GetMethod(nameof(LambdaExpression.Compile), new Type[] { typeof(bool) }); - - if (compileWithPreference != null) - { - return lambda => (Delegate)compileWithPreference.Invoke(lambda, ExpressionCompileHelper.PreferInterpretationArgs); - } + return lambda => (Delegate)compileWithPreference.Invoke(lambda, ExpressionCompileHelper.PreferInterpretationArgs); } - return lambda => lambda.Compile(); + return null; } } } From e5390e626ab9874340cc4fce29054b28e1aff808 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Thu, 19 Mar 2026 13:35:58 -0700 Subject: [PATCH 09/15] Cache ENV variable read at static init for zero per-call overhead Read AZURE_COSMOS_LINQ_EXPRESSION_INTERPRETATION_ENABLED once during class initialization into a static readonly bool. Eliminates repeated ConfigurationManager.GetEnvironmentVariable calls on every CompileLambda invocation. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/Linq/ExpressionCompileHelper.cs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs index 3e3f98e350..2f7e6f1315 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs @@ -18,12 +18,15 @@ internal static class ExpressionCompileHelper { private static readonly object[] PreferInterpretationArgs = new object[] { true }; private static readonly Func InterpretedCompile = ExpressionCompileHelper.CreateInterpretedCompile(); + private static readonly bool IsInterpretationEnabled = ConfigurationManager.GetEnvironmentVariable( + ConfigurationManager.LinqExpressionCompileInterpretationEnabled, + defaultValue: true); /// /// Compiles a LambdaExpression using interpretation mode when available /// to avoid native memory growth from DynamicMethod IL emission. - /// The behavior can be toggled at runtime via the - /// AZURE_COSMOS_LINQ_EXPRESSION_INTERPRETATION_ENABLED environment variable. + /// The behavior is controlled by the AZURE_COSMOS_LINQ_EXPRESSION_INTERPRETATION_ENABLED + /// environment variable (read once at process startup, defaults to true). /// public static Delegate CompileLambda(LambdaExpression lambda) { @@ -33,9 +36,7 @@ public static Delegate CompileLambda(LambdaExpression lambda) } if (ExpressionCompileHelper.InterpretedCompile != null - && ConfigurationManager.GetEnvironmentVariable( - ConfigurationManager.LinqExpressionCompileInterpretationEnabled, - defaultValue: true)) + && ExpressionCompileHelper.IsInterpretationEnabled) { return ExpressionCompileHelper.InterpretedCompile(lambda); } From 2e0c232063565ff6a06b29e9b90d9004b90eca13 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Thu, 26 Mar 2026 10:35:11 -0700 Subject: [PATCH 10/15] Address review: add generic CompileLambda and memory benchmark - Add CompileLambda(Expression) generic overload that returns TDelegate directly without reflection - Update GeometrySqlExpressionFactory and Utilities to use generic overload, removing explicit casts - Add [MemoryDiagnoser] and memory growth benchmarks to demonstrate native memory leak (old path) vs stable memory (interpretation fix) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/Linq/ExpressionCompileHelper.cs | 26 ++++++++++- .../src/Linq/GeometrySqlExpressionFactory.cs | 2 +- Microsoft.Azure.Cosmos/src/Linq/Utilities.cs | 2 +- .../Linq/SubtreeEvaluatorBenchmark.cs | 44 ++++++++++++++++++- 4 files changed, 69 insertions(+), 5 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs index 2f7e6f1315..3fd711777f 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs @@ -18,15 +18,37 @@ internal static class ExpressionCompileHelper { private static readonly object[] PreferInterpretationArgs = new object[] { true }; private static readonly Func InterpretedCompile = ExpressionCompileHelper.CreateInterpretedCompile(); + private static readonly bool IsInterpretedCompileSupported = ExpressionCompileHelper.InterpretedCompile != null; private static readonly bool IsInterpretationEnabled = ConfigurationManager.GetEnvironmentVariable( ConfigurationManager.LinqExpressionCompileInterpretationEnabled, defaultValue: true); + /// + /// Compiles a strongly-typed Expression using interpretation mode when available + /// to avoid native memory growth from DynamicMethod IL emission. + /// Prefer this overload when the expression type is known at compile time. + /// + public static TDelegate CompileLambda(Expression expression) + where TDelegate : Delegate + { + if (expression == null) + { + throw new ArgumentNullException(nameof(expression)); + } + + if (ExpressionCompileHelper.IsInterpretedCompileSupported + && ExpressionCompileHelper.IsInterpretationEnabled) + { + return expression.Compile(preferInterpretation: true); + } + + return expression.Compile(); + } + /// /// Compiles a LambdaExpression using interpretation mode when available /// to avoid native memory growth from DynamicMethod IL emission. - /// The behavior is controlled by the AZURE_COSMOS_LINQ_EXPRESSION_INTERPRETATION_ENABLED - /// environment variable (read once at process startup, defaults to true). + /// Use this overload when only an untyped LambdaExpression is available. /// public static Delegate CompileLambda(LambdaExpression lambda) { diff --git a/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs b/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs index 397eb72a0a..0acbdea7d3 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs @@ -43,7 +43,7 @@ public static SqlScalarExpression Construct(Expression geometryExpression) try { Expression> le = Expression.Lambda>(geometryExpression); - Func compiledExpression = (Func)ExpressionCompileHelper.CompileLambda(le); + Func compiledExpression = ExpressionCompileHelper.CompileLambda(le); geometry = compiledExpression(); } catch (Exception ex) diff --git a/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs b/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs index c4142e1b5a..ee134380cf 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs @@ -102,7 +102,7 @@ public override object EvalBoxed(Expression expr) public T Eval(Expression expr) { Expression> lambda = Expression.Lambda>(expr); - Func func = (Func)ExpressionCompileHelper.CompileLambda(lambda); + Func func = ExpressionCompileHelper.CompileLambda(lambda); return func(); } } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs index c3f51fc2e0..ed906cec61 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs @@ -10,11 +10,14 @@ namespace Microsoft.Azure.Cosmos.Linq using BenchmarkDotNet.Attributes; /// - /// Benchmark measuring SubtreeEvaluator constant evaluation performance. + /// Benchmark measuring SubtreeEvaluator constant evaluation performance and memory impact. /// Validates fix for GitHub Issue #5487: Unbounded JIT/IL growth from Expression.Compile() /// + [MemoryDiagnoser] public class SubtreeEvaluatorBenchmark { + private const int MemoryIterations = 1000; + private Expression expression; private LambdaExpression lambda; private SubtreeEvaluator evaluator; @@ -43,5 +46,44 @@ public Expression EvaluateWithFix() // Measures actual SubtreeEvaluator code path with the preferInterpretation fix return this.evaluator.Evaluate(this.expression); } + + /// + /// Demonstrates native memory growth: each Compile() emits a new DynamicMethod + /// whose IL is never reclaimed by the GC. Over many iterations, process memory + /// grows unboundedly. Compare with CompileLambdaMemory below. + /// + [Benchmark] + public long NativeCompileMemoryGrowth() + { + long before = GC.GetTotalMemory(forceFullCollection: true); + + for (int i = 0; i < MemoryIterations; i++) + { + Delegate function = this.lambda.Compile(); + function.DynamicInvoke(null); + } + + long after = GC.GetTotalMemory(forceFullCollection: true); + return after - before; + } + + /// + /// With the interpretation fix, no new DynamicMethods are emitted so memory + /// remains stable across iterations. Compare with NativeCompileMemoryGrowth above. + /// + [Benchmark] + public long InterpretedCompileMemoryGrowth() + { + long before = GC.GetTotalMemory(forceFullCollection: true); + + for (int i = 0; i < MemoryIterations; i++) + { + Delegate function = ExpressionCompileHelper.CompileLambda(this.lambda); + function.DynamicInvoke(null); + } + + long after = GC.GetTotalMemory(forceFullCollection: true); + return after - before; + } } } From 72b8a0288b03790db61988b6b10cf4a1648c840f Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Thu, 26 Mar 2026 10:54:31 -0700 Subject: [PATCH 11/15] Address review: add generic CompileLambda, E2E query generation benchmarks - Add CompileLambda(Expression) generic overload that returns TDelegate directly without reflection - Update GeometrySqlExpressionFactory and Utilities to use generic overload, removing explicit casts - Rewrite benchmarks as realistic E2E LINQ-to-SQL query generation through SqlTranslator.TranslateExpression pipeline - Add [MemoryDiagnoser] and memory growth benchmarks comparing old Compile() path vs interpretation fix Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/Linq/ExpressionCompileHelper.cs | 24 +---- .../Linq/SubtreeEvaluatorBenchmark.cs | 99 +++++++++++++------ 2 files changed, 74 insertions(+), 49 deletions(-) diff --git a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs index 3fd711777f..10923251b5 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs @@ -16,9 +16,6 @@ namespace Microsoft.Azure.Cosmos.Linq /// internal static class ExpressionCompileHelper { - private static readonly object[] PreferInterpretationArgs = new object[] { true }; - private static readonly Func InterpretedCompile = ExpressionCompileHelper.CreateInterpretedCompile(); - private static readonly bool IsInterpretedCompileSupported = ExpressionCompileHelper.InterpretedCompile != null; private static readonly bool IsInterpretationEnabled = ConfigurationManager.GetEnvironmentVariable( ConfigurationManager.LinqExpressionCompileInterpretationEnabled, defaultValue: true); @@ -36,8 +33,7 @@ public static TDelegate CompileLambda(Expression expressio throw new ArgumentNullException(nameof(expression)); } - if (ExpressionCompileHelper.IsInterpretedCompileSupported - && ExpressionCompileHelper.IsInterpretationEnabled) + if (ExpressionCompileHelper.IsInterpretationEnabled) { return expression.Compile(preferInterpretation: true); } @@ -57,26 +53,12 @@ public static Delegate CompileLambda(LambdaExpression lambda) throw new ArgumentNullException(nameof(lambda)); } - if (ExpressionCompileHelper.InterpretedCompile != null - && ExpressionCompileHelper.IsInterpretationEnabled) + if (ExpressionCompileHelper.IsInterpretationEnabled) { - return ExpressionCompileHelper.InterpretedCompile(lambda); + return lambda.Compile(preferInterpretation: true); } return lambda.Compile(); } - - private static Func CreateInterpretedCompile() - { - MethodInfo compileWithPreference = typeof(LambdaExpression) - .GetMethod(nameof(LambdaExpression.Compile), new Type[] { typeof(bool) }); - - if (compileWithPreference != null) - { - return lambda => (Delegate)compileWithPreference.Invoke(lambda, ExpressionCompileHelper.PreferInterpretationArgs); - } - - return null; - } } } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs index ed906cec61..ca8e796cd9 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs @@ -10,7 +10,9 @@ namespace Microsoft.Azure.Cosmos.Linq using BenchmarkDotNet.Attributes; /// - /// Benchmark measuring SubtreeEvaluator constant evaluation performance and memory impact. + /// Benchmark measuring E2E LINQ-to-SQL query generation performance and memory impact. + /// Models realistic CosmosDB LINQ query translation (NOT execution) through the full + /// pipeline: ConstantEvaluator.PartialEval → SubtreeEvaluator → ExpressionToSql. /// Validates fix for GitHub Issue #5487: Unbounded JIT/IL growth from Expression.Compile() /// [MemoryDiagnoser] @@ -18,39 +20,58 @@ public class SubtreeEvaluatorBenchmark { private const int MemoryIterations = 1000; - private Expression expression; - private LambdaExpression lambda; - private SubtreeEvaluator evaluator; - - [GlobalSetup] - public void Setup() + private class BenchmarkDocument { - int capturedValue = 42; - Expression> expr = () => capturedValue + 1; - this.expression = expr.Body; - this.lambda = Expression.Lambda(this.expression); - this.evaluator = new SubtreeEvaluator(new HashSet { this.expression }); + public string Status { get; set; } + public int Priority { get; set; } + public string Region { get; set; } } [Benchmark(Baseline = true)] - public object CompileBaseline() + public string SimpleWhereClause() { - // Baseline: duplicates old code path that emits DynamicMethod with IL per call - Delegate function = this.lambda.Compile(); - return function.DynamicInvoke(null); + // Simulates: .Where(doc => doc.Status == status) + // The captured variable "status" triggers SubtreeEvaluator → CompileLambda + string status = "active"; + return SqlTranslator.TranslateExpression( + CreateWhereBody(doc => doc.Status == status)); } [Benchmark] - public Expression EvaluateWithFix() + public string ComputedConstant() { - // Measures actual SubtreeEvaluator code path with the preferInterpretation fix - return this.evaluator.Evaluate(this.expression); + // Simulates: .Where(doc => doc.Priority > threshold + offset) + // The expression "threshold + offset" is a computed constant requiring compilation + int threshold = 5; + int offset = 3; + return SqlTranslator.TranslateExpression( + CreateWhereBody(doc => doc.Priority > threshold + offset)); + } + + [Benchmark] + public string NestedPropertyAccess() + { + // Simulates: .Where(doc => doc.Region == holder.Region) + // Nested member access on captured anonymous object triggers compilation + var filter = new { Region = "westus" }; + return SqlTranslator.TranslateExpression( + CreateWhereBody(doc => doc.Region == filter.Region)); + } + + [Benchmark] + public string MultiplePredicates() + { + // Simulates: .Where(doc => doc.Status == status && doc.Priority >= minPriority) + // Multiple captured variables, each evaluated through SubtreeEvaluator + string status = "active"; + int minPriority = 3; + return SqlTranslator.TranslateExpression( + CreateWhereBody(doc => doc.Status == status && doc.Priority >= minPriority)); } /// - /// Demonstrates native memory growth: each Compile() emits a new DynamicMethod - /// whose IL is never reclaimed by the GC. Over many iterations, process memory - /// grows unboundedly. Compare with CompileLambdaMemory below. + /// Demonstrates native memory growth: repeated query generation with the old + /// Compile() path emits DynamicMethod IL that is never reclaimed by the GC. /// [Benchmark] public long NativeCompileMemoryGrowth() @@ -59,8 +80,22 @@ public long NativeCompileMemoryGrowth() for (int i = 0; i < MemoryIterations; i++) { - Delegate function = this.lambda.Compile(); - function.DynamicInvoke(null); + string status = "active"; + Expression body = CreateWhereBody(doc => doc.Status == status); + + // Simulate the old code path: PartialEval with direct Compile() + HashSet candidates = Nominator.Nominate(body, _ => true); + foreach (Expression candidate in candidates) + { + if (candidate.NodeType != ExpressionType.Constant + && candidate.NodeType != ExpressionType.Parameter + && candidate.NodeType != ExpressionType.Lambda) + { + LambdaExpression lambda = Expression.Lambda(candidate); + Delegate fn = lambda.Compile(); + fn.DynamicInvoke(null); + } + } } long after = GC.GetTotalMemory(forceFullCollection: true); @@ -68,8 +103,9 @@ public long NativeCompileMemoryGrowth() } /// - /// With the interpretation fix, no new DynamicMethods are emitted so memory - /// remains stable across iterations. Compare with NativeCompileMemoryGrowth above. + /// With the interpretation fix, the same query generation path uses + /// ExpressionCompileHelper.CompileLambda which avoids DynamicMethod emission. + /// Memory remains stable across iterations. /// [Benchmark] public long InterpretedCompileMemoryGrowth() @@ -78,12 +114,19 @@ public long InterpretedCompileMemoryGrowth() for (int i = 0; i < MemoryIterations; i++) { - Delegate function = ExpressionCompileHelper.CompileLambda(this.lambda); - function.DynamicInvoke(null); + // Full E2E translation pipeline (uses ExpressionCompileHelper internally) + string status = "active"; + SqlTranslator.TranslateExpression( + CreateWhereBody(doc => doc.Status == status)); } long after = GC.GetTotalMemory(forceFullCollection: true); return after - before; } + + private static Expression CreateWhereBody(Expression> predicate) + { + return predicate.Body; + } } } From 67aeab1dd5c231f7f5824ce4235aa62ddf039785 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Thu, 26 Mar 2026 11:54:39 -0700 Subject: [PATCH 12/15] Simplify: remove ExpressionCompileHelper, inline Compile(preferInterpretation: true) - Delete ExpressionCompileHelper.cs and env var feature flag - Inline Compile(preferInterpretation: true) directly at all 4 call sites - Simplify benchmarks to compare Compile() vs Compile(true) directly Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/Linq/DocumentQueryEvaluator.cs | 4 +- .../src/Linq/ExpressionCompileHelper.cs | 64 ------------- .../src/Linq/GeometrySqlExpressionFactory.cs | 2 +- .../src/Linq/SubtreeEvaluator.cs | 2 +- Microsoft.Azure.Cosmos/src/Linq/Utilities.cs | 2 +- .../src/Util/ConfigurationManager.cs | 9 -- .../Linq/SubtreeEvaluatorBenchmark.cs | 92 ++++--------------- 7 files changed, 25 insertions(+), 150 deletions(-) delete mode 100644 Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs diff --git a/Microsoft.Azure.Cosmos/src/Linq/DocumentQueryEvaluator.cs b/Microsoft.Azure.Cosmos/src/Linq/DocumentQueryEvaluator.cs index e34647df34..73632a2edd 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/DocumentQueryEvaluator.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/DocumentQueryEvaluator.cs @@ -109,7 +109,7 @@ private static LinqQueryOperation HandleAsSqlTransformExpression(MethodCallExpre { LambdaExpression lambdaExpression = (LambdaExpression)paramExpression; // Send the lambda expression through the partial evaluator. - return GetSqlQuerySpec(ExpressionCompileHelper.CompileLambda(lambdaExpression).DynamicInvoke(null)); + return GetSqlQuerySpec(lambdaExpression.Compile(preferInterpretation: true).DynamicInvoke(null)); } else if (paramExpression.NodeType == ExpressionType.Constant) { @@ -119,7 +119,7 @@ private static LinqQueryOperation HandleAsSqlTransformExpression(MethodCallExpre else { LambdaExpression lamdaExpression = Expression.Lambda(paramExpression); - return GetSqlQuerySpec(ExpressionCompileHelper.CompileLambda(lamdaExpression).DynamicInvoke(null)); + return GetSqlQuerySpec(lamdaExpression.Compile(preferInterpretation: true).DynamicInvoke(null)); } } diff --git a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs b/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs deleted file mode 100644 index 10923251b5..0000000000 --- a/Microsoft.Azure.Cosmos/src/Linq/ExpressionCompileHelper.cs +++ /dev/null @@ -1,64 +0,0 @@ -//------------------------------------------------------------ -// Copyright (c) Microsoft Corporation. All rights reserved. -//------------------------------------------------------------ -namespace Microsoft.Azure.Cosmos.Linq -{ - using System; - using System.Linq.Expressions; - using System.Reflection; - - /// - /// Provides a shared compile strategy for LambdaExpression that avoids generating - /// JIT-compiled DynamicMethods whose IL persists in native memory. - /// On .NET 6+ runtimes, uses Compile(preferInterpretation: true) to interpret - /// expressions without IL emission. On older runtimes, falls back to standard Compile(). - /// See: https://github.com/Azure/azure-cosmos-dotnet-v3/issues/5487 - /// - internal static class ExpressionCompileHelper - { - private static readonly bool IsInterpretationEnabled = ConfigurationManager.GetEnvironmentVariable( - ConfigurationManager.LinqExpressionCompileInterpretationEnabled, - defaultValue: true); - - /// - /// Compiles a strongly-typed Expression using interpretation mode when available - /// to avoid native memory growth from DynamicMethod IL emission. - /// Prefer this overload when the expression type is known at compile time. - /// - public static TDelegate CompileLambda(Expression expression) - where TDelegate : Delegate - { - if (expression == null) - { - throw new ArgumentNullException(nameof(expression)); - } - - if (ExpressionCompileHelper.IsInterpretationEnabled) - { - return expression.Compile(preferInterpretation: true); - } - - return expression.Compile(); - } - - /// - /// Compiles a LambdaExpression using interpretation mode when available - /// to avoid native memory growth from DynamicMethod IL emission. - /// Use this overload when only an untyped LambdaExpression is available. - /// - public static Delegate CompileLambda(LambdaExpression lambda) - { - if (lambda == null) - { - throw new ArgumentNullException(nameof(lambda)); - } - - if (ExpressionCompileHelper.IsInterpretationEnabled) - { - return lambda.Compile(preferInterpretation: true); - } - - return lambda.Compile(); - } - } -} diff --git a/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs b/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs index 0acbdea7d3..ecaeef42ff 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/GeometrySqlExpressionFactory.cs @@ -43,7 +43,7 @@ public static SqlScalarExpression Construct(Expression geometryExpression) try { Expression> le = Expression.Lambda>(geometryExpression); - Func compiledExpression = ExpressionCompileHelper.CompileLambda(le); + Func compiledExpression = le.Compile(preferInterpretation: true); geometry = compiledExpression(); } catch (Exception ex) diff --git a/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs b/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs index 74a8b19b18..4d67fbd67d 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/SubtreeEvaluator.cs @@ -117,7 +117,7 @@ private Expression EvaluateConstant(Expression expression) } LambdaExpression lambda = Expression.Lambda(expression); - Delegate function = ExpressionCompileHelper.CompileLambda(lambda); + Delegate function = lambda.Compile(preferInterpretation: true); return Expression.Constant(function.DynamicInvoke(null), expression.Type); } diff --git a/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs b/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs index ee134380cf..4b53ae4682 100644 --- a/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs +++ b/Microsoft.Azure.Cosmos/src/Linq/Utilities.cs @@ -102,7 +102,7 @@ public override object EvalBoxed(Expression expr) public T Eval(Expression expr) { Expression> lambda = Expression.Lambda>(expr); - Func func = ExpressionCompileHelper.CompileLambda(lambda); + Func func = lambda.Compile(preferInterpretation: true); return func(); } } diff --git a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs index b4a43e596c..b5b4d8eec2 100644 --- a/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs +++ b/Microsoft.Azure.Cosmos/src/Util/ConfigurationManager.cs @@ -117,15 +117,6 @@ internal static class ConfigurationManager /// internal static readonly string BypassQueryParsing = "AZURE_COSMOS_BYPASS_QUERY_PARSING"; - /// - /// Environment variable to disable LINQ expression interpretation mode. - /// When set to "false", falls back to JIT-compiled Expression.Compile(). - /// Default (unset or "true"): interpretation mode enabled to prevent native memory growth - /// from DynamicMethod IL emission in long-running services. - /// See: https://github.com/Azure/azure-cosmos-dotnet-v3/issues/5487 - /// - internal static readonly string LinqExpressionCompileInterpretationEnabled = "AZURE_COSMOS_LINQ_EXPRESSION_INTERPRETATION_ENABLED"; - /// /// A read-only string containing the environment variable name for disabling length aware range comparator. /// Length aware range comparators were intorduced in Range class to handle EPK range comparisons correctly in the case of a container's physical partition set consisting of fully and partially specified EPK values. diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs index ca8e796cd9..ca1321b73a 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs @@ -5,14 +5,12 @@ namespace Microsoft.Azure.Cosmos.Linq { using System; - using System.Collections.Generic; using System.Linq.Expressions; using BenchmarkDotNet.Attributes; /// - /// Benchmark measuring E2E LINQ-to-SQL query generation performance and memory impact. - /// Models realistic CosmosDB LINQ query translation (NOT execution) through the full - /// pipeline: ConstantEvaluator.PartialEval → SubtreeEvaluator → ExpressionToSql. + /// Benchmark comparing Expression.Compile() vs Compile(preferInterpretation: true) + /// in the context of CosmosDB LINQ-to-SQL query generation. /// Validates fix for GitHub Issue #5487: Unbounded JIT/IL growth from Expression.Compile() /// [MemoryDiagnoser] @@ -20,104 +18,54 @@ public class SubtreeEvaluatorBenchmark { private const int MemoryIterations = 1000; - private class BenchmarkDocument - { - public string Status { get; set; } - public int Priority { get; set; } - public string Region { get; set; } - } + private LambdaExpression lambda; - [Benchmark(Baseline = true)] - public string SimpleWhereClause() + [GlobalSetup] + public void Setup() { - // Simulates: .Where(doc => doc.Status == status) - // The captured variable "status" triggers SubtreeEvaluator → CompileLambda string status = "active"; - return SqlTranslator.TranslateExpression( - CreateWhereBody(doc => doc.Status == status)); + Expression> expr = () => status == "active"; + this.lambda = Expression.Lambda(expr.Body); } - [Benchmark] - public string ComputedConstant() - { - // Simulates: .Where(doc => doc.Priority > threshold + offset) - // The expression "threshold + offset" is a computed constant requiring compilation - int threshold = 5; - int offset = 3; - return SqlTranslator.TranslateExpression( - CreateWhereBody(doc => doc.Priority > threshold + offset)); - } - - [Benchmark] - public string NestedPropertyAccess() + [Benchmark(Baseline = true)] + public object Compile() { - // Simulates: .Where(doc => doc.Region == holder.Region) - // Nested member access on captured anonymous object triggers compilation - var filter = new { Region = "westus" }; - return SqlTranslator.TranslateExpression( - CreateWhereBody(doc => doc.Region == filter.Region)); + Delegate fn = this.lambda.Compile(); + return fn.DynamicInvoke(null); } [Benchmark] - public string MultiplePredicates() + public object CompileWithInterpretation() { - // Simulates: .Where(doc => doc.Status == status && doc.Priority >= minPriority) - // Multiple captured variables, each evaluated through SubtreeEvaluator - string status = "active"; - int minPriority = 3; - return SqlTranslator.TranslateExpression( - CreateWhereBody(doc => doc.Status == status && doc.Priority >= minPriority)); + Delegate fn = this.lambda.Compile(preferInterpretation: true); + return fn.DynamicInvoke(null); } - /// - /// Demonstrates native memory growth: repeated query generation with the old - /// Compile() path emits DynamicMethod IL that is never reclaimed by the GC. - /// [Benchmark] - public long NativeCompileMemoryGrowth() + public long CompileMemoryGrowth() { long before = GC.GetTotalMemory(forceFullCollection: true); for (int i = 0; i < MemoryIterations; i++) { - string status = "active"; - Expression body = CreateWhereBody(doc => doc.Status == status); - - // Simulate the old code path: PartialEval with direct Compile() - HashSet candidates = Nominator.Nominate(body, _ => true); - foreach (Expression candidate in candidates) - { - if (candidate.NodeType != ExpressionType.Constant - && candidate.NodeType != ExpressionType.Parameter - && candidate.NodeType != ExpressionType.Lambda) - { - LambdaExpression lambda = Expression.Lambda(candidate); - Delegate fn = lambda.Compile(); - fn.DynamicInvoke(null); - } - } + Delegate fn = this.lambda.Compile(); + fn.DynamicInvoke(null); } long after = GC.GetTotalMemory(forceFullCollection: true); return after - before; } - /// - /// With the interpretation fix, the same query generation path uses - /// ExpressionCompileHelper.CompileLambda which avoids DynamicMethod emission. - /// Memory remains stable across iterations. - /// [Benchmark] - public long InterpretedCompileMemoryGrowth() + public long CompileWithInterpretationMemoryGrowth() { long before = GC.GetTotalMemory(forceFullCollection: true); for (int i = 0; i < MemoryIterations; i++) { - // Full E2E translation pipeline (uses ExpressionCompileHelper internally) - string status = "active"; - SqlTranslator.TranslateExpression( - CreateWhereBody(doc => doc.Status == status)); + Delegate fn = this.lambda.Compile(preferInterpretation: true); + fn.DynamicInvoke(null); } long after = GC.GetTotalMemory(forceFullCollection: true); From 469230c5bbffd6d014503405729971cda215e54a Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Fri, 27 Mar 2026 09:39:16 -0700 Subject: [PATCH 13/15] Add guard test to prevent bare .Compile() in LINQ source files Scans all .cs files under src/Linq/ and fails if any bare .Compile() is found without preferInterpretation: true. Prevents reintroduction of the native memory leak from DynamicMethod IL emission (#5487). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Linq/LinqCompileGuardTests.cs | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/LinqCompileGuardTests.cs diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/LinqCompileGuardTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/LinqCompileGuardTests.cs new file mode 100644 index 0000000000..6e7393d20b --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Linq/LinqCompileGuardTests.cs @@ -0,0 +1,75 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Linq +{ + using System; + using System.Collections.Generic; + using System.IO; + using System.Linq; + using System.Text.RegularExpressions; + using Microsoft.VisualStudio.TestTools.UnitTesting; + + /// + /// Guards against introducing bare Expression.Compile() calls in the LINQ provider. + /// All Compile() calls must use Compile(preferInterpretation: true) to avoid native + /// memory leaks from DynamicMethod IL emission. + /// See: https://github.com/Azure/azure-cosmos-dotnet-v3/issues/5487 + /// + [TestClass] + public class LinqCompileGuardTests + { + // Matches .Compile() with no arguments — the problematic pattern + private static readonly Regex BareCompilePattern = new Regex( + @"\.Compile\(\s*\)", + RegexOptions.Compiled); + + // Matches .Compile(preferInterpretation: true) — the correct pattern + private static readonly Regex InterpretedCompilePattern = new Regex( + @"\.Compile\(\s*preferInterpretation\s*:\s*true\s*\)", + RegexOptions.Compiled); + + [TestMethod] + public void LinqSourceFiles_ShouldNotUseBareCompile() + { + string linqDirectory = Path.GetFullPath( + Path.Combine( + Directory.GetCurrentDirectory(), + "..", "..", "..", "..", "..", + "src", "Linq")); + + Assert.IsTrue( + Directory.Exists(linqDirectory), + $"LINQ source directory not found at: {linqDirectory}"); + + string[] sourceFiles = Directory.GetFiles(linqDirectory, "*.cs", SearchOption.AllDirectories); + Assert.IsTrue(sourceFiles.Length > 0, "No source files found in LINQ directory."); + + List violations = new List(); + + foreach (string file in sourceFiles) + { + string[] lines = File.ReadAllLines(file); + string fileName = Path.GetFileName(file); + + for (int i = 0; i < lines.Length; i++) + { + string line = lines[i]; + + if (BareCompilePattern.IsMatch(line) && !InterpretedCompilePattern.IsMatch(line)) + { + violations.Add($" {fileName}:{i + 1} => {line.Trim()}"); + } + } + } + + Assert.AreEqual( + 0, + violations.Count, + $"Found bare .Compile() calls without preferInterpretation: true. " + + $"Use .Compile(preferInterpretation: true) to avoid native memory leaks " + + $"(see issue #5487):\n{string.Join("\n", violations)}"); + } + } +} From e4de241a916505ba2f5650b0c0bbc4e5dd74499d Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Fri, 27 Mar 2026 21:37:16 -0700 Subject: [PATCH 14/15] Removing dead code --- .../Linq/SubtreeEvaluatorBenchmark.cs | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs index ca1321b73a..f53a2850cb 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs @@ -71,10 +71,5 @@ public long CompileWithInterpretationMemoryGrowth() long after = GC.GetTotalMemory(forceFullCollection: true); return after - before; } - - private static Expression CreateWhereBody(Expression> predicate) - { - return predicate.Body; - } } } From bd2302615f5d2f1ab3fa6ef844570f47a1af475b Mon Sep 17 00:00:00 2001 From: Kiran Kumar Kolli Date: Fri, 27 Mar 2026 22:40:13 -0700 Subject: [PATCH 15/15] Adding NativeMemoryProfiler --- .../Linq/SubtreeEvaluatorBenchmark.cs | 42 ++++--------------- ...soft.Azure.Cosmos.Performance.Tests.csproj | 1 + 2 files changed, 10 insertions(+), 33 deletions(-) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs index f53a2850cb..fa67ff1ba5 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Linq/SubtreeEvaluatorBenchmark.cs @@ -7,17 +7,23 @@ namespace Microsoft.Azure.Cosmos.Linq using System; using System.Linq.Expressions; using BenchmarkDotNet.Attributes; + using BenchmarkDotNet.Diagnostics.Windows.Configs; /// /// Benchmark comparing Expression.Compile() vs Compile(preferInterpretation: true) /// in the context of CosmosDB LINQ-to-SQL query generation. - /// Validates fix for GitHub Issue #5487: Unbounded JIT/IL growth from Expression.Compile() + /// Validates fix for GitHub Issue #5487: Unbounded JIT/IL growth from Expression.Compile(). + /// + /// [MemoryDiagnoser] reports managed GC allocations (Gen0/Gen1/Allocated columns). + /// [NativeMemoryProfiler] uses ETW to track native memory allocations and leaks per method, + /// adding "Allocated native memory" and "Native memory leak" columns to the results table. + /// Note: NativeMemoryProfiler requires Windows and elevated (admin) privileges. /// + [ShortRunJob] [MemoryDiagnoser] + // [NativeMemoryProfiler] // Enable this line to include native memory profiling, requires Windows and admin privileges. public class SubtreeEvaluatorBenchmark { - private const int MemoryIterations = 1000; - private LambdaExpression lambda; [GlobalSetup] @@ -41,35 +47,5 @@ public object CompileWithInterpretation() Delegate fn = this.lambda.Compile(preferInterpretation: true); return fn.DynamicInvoke(null); } - - [Benchmark] - public long CompileMemoryGrowth() - { - long before = GC.GetTotalMemory(forceFullCollection: true); - - for (int i = 0; i < MemoryIterations; i++) - { - Delegate fn = this.lambda.Compile(); - fn.DynamicInvoke(null); - } - - long after = GC.GetTotalMemory(forceFullCollection: true); - return after - before; - } - - [Benchmark] - public long CompileWithInterpretationMemoryGrowth() - { - long before = GC.GetTotalMemory(forceFullCollection: true); - - for (int i = 0; i < MemoryIterations; i++) - { - Delegate fn = this.lambda.Compile(preferInterpretation: true); - fn.DynamicInvoke(null); - } - - long after = GC.GetTotalMemory(forceFullCollection: true); - return after - before; - } } } diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Microsoft.Azure.Cosmos.Performance.Tests.csproj b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Microsoft.Azure.Cosmos.Performance.Tests.csproj index 8ef2a239f5..ff80e2ffb5 100644 --- a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Microsoft.Azure.Cosmos.Performance.Tests.csproj +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Performance.Tests/Microsoft.Azure.Cosmos.Performance.Tests.csproj @@ -17,6 +17,7 @@ +