diff --git a/Documentation/how-to-build-WebAssembly.md b/Documentation/how-to-build-WebAssembly.md index 7948e2a1371..5c74af192bc 100644 --- a/Documentation/how-to-build-WebAssembly.md +++ b/Documentation/how-to-build-WebAssembly.md @@ -3,7 +3,7 @@ ## Build WebAssembly on Windows ## 1. Install Emscripten by following the instructions [here](https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html). -2. Follow the instructions [here](https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html#updating-the-sdk) to update Emscripten to 1.39.8 ```./emsdk install 1.39.8``` followed by ```./emsdk activate 1.39.8``` +2. Follow the instructions [here](https://kripken.github.io/emscripten-site/docs/getting_started/downloads.html#updating-the-sdk) to update Emscripten to 1.39.19 ```./emsdk install 1.39.19``` followed by ```./emsdk activate 1.39.19``` 3. Install [Firefox](https://www.getfirefox.com) (for testing). 3. Get CoreRT set up by following the [Visual Studio instructions](how-to-build-and-run-ilcompiler-in-visual-studio.md). 4. Build the WebAssembly runtime by running ```build.cmd wasm``` from the repo root. diff --git a/Documentation/using-corert/README.md b/Documentation/using-corert/README.md index 8355417f1e0..6e9a5e2d5ad 100644 --- a/Documentation/using-corert/README.md +++ b/Documentation/using-corert/README.md @@ -7,3 +7,4 @@ In that area contains list of topics which describe how to use CoreRT on your pr - [Reflection Free Mode](reflection-free-mode.md) - [Reflection In AOT](reflection-in-aot-mode.md) - [Troubleshooting](troubleshooting-corert.md) +- [RD.xml Documentation](rd-xml-format.md) diff --git a/Documentation/using-corert/rd-xml-format.md b/Documentation/using-corert/rd-xml-format.md index 93e02d8fb49..e09adff303e 100644 --- a/Documentation/using-corert/rd-xml-format.md +++ b/Documentation/using-corert/rd-xml-format.md @@ -6,7 +6,7 @@ An rd.xml file can be supplemented to help ILCompiler find types that should be Minimal Rd.xml configuration -``` +```xml @@ -25,7 +25,7 @@ There 3 forms how assembly can be configured - Module metadata and selected types. Module metadata only just need simple `` tag with short name of the assembly. -``` +```xml @@ -34,7 +34,7 @@ Module metadata only just need simple `` tag with short name of the as ``` All types in the assembly require adding `Dynamic` attribute with value `Required All`. *NOTE*: This is the only available value for this attribute. -``` +```xml @@ -45,7 +45,7 @@ Note that if you have generic types in the assembly, then specific instantiation then you should include these instantiation using nested `` tag. Module metadata and selected types option based on module metadata only mode with added `` tags inside ``. -``` +```xml @@ -61,12 +61,12 @@ Type directive provides a way to specify what types are needed. Developer has tw - Select which methods should be rooted. Take all type methods: -``` +```xml ``` Example how specify typenames -``` +```c# // just int System.Int32 // string[] @@ -83,13 +83,13 @@ System.Collections.Generic.Dictionary`2[[System.Int32,System.Private.CoreLib],[S Note that it likely does not make sense to have generic type to be placed here, since code generated over specific instantiation of the generic type. Example of invalid scenario: -``` +```c# // List System.Collections.Generic.List`1 ``` To select which methods should be rooted add nested `` tags. -``` +```xml @@ -102,7 +102,7 @@ To select which methods should be rooted add nested `` tags. 
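As an aside to the rd.xml documentation changes above (not part of the patch itself): the kind of dependency an rd.xml root exists to preserve is one that only appears as a string at run time. A minimal sketch, assuming a hypothetical plugin type name `MyApp.PluginLoadedByName`:

```c#
using System;

class RdXmlMotivation
{
    static void Main()
    {
        // The target type is named only as a string, so ILCompiler cannot see the
        // dependency statically; rooting the type (or its assembly) in rd.xml keeps
        // its metadata and code available at run time. The type name is made up.
        Type t = Type.GetType("MyApp.PluginLoadedByName, MyApp");
        if (t != null)
            Console.WriteLine(Activator.CreateInstance(t));
    }
}
```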
``` Alternatively you can specify optional `` tag, if you want only specific overload. For example: -``` +```xml @@ -117,7 +117,7 @@ Alternatively you can specify optional `` tag, if you want only speci ``` or if you want instantiate generic method you can pass ``. -``` +```xml diff --git a/eng/install-emscripten.cmd b/eng/install-emscripten.cmd index a621c649ca2..8c9979829bb 100644 --- a/eng/install-emscripten.cmd +++ b/eng/install-emscripten.cmd @@ -5,14 +5,14 @@ git clone https://github.com/emscripten-core/emsdk.git cd emsdk rem checkout a known good version to avoid a random break when emscripten changes the top of tree. -git checkout 92d512f +git checkout dec8a63 powershell -NoProfile -NoLogo -ExecutionPolicy ByPass -command "& """%~dp0update-machine-certs.ps1""" %*" rem Use the python that is downloaded to native-tools explicitly as its not on the path -call "%1"\..\native-tools\bin\python3 emsdk.py install 1.39.8 +call "%1"\..\native-tools\bin\python3 emsdk.py install 1.39.19 if %errorlevel% NEQ 0 goto fail -call emsdk activate 1.39.8 +call emsdk activate 1.39.19 if %errorlevel% NEQ 0 goto fail exit /b 0 diff --git a/src/BuildIntegration/Microsoft.NETCore.Native.targets b/src/BuildIntegration/Microsoft.NETCore.Native.targets index 0247f478c2d..6d2a6766aaa 100644 --- a/src/BuildIntegration/Microsoft.NETCore.Native.targets +++ b/src/BuildIntegration/Microsoft.NETCore.Native.targets @@ -37,8 +37,8 @@ The .NET Foundation licenses this file to you under the MIT license. .obj - .o - .bc + .o + .bc .lib .a @@ -61,6 +61,7 @@ The .NET Foundation licenses this file to you under the MIT license. .def .exports + $(NativeIntermediateOutputPath)$(TargetName)$(LlvmObjectExt) $(NativeIntermediateOutputPath)$(TargetName)$(NativeObjectExt) $(NativeOutputPath)$(TargetName)$(NativeBinaryExt) $(NativeIntermediateOutputPath)$(TargetName)$(ExportsFileExt) @@ -70,7 +71,7 @@ The .NET Foundation licenses this file to you under the MIT license. IlcCompile CppCompile - IlcCompile + WasmObject $(NativeOutputPath) $(NativeIntermediateOutputPath) @@ -275,6 +276,26 @@ The .NET Foundation licenses this file to you under the MIT license. 
+ + + + + + "$(LlvmObject)" -c -o "$(NativeObject)" -s ERROR_ON_UNDEFINED_SYMBOLS=0 -s DISABLE_EXCEPTION_CATCHING=0 + $(EmccArgs) -O2 + $(EmccArgs) -g3 + + + + + + + - "$(NativeObject)" -o "$(NativeBinary)" -s WASM=1 -s ALLOW_MEMORY_GROWTH=1 -s ERROR_ON_UNDEFINED_SYMBOLS=0 -s DISABLE_EXCEPTION_CATCHING=0 --emrun + "$(NativeObject)" -o "$(NativeBinary)" -s ALLOW_MEMORY_GROWTH=1 -s ERROR_ON_UNDEFINED_SYMBOLS=0 -s DISABLE_EXCEPTION_CATCHING=0 --emrun $(EmccArgs) "$(IlcPath)/sdk/libPortableRuntime.a" "$(IlcPath)/sdk/libbootstrappercpp.a" "$(IlcPath)/sdk/libSystem.Private.CoreLib.Native.a" $(EmccExtraArgs) $(EmccArgs) -O2 -flto $(EmccArgs) -g3 diff --git a/src/Common/src/TypeSystem/Common/TargetDetails.cs b/src/Common/src/TypeSystem/Common/TargetDetails.cs index 79601a4eedd..7a84d6e3cf1 100644 --- a/src/Common/src/TypeSystem/Common/TargetDetails.cs +++ b/src/Common/src/TypeSystem/Common/TargetDetails.cs @@ -274,7 +274,8 @@ public LayoutInt GetObjectAlignment(LayoutInt fieldAlignment) switch (Architecture) { case TargetArchitecture.ARM: - // ARM supports two alignments for objects on the GC heap (4 byte and 8 byte) + case TargetArchitecture.Wasm32: + // ARM & Wasm32 support two alignments for objects on the GC heap (4 byte and 8 byte) if (fieldAlignment.IsIndeterminate) return LayoutInt.Indeterminate; @@ -286,7 +287,6 @@ public LayoutInt GetObjectAlignment(LayoutInt fieldAlignment) case TargetArchitecture.ARM64: return new LayoutInt(8); case TargetArchitecture.X86: - case TargetArchitecture.Wasm32: return new LayoutInt(4); default: throw new NotSupportedException(); diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs index ba62660b598..1556406c6f9 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/NativeLayoutSignatureNode.cs @@ -55,7 +55,16 @@ public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb) public int Offset => 0; protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler); - public override ObjectNodeSection Section => ObjectNodeSection.ReadOnlyDataSection; + public override ObjectNodeSection Section + { + get + { + if (_identity.Context.Target.IsWindows) + return ObjectNodeSection.ReadOnlyDataSection; + else + return ObjectNodeSection.DataSection; + } + } public override bool IsShareable => false; public override bool StaticDependenciesAreComputed => true; diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs index ffc8db575bc..7273b37119f 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectDataBuilder.cs @@ -302,6 +302,7 @@ public void EmitReloc(ISymbolNode symbol, RelocType relocType, int delta = 0) case RelocType.IMAGE_REL_BASED_THUMB_MOV32: case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: // Do not vacate space for this kind of relocation, because // the space is embedded in the instruction. 
break; diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs index f38a31ccf4a..21981bcfe61 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/ObjectWriter.cs @@ -857,6 +857,8 @@ public void EmitSymbolDefinition(int currentOffset) { foreach (var name in nodes) { + + _sb.Clear(); AppendExternCPrefix(_sb); name.AppendMangledName(_nodeFactory.NameMangler, _sb); @@ -1043,7 +1045,7 @@ public static void EmitObject(string objectFilePath, IEnumerable // The DWARF CFI unwind is implemented for AMD64 & ARM32 only. TargetArchitecture tarch = factory.Target.Architecture; if (!factory.Target.IsWindows && - (tarch == TargetArchitecture.X64 || tarch == TargetArchitecture.ARM)) + (tarch == TargetArchitecture.X64 || tarch == TargetArchitecture.ARM || tarch == TargetArchitecture.ARM64)) objectWriter.BuildCFIMap(factory, node); // Build debug location map @@ -1089,17 +1091,24 @@ public static void EmitObject(string objectFilePath, IEnumerable } int size = objectWriter.EmitSymbolReference(reloc.Target, (int)delta, reloc.RelocType); - // Emit a copy of original Thumb2 instruction that came from RyuJIT - if (reloc.RelocType == RelocType.IMAGE_REL_BASED_THUMB_MOV32 || - reloc.RelocType == RelocType.IMAGE_REL_BASED_THUMB_BRANCH24) + // Emit a copy of original Thumb2/ARM64 instruction that came from RyuJIT + + switch (reloc.RelocType) { - unsafe - { - fixed (void* location = &nodeContents.Data[i]) + case RelocType.IMAGE_REL_BASED_THUMB_MOV32: + case RelocType.IMAGE_REL_BASED_THUMB_BRANCH24: + case RelocType.IMAGE_REL_BASED_ARM64_BRANCH26: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: + case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12L: + unsafe { - objectWriter.EmitBytes((IntPtr)location, size); + fixed (void* location = &nodeContents.Data[i]) + { + objectWriter.EmitBytes((IntPtr)location, size); + } } - } + break; } // Update nextRelocIndex/Offset diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs index fbb9408222c..e0dd43e6d44 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Relocation.cs @@ -257,6 +257,45 @@ private static unsafe void PutArm64Rel12(uint* pCode, int imm12) Debug.Assert(GetArm64Rel12(pCode) == imm12); } + private static unsafe int GetArm64Rel28(uint* pCode) + { + uint branchInstr = *pCode; + + // first shift 6 bits left to set the sign bit, + // then arithmetic shift right by 4 bits + int imm28 = (((int)(branchInstr & 0x03FFFFFF)) << 6) >> 4; + + return imm28; + } + + private static bool FitsInArm64Rel28(long imm28) + { + return (imm28 >= -0x08000000L) && (imm28 < 0x08000000L); + } + + private static unsafe void PutArm64Rel28(uint* pCode, long imm28) + { + // Verify that we got a valid offset + Debug.Assert(FitsInArm64Rel28(imm28)); + + Debug.Assert((imm28 & 0x3) == 0); // the low two bits must be zero + + uint branchInstr = *pCode; + + branchInstr &= 0xFC000000; // keep bits 31-26 + + Debug.Assert((branchInstr & 0x7FFFFFFF) == 0x14000000); // Must be B or BL + + // Assemble the pc-relative delta 'imm28' into the branch instruction + branchInstr |= (uint)(((imm28 >> 2) & 0x03FFFFFFU)); + + *pCode = branchInstr; // write the assembled 
instruction + + Debug.Assert(GetArm64Rel28(pCode) == imm28); + } + + + public Relocation(RelocType relocType, int offset, ISymbolNode target) { RelocType = relocType; @@ -286,6 +325,9 @@ public static unsafe void WriteValue(RelocType relocType, void* location, long v case RelocType.IMAGE_REL_BASED_THUMB_BRANCH24: PutThumb2BlRel24((ushort*)location, (uint)value); break; + case RelocType.IMAGE_REL_BASED_ARM64_BRANCH26: + PutArm64Rel28((uint*)location, value); + break; case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: PutArm64Rel21((uint*)location, (int)value); break; @@ -318,6 +360,8 @@ public static unsafe long ReadValue(RelocType relocType, void* location) return (long)GetThumb2Mov32((ushort*)location); case RelocType.IMAGE_REL_BASED_THUMB_BRANCH24: return (long)GetThumb2BlRel24((ushort*)location); + case RelocType.IMAGE_REL_BASED_ARM64_BRANCH26: + return (long)GetArm64Rel28((uint*)location); case RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: return GetArm64Rel21((uint*)location); case RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs index ee2a3c7516b..e7b8ea0e211 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64Emitter.cs @@ -25,7 +25,7 @@ public void EmitMOV(Register regDst, ref AddrMode memory) public void EmitMOV(Register regDst, Register regSrc) { - throw new NotImplementedException(); + Builder.EmitUInt((uint)(0b1_0_1_01010_000_00000_000000_11111_00000u | ((uint)regSrc << 16) | (uint)regDst)); } public void EmitMOV(Register regDst, ushort imm16) @@ -35,6 +35,17 @@ public void EmitMOV(Register regDst, ushort imm16) Builder.EmitUInt(instruction); } + public void EmitMOV(Register regDst, ISymbolNode symbol) + { + // ADRP regDst, [symbol (21bit ADRP thing)] + Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21); + Builder.EmitUInt(0x90000000u | (byte)regDst); + + // Add regDst, (12bit LDR page offset reloc) + Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A); + Builder.EmitUInt((uint)(0b1_0_0_100010_0_000000000000_00000_00000 | ((byte)regDst << 5) | (byte)regDst)); + } + // ldr regDst, [PC + imm19] public void EmitLDR(Register regDst, short offset) { @@ -54,6 +65,25 @@ public void EmitLDR(Register regDst, Register regAddr) Builder.EmitUInt(instruction); } + public void EmitLDR(Register regDst, Register regSrc, int offset) + { + Debug.Assert(offset >= -255 && offset <= 4095); + if (offset >= 0) + { + Debug.Assert(offset % 8 == 0); + + offset /= 8; + + Builder.EmitUInt((uint)(0b11_1110_0_1_0_1_000000000000_00000_00000u | ((uint)offset << 10) | ((uint)regSrc << 5) | (uint)regDst)); + } + else + { + uint o = (uint)offset & 0x1FF; + + Builder.EmitUInt((uint)(0b11_1110_0_0_010_000000000_1_1_00000_00000u | (o << 12) | ((uint)regSrc << 5) | (uint)regDst)); + } + } + public void EmitLEAQ(Register reg, ISymbolNode symbol, int delta = 0) { throw new NotImplementedException(); @@ -69,12 +99,38 @@ public void EmitCMP(ref AddrMode addrMode, sbyte immediate) throw new NotImplementedException(); } + public void EmitCMP(Register reg, sbyte immediate) + { + if (immediate >= 0) + { + Builder.EmitUInt((uint)(0b1_1_1_100010_0_000000000000_00000_11111u | immediate << 10) | ((uint)reg << 5)); + } + else + { + throw new NotImplementedException(); + } + } 
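The new IMAGE_REL_BASED_ARM64_BRANCH26 support above packs a PC-relative byte displacement into the 26-bit immediate of an ARM64 B/BL instruction. As an aside (not part of the patch itself), here is a minimal standalone sketch of that round trip, reusing the same masks and shifts as the PutArm64Rel28/GetArm64Rel28 helpers; the class and method names are invented for illustration.

```c#
using System;
using System.Diagnostics;

class Arm64Rel28Sketch
{
    // B/BL keep their opcode in bits 31-26 and a signed, 4-byte-scaled
    // 26-bit displacement in bits 25-0.
    static uint Put(uint branchInstr, long imm28)
    {
        Debug.Assert((imm28 & 0x3) == 0);                          // low two bits must be zero
        Debug.Assert(imm28 >= -0x08000000L && imm28 < 0x08000000L);
        branchInstr &= 0xFC000000;                                 // keep bits 31-26 (B = 0x14000000, BL = 0x94000000)
        return branchInstr | (uint)((imm28 >> 2) & 0x03FFFFFF);
    }

    static int Get(uint branchInstr)
    {
        // shift left 6 to move the sign bit into bit 31, then arithmetic shift right 4
        return ((int)(branchInstr & 0x03FFFFFF) << 6) >> 4;
    }

    static void Main()
    {
        uint bl = Put(0x94000000u, -0x1234 * 4);                   // BL with a negative displacement
        Console.WriteLine($"0x{bl:X8} -> {Get(bl)}");              // prints the original displacement back
    }
}
```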
+ // add reg, immediate public void EmitADD(Register reg, byte immediate) { Builder.EmitInt((int)(0x91 << 24) | (immediate << 10) | ((byte)reg << 5) | (byte) reg); } + public void EmitSUB(Register reg, int immediate) + { + if (immediate >= 0) + { + Debug.Assert(immediate % 4 == 0); + + Builder.EmitUInt((uint)(0b1_1_0_100010_0_000000000000_00000_00000u | immediate << 10) | ((uint)reg << 5) | (uint)reg); + } + else + { + throw new NotImplementedException(); + } + } + public void EmitJMP(ISymbolNode symbol) { if (symbol.RepresentsIndirectionCell) @@ -100,11 +156,26 @@ public void EmitJMP(ISymbolNode symbol) } } + public void EmitJMP(Register reg) + { + Builder.EmitUInt((uint)(0b11010110_0_0_0_11111_00000_0_00000_00000u | ((uint)reg << 5))); + } + public void EmitINT3() { - throw new NotImplementedException(); + Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0); + } + + public void EmitINT3(uint id) + { + Builder.EmitUInt(0b11010100_001_1111111111111111_000_0_0); + Builder.EmitUInt((uint)(0b0_00101_00000000000000000000000000u | ((uint)4))); + Builder.EmitUInt(0xdeadc0de); + Builder.EmitUInt(id); + Builder.EmitUInt(0xdeadc0de); } + public void EmitJmpToAddrMode(ref AddrMode addrMode) { throw new NotImplementedException(); @@ -112,12 +183,13 @@ public void EmitJmpToAddrMode(ref AddrMode addrMode) public void EmitRET() { - throw new NotImplementedException(); + Builder.EmitUInt(0b11010110_0_1_0_11111_00000_0_11110_00000); } public void EmitRETIfEqual() { - throw new NotImplementedException(); + Builder.EmitUInt(0b01010100_0000000000000000010_0_0001u); + EmitRET(); } private bool InSignedByteRange(int i) diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs index 7e510164a45..360e4719efb 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunGenericHelperNode.cs @@ -15,22 +15,207 @@ partial class ReadyToRunGenericHelperNode { protected Register GetContextRegister(ref /* readonly */ ARM64Emitter encoder) { - throw new NotImplementedException(); + if (_id == ReadyToRunHelperId.DelegateCtor) + return encoder.TargetRegister.Arg2; + else + return encoder.TargetRegister.Arg0; } protected void EmitDictionaryLookup(NodeFactory factory, ref ARM64Emitter encoder, Register context, Register result, GenericLookupResult lookup, bool relocsOnly) { - throw new NotImplementedException(); + // INVARIANT: must not trash context register + + // Find the generic dictionary slot + int dictionarySlot = 0; + if (!relocsOnly) + { + // The concrete slot won't be known until we're emitting data - don't ask for it in relocsOnly. 
+ dictionarySlot = factory.GenericDictionaryLayout(_dictionaryOwner).GetSlotForEntry(lookup); + } + + // Load the generic dictionary cell + encoder.EmitLDR(result, context, dictionarySlot * factory.Target.PointerSize); + + switch (lookup.LookupResultReferenceType(factory)) + { + case GenericLookupResultReferenceType.Indirect: + // Do another indirection + encoder.EmitLDR(result, result); + break; + + case GenericLookupResultReferenceType.ConditionalIndirect: + // Test result, 0x1 + // JEQ L1 + // mov result, [result-1] + // L1: + throw new NotImplementedException(); + + default: + break; + } } protected sealed override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + // First load the generic context into the context register. + EmitLoadGenericContext(factory, ref encoder, relocsOnly); + + Register contextRegister = GetContextRegister(ref encoder); + + switch (_id) + { + case ReadyToRunHelperId.GetNonGCStaticBase: + { + Debug.Assert(contextRegister == encoder.TargetRegister.Arg0); + + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Result, _lookupSignature, relocsOnly); + + MetadataType target = (MetadataType)_target; + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitRET(); + } + else + { + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. + encoder.EmitMOV(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg0); + //encoder.EmitSUB(encoder.TargetRegister.Arg3, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, (short)factory.Target.PointerSize); + encoder.EmitCMP(encoder.TargetRegister.Arg2, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg3); + + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnNonGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetGCStaticBase: + { + Debug.Assert(contextRegister == encoder.TargetRegister.Arg0); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg0); + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Result, _lookupSignature, relocsOnly); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + + MetadataType target = (MetadataType)_target; + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitRET(); + } + else + { + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. 
+ GenericLookupResult nonGcRegionLookup = factory.GenericLookup.TypeNonGCStaticBase(target); + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, nonGcRegionLookup, relocsOnly); + + encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, (short)factory.Target.PointerSize); + encoder.EmitCMP(encoder.TargetRegister.Arg3, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); + + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetThreadStaticBase: + { + Debug.Assert(contextRegister == encoder.TargetRegister.Arg0); + + MetadataType target = (MetadataType)_target; + + // Look up the index cell + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg1, _lookupSignature, relocsOnly); + + ISymbolNode helperEntrypoint; + if (factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + // There is a lazy class constructor. We need the non-GC static base because that's where the + // class constructor context lives. + GenericLookupResult nonGcRegionLookup = factory.GenericLookup.TypeNonGCStaticBase(target); + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2, nonGcRegionLookup, relocsOnly); + int cctorContextSize = NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target); + encoder.EmitSUB(encoder.TargetRegister.Arg2, cctorContextSize); + + helperEntrypoint = factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase); + } + else + { + helperEntrypoint = factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType); + } + + // First arg: address of the TypeManager slot that provides the helper with + // information about module index and the type manager instance (which is used + // for initialization on first access). + encoder.EmitLDR(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg1); + + // Second arg: index of the type in the ThreadStatic section of the modules + encoder.EmitLDR(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg1, factory.Target.PointerSize); + + encoder.EmitJMP(helperEntrypoint); + } + break; + + case ReadyToRunHelperId.DelegateCtor: + { + // This is a weird helper. Codegen populated Arg0 and Arg1 with the values that the constructor + // method expects. Codegen also passed us the generic context in Arg2. + // We now need to load the delegate target method into Arg2 (using a dictionary lookup) + // and the optional 4th parameter, and call the ctor. 
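Several of the stubs above share the same lazy-class-constructor pattern: the class constructor context is stored immediately before the non-GC statics region, a slot in it is loaded and compared against 1, and the stub either returns early (EmitRETIfEqual) or tail-calls an EnsureClassConstructorRunAndReturn*Base helper. As an aside (not part of the patch itself), a toy model of that check; the context size, slot position, and names are simplified assumptions for illustration.

```c#
using System;

class LazyCctorSketch
{
    // Toy model: the class-constructor context sits immediately before the
    // non-GC statics region, and one pointer-sized slot in it records whether
    // the cctor has run. The stubs above compare that slot against 1 and skip
    // the helper call on equality. Sizes and layout here are assumptions.
    const int PointerSize = 8;
    const int CctorContextSize = 2 * PointerSize;

    static long[] memory = new long[16];             // fake non-GC statics block
    static int NonGcStaticBase => CctorContextSize;  // statics start after the context

    static int GetNonGcStaticBase()
    {
        int context = NonGcStaticBase - CctorContextSize;
        if (memory[(context + PointerSize) / PointerSize] == 1)
            return NonGcStaticBase;                  // fast path: EmitRETIfEqual

        // Slow path: stands in for EnsureClassConstructorRunAndReturnNonGCStaticBase.
        memory[(context + PointerSize) / PointerSize] = 1;
        Console.WriteLine("class constructor ran");
        return NonGcStaticBase;
    }

    static void Main()
    {
        GetNonGcStaticBase();   // prints once
        GetNonGcStaticBase();   // fast path, silent
    }
}
```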
+ + Debug.Assert(contextRegister == encoder.TargetRegister.Arg2); + + var target = (DelegateCreationInfo)_target; + + EmitDictionaryLookup(factory, ref encoder, encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, _lookupSignature, relocsOnly); + + if (target.Thunk != null) + { + Debug.Assert(target.Constructor.Method.Signature.Length == 3); + encoder.EmitMOV(encoder.TargetRegister.Arg3, target.Thunk); + } + else + { + Debug.Assert(target.Constructor.Method.Signature.Length == 2); + } + + encoder.EmitJMP(target.Constructor); + } + break; + + // These are all simple: just get the thing from the dictionary and we're done + case ReadyToRunHelperId.TypeHandle: + case ReadyToRunHelperId.MethodHandle: + case ReadyToRunHelperId.FieldHandle: + case ReadyToRunHelperId.MethodDictionary: + case ReadyToRunHelperId.MethodEntry: + case ReadyToRunHelperId.VirtualDispatchCell: + case ReadyToRunHelperId.DefaultConstructor: + case ReadyToRunHelperId.ObjectAllocator: + case ReadyToRunHelperId.TypeHandleForCasting: + { + EmitDictionaryLookup(factory, ref encoder, contextRegister, encoder.TargetRegister.Result, _lookupSignature, relocsOnly); + encoder.EmitRET(); + } + break; + + default: + encoder.EmitINT3(); + Console.WriteLine("Misiing R2R for {0}", Id.ToString()); + break; + } } protected virtual void EmitLoadGenericContext(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + // Assume generic context is already loaded in the context register. } } @@ -38,7 +223,22 @@ partial class ReadyToRunGenericLookupFromTypeNode { protected override void EmitLoadGenericContext(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + // We start with context register pointing to the EEType + Register contextRegister = GetContextRegister(ref encoder); + + // Locate the VTable slot that points to the dictionary + int vtableSlot = 0; + if (!relocsOnly) + { + // The concrete slot won't be known until we're emitting data - don't ask for it in relocsOnly. 
+ vtableSlot = VirtualMethodSlotHelper.GetGenericDictionarySlot(factory, (TypeDesc)_dictionaryOwner); + } + + int pointerSize = factory.Target.PointerSize; + int slotOffset = EETypeNode.GetVTableOffset(pointerSize) + (vtableSlot * pointerSize); + + // Load the dictionary pointer from the VTable + encoder.EmitLDR(contextRegister, contextRegister, slotOffset); } } } diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs index 28d7e50239f..1771968661a 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/ARM64ReadyToRunHelperNode.cs @@ -16,7 +16,186 @@ public partial class ReadyToRunHelperNode { protected override void EmitCode(NodeFactory factory, ref ARM64Emitter encoder, bool relocsOnly) { - throw new NotImplementedException(); + switch (Id) + { + case ReadyToRunHelperId.VirtualCall: + { + MethodDesc targetMethod = (MethodDesc)Target; + + Debug.Assert(!targetMethod.OwningType.IsInterface); + Debug.Assert(!targetMethod.CanMethodBeInSealedVTable()); + + int pointerSize = factory.Target.PointerSize; + + int slot = 0; + if (!relocsOnly) + { + slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType); + Debug.Assert(slot != -1); + } + + encoder.EmitLDR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.Arg0, 0); + encoder.EmitLDR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.IntraProcedureCallScratch1, + EETypeNode.GetVTableOffset(pointerSize) + (slot * pointerSize)); + encoder.EmitJMP(encoder.TargetRegister.IntraProcedureCallScratch1); + } + break; + + case ReadyToRunHelperId.GetNonGCStaticBase: + { + MetadataType target = (MetadataType)Target; + + bool hasLazyStaticConstructor = factory.PreinitializationManager.HasLazyStaticConstructor(target); + encoder.EmitMOV(encoder.TargetRegister.Result, factory.TypeNonGCStaticsSymbol(target)); + + if (!hasLazyStaticConstructor) + { + encoder.EmitRET(); + } + else + { + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. + encoder.EmitMOV(encoder.TargetRegister.Arg3, encoder.TargetRegister.Result); + encoder.EmitSUB(encoder.TargetRegister.Arg3, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg3, (short)factory.Target.PointerSize); + encoder.EmitCMP(encoder.TargetRegister.Arg2, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg3); + + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnNonGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetThreadStaticBase: + { + MetadataType target = (MetadataType)Target; + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeThreadStaticIndex(target)); + + // First arg: address of the TypeManager slot that provides the helper with + // information about module index and the type manager instance (which is used + // for initialization on first access). 
+ encoder.EmitLDR(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); + + // Second arg: index of the type in the ThreadStatic section of the modules + encoder.EmitLDR(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, factory.Target.PointerSize); + + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); + } + else + { + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); + encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + // TODO: performance optimization - inline the check verifying whether we need to trigger the cctor + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); + } + } + break; + + case ReadyToRunHelperId.GetGCStaticBase: + { + MetadataType target = (MetadataType)Target; + + encoder.EmitMOV(encoder.TargetRegister.Result, factory.TypeGCStaticsSymbol(target)); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result); + + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitRET(); + } + else + { + // We need to trigger the cctor before returning the base. It is stored at the beginning of the non-GC statics region. + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); + encoder.EmitSUB(encoder.TargetRegister.Arg2, NonGCStaticsNode.GetClassConstructorContextStorageSize(factory.Target, target)); + encoder.EmitLDR(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, (short)factory.Target.PointerSize); + encoder.EmitCMP(encoder.TargetRegister.Arg3, 1); + encoder.EmitRETIfEqual(); + + encoder.EmitMOV(encoder.TargetRegister.Arg1, encoder.TargetRegister.Result); + encoder.EmitMOV(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2); + + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnGCStaticBase)); + } + } + break; + + case ReadyToRunHelperId.DelegateCtor: + { + DelegateCreationInfo target = (DelegateCreationInfo)Target; + + if (target.TargetNeedsVTableLookup) + { + Debug.Assert(!target.TargetMethod.CanMethodBeInSealedVTable()); + + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg1); + + int slot = 0; + if (!relocsOnly) + slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, target.TargetMethod, target.TargetMethod.OwningType); + + Debug.Assert(slot != -1); + encoder.EmitLDR(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, + EETypeNode.GetVTableOffset(factory.Target.PointerSize) + (slot * factory.Target.PointerSize)); + } + else + { + ISymbolNode targetMethodNode = target.GetTargetNode(factory); + encoder.EmitMOV(encoder.TargetRegister.Arg2, target.GetTargetNode(factory)); + } + + if (target.Thunk != null) + { + Debug.Assert(target.Constructor.Method.Signature.Length == 3); + encoder.EmitMOV(encoder.TargetRegister.Arg3, target.Thunk); + } + else + { + Debug.Assert(target.Constructor.Method.Signature.Length == 2); + } + + encoder.EmitJMP(target.Constructor); + } + break; + + case ReadyToRunHelperId.ResolveVirtualFunction: + { + // Not tested + encoder.EmitINT3(); + + MethodDesc targetMethod = (MethodDesc)Target; + if (targetMethod.OwningType.IsInterface) + { + encoder.EmitMOV(encoder.TargetRegister.Arg1, 
factory.InterfaceDispatchCell(targetMethod)); + encoder.EmitJMP(factory.ExternSymbol("RhpResolveInterfaceMethod")); + } + else + { + if (relocsOnly) + break; + + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Arg0); + + Debug.Assert(!targetMethod.CanMethodBeInSealedVTable()); + + int slot = VirtualMethodSlotHelper.GetVirtualMethodSlot(factory, targetMethod, targetMethod.OwningType); + Debug.Assert(slot != -1); + encoder.EmitLDR(encoder.TargetRegister.Result, encoder.TargetRegister.Result, + ((short)(EETypeNode.GetVTableOffset(factory.Target.PointerSize) + (slot * factory.Target.PointerSize)))); + encoder.EmitRET(); + } + } + break; + + + default: + throw new NotImplementedException(); + } } } } diff --git a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs index ad69fdf9451..5a328f4c650 100644 --- a/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs +++ b/src/ILCompiler.Compiler/src/Compiler/DependencyAnalysis/Target_ARM64/TargetRegisterMap.cs @@ -20,6 +20,7 @@ public struct TargetRegisterMap public readonly Register Arg5; public readonly Register Arg6; public readonly Register Arg7; + public readonly Register IntraProcedureCallScratch1; public readonly Register Result; public TargetRegisterMap(TargetOS os) @@ -32,6 +33,7 @@ public TargetRegisterMap(TargetOS os) Arg5 = Register.X5; Arg6 = Register.X6; Arg7 = Register.X7; + IntraProcedureCallScratch1 = Register.X16; Result = Register.X0; } } diff --git a/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs b/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs index adca3d89466..90f7b77a4f7 100644 --- a/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs +++ b/src/ILCompiler.Compiler/src/Compiler/ExpectedIsaFeaturesRootProvider.cs @@ -19,7 +19,8 @@ public ExpectedIsaFeaturesRootProvider(InstructionSetSupport isaSupport) void ICompilationRootProvider.AddCompilationRoots(IRootingServiceProvider rootProvider) { if (_isaSupport.Architecture == TargetArchitecture.X64 - || _isaSupport.Architecture == TargetArchitecture.X86) + || _isaSupport.Architecture == TargetArchitecture.X86 + || _isaSupport.Architecture == TargetArchitecture.ARM64) { int isaFlags = HardwareIntrinsicHelpers.GetRuntimeRequiredIsaFlags(_isaSupport); byte[] bytes = BitConverter.GetBytes(isaFlags); diff --git a/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs index ec35284a865..7f51dd80d6a 100644 --- a/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/ILCompiler.Compiler/src/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -56,9 +56,23 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte string id = InstructionSetSupport.GetHardwareIntrinsicId(method.Context.Target.Architecture, method.OwningType); - Debug.Assert(method.Context.Target.Architecture == TargetArchitecture.X64 - || method.Context.Target.Architecture == TargetArchitecture.X86); - int flag = XArchIntrinsicConstants.FromHardwareIntrinsicId(id); + int flag = 0; + + switch (method.Context.Target.Architecture) + { + case TargetArchitecture.X86: + case TargetArchitecture.X64: + flag = XArchIntrinsicConstants.FromHardwareIntrinsicId(id); + break; + + case TargetArchitecture.ARM64: + flag = 
Arm64IntrinsicConstants.FromHardwareIntrinsicId(id); + break; + + default: + Debug.Fail("Unsupported Architecture"); + break; + } var emit = new ILEmitter(); ILCodeStream codeStream = emit.NewCodeStream(); @@ -75,12 +89,22 @@ public static MethodIL EmitIsSupportedIL(MethodDesc method, FieldDesc isSupporte public static int GetRuntimeRequiredIsaFlags(InstructionSetSupport instructionSetSupport) { - Debug.Assert(instructionSetSupport.Architecture == TargetArchitecture.X64 || - instructionSetSupport.Architecture == TargetArchitecture.X86); - return XArchIntrinsicConstants.FromInstructionSetFlags(instructionSetSupport.SupportedFlags); + switch (instructionSetSupport.Architecture) + { + case TargetArchitecture.X86: + case TargetArchitecture.X64: + return XArchIntrinsicConstants.FromInstructionSetFlags(instructionSetSupport.SupportedFlags); + + case TargetArchitecture.ARM64: + return Arm64IntrinsicConstants.FromInstructionSetFlags(instructionSetSupport.SupportedFlags); + + default: + Debug.Fail("Unsupported Architecture"); + return 0; + } } - // Keep this enumeration in sync with startup.cpp in the native runtime. + // Keep these enumerations in sync with startup.cpp in the native runtime. private static class XArchIntrinsicConstants { // SSE and SSE2 are baseline ISAs - they're always available @@ -166,5 +190,68 @@ public static int FromInstructionSetFlags(InstructionSetFlags instructionSets) return result; } } + + private static class Arm64IntrinsicConstants + { + public const int ArmBase = 0x0001; + public const int ArmBase_Arm64 = 0x0002; + public const int AdvSimd = 0x0004; + public const int AdvSimd_Arm64 = 0x0008; + public const int Aes = 0x0010; + public const int Crc32 = 0x0020; + public const int Crc32_Arm64 = 0x0040; + public const int Sha1 = 0x0080; + public const int Sha256 = 0x0100; + public const int Atomics = 0x0200; + public const int Vector64 = 0x0400; + public const int Vector128 = 0x0800; + + public static int FromHardwareIntrinsicId(string id) + { + return id switch + { + "ArmBase" => ArmBase, + "ArmBase_Arm64" => ArmBase_Arm64, + "AdvSimd" => AdvSimd, + "AdvSimd_Arm64" => AdvSimd_Arm64, + "Aes" => Aes, + "Crc32" => Crc32, + "Crc32_Arm64" => Crc32_Arm64, + "Sha1" => Sha1, + "Sha256" => Sha256, + "Atomics" => Atomics, + "Vector64" => Vector64, + "Vector128" => Vector128, + _ => throw new NotSupportedException(), + }; + } + + public static int FromInstructionSetFlags(InstructionSetFlags instructionSets) + { + int result = 0; + + foreach (InstructionSet instructionSet in instructionSets) + { + result |= instructionSet switch + { + InstructionSet.ARM64_ArmBase => ArmBase, + InstructionSet.ARM64_ArmBase_Arm64 => ArmBase_Arm64, + InstructionSet.ARM64_AdvSimd => AdvSimd, + InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd_Arm64, + InstructionSet.ARM64_Aes => Aes, + InstructionSet.ARM64_Crc32 => Crc32, + InstructionSet.ARM64_Crc32_Arm64 => Crc32_Arm64, + InstructionSet.ARM64_Sha1 => Sha1, + InstructionSet.ARM64_Sha256 => Sha256, + InstructionSet.ARM64_Atomics => Atomics, + InstructionSet.ARM64_Vector64 => Vector64, + InstructionSet.ARM64_Vector128 => Vector128, + _ => throw new NotSupportedException() + }; + } + + return result; + } + } } } diff --git a/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs b/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs index 61a20149364..81a609543fd 100644 --- a/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs +++ b/src/ILCompiler.Compiler/src/Compiler/JitHelper.cs @@ -62,13 +62,13 @@ public static void GetEntryPoint(TypeSystemContext context, 
ReadyToRunHelper id, break; case ReadyToRunHelper.WriteBarrier: - mangledName = "RhpAssignRef"; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpAssignRefArm64" : "RhpAssignRef"; break; case ReadyToRunHelper.CheckedWriteBarrier: - mangledName = "RhpCheckedAssignRef"; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpCheckedAssignRefArm64" : "RhpCheckedAssignRef"; break; case ReadyToRunHelper.ByRefWriteBarrier: - mangledName = "RhpByRefAssignRef"; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpByRefAssignRefArm64" : "RhpByRefAssignRef"; break; case ReadyToRunHelper.WriteBarrier_EAX: mangledName = "RhpAssignRefEAX"; diff --git a/src/ILCompiler.Compiler/src/Compiler/TypeExtensions.cs b/src/ILCompiler.Compiler/src/Compiler/TypeExtensions.cs index 89fd08f3dba..c70b6fa99f8 100644 --- a/src/ILCompiler.Compiler/src/Compiler/TypeExtensions.cs +++ b/src/ILCompiler.Compiler/src/Compiler/TypeExtensions.cs @@ -202,11 +202,11 @@ public static bool IsArrayTypeWithoutGenericInterfaces(this TypeDesc type) /// /// Determines whether an object of type '' requires 8-byte alignment on - /// 32bit ARM architectures. + /// 32bit ARM or 32bit Wasm architectures. /// public static bool RequiresAlign8(this TypeDesc type) { - if (type.Context.Target.Architecture != TargetArchitecture.ARM) + if (type.Context.Target.Architecture != TargetArchitecture.ARM && type.Context.Target.Architecture != TargetArchitecture.Wasm32) { return false; } diff --git a/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs b/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs index 56cc5f497ae..ccd065dfaca 100644 --- a/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs +++ b/src/ILCompiler.Compiler/src/Compiler/VectorOfTFieldLayoutAlgorithm.cs @@ -64,6 +64,17 @@ public override bool ComputeContainsGCPointers(DefType type) public override ValueTypeShapeCharacteristics ComputeValueTypeShapeCharacteristics(DefType type) { + if (type.Context.Target.Architecture == TargetArchitecture.ARM64 && + type.Instantiation[0].IsPrimitiveNumeric) + { + return type.InstanceFieldSize.AsInt switch + { + 8 => ValueTypeShapeCharacteristics.Vector64Aggregate, + 16 => ValueTypeShapeCharacteristics.Vector128Aggregate, + _ => ValueTypeShapeCharacteristics.None + }; + } + return _fallbackAlgorithm.ComputeValueTypeShapeCharacteristics(type); } diff --git a/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs b/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs index c1cac0a4fd5..b995160d073 100644 --- a/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/ILCompiler.RyuJit/src/JitInterface/CorInfoImpl.RyuJit.cs @@ -130,6 +130,7 @@ private void ComputeLookup(ref CORINFO_RESOLVED_TOKEN pResolvedToken, object ent { lookup.runtimeLookup.offset1 = IntPtr.Zero; } + lookup.runtimeLookup.sizeOffset = CORINFO.CORINFO_NO_SIZE_CHECK; lookup.runtimeLookup.testForFixup = false; // TODO: this will be needed in true multifile lookup.runtimeLookup.testForNull = false; lookup.runtimeLookup.indirectFirstOffset = false; @@ -671,7 +672,7 @@ private ObjectNode.ObjectData EncodeEHInfo() RelocType rel = (_compilation.NodeFactory.Target.IsWindows) ? 
RelocType.IMAGE_REL_BASED_ABSOLUTE : - RelocType.IMAGE_REL_BASED_REL32; + RelocType.IMAGE_REL_BASED_RELPTR32; if (_compilation.NodeFactory.Target.Abi == TargetAbi.Jit) rel = RelocType.IMAGE_REL_BASED_REL32; diff --git a/src/ILCompiler.WebAssembly/src/CodeGen/ILToWebAssemblyImporter.cs b/src/ILCompiler.WebAssembly/src/CodeGen/ILToWebAssemblyImporter.cs index 48d6974320c..2e79f8a8543 100644 --- a/src/ILCompiler.WebAssembly/src/CodeGen/ILToWebAssemblyImporter.cs +++ b/src/ILCompiler.WebAssembly/src/CodeGen/ILToWebAssemblyImporter.cs @@ -820,12 +820,6 @@ private void ImportLoadVar(int index, bool argument) PushLoadExpression(GetStackValueKind(type), (argument ? "arg" : "loc") + index + "_", typedLoadLocation, type); } - private LLVMValueRef LoadTemp(int index) - { - LLVMValueRef address = LoadVarAddress(index, LocalVarKind.Temp, out TypeDesc type); - return _builder.BuildLoad(CastToPointerToTypeDesc(address, type, $"Temp{index}_"), $"LdTemp{index}_"); - } - internal LLVMValueRef LoadTemp(int index, LLVMTypeRef asType) { LLVMValueRef address = LoadVarAddress(index, LocalVarKind.Temp, out TypeDesc type); @@ -1061,6 +1055,7 @@ private StackValueKind GetStackValueKind(TypeDesc type) case TypeFlags.ByRef: return StackValueKind.ByRef; case TypeFlags.Pointer: + case TypeFlags.FunctionPointer: return StackValueKind.NativeInt; default: return StackValueKind.Unknown; @@ -1072,7 +1067,7 @@ private void ImportStoreVar(int index, bool argument) TypeDesc varType; StackEntry toStore = _stack.Pop(); LLVMValueRef varAddress = LoadVarAddress(index, argument ? LocalVarKind.Argument : LocalVarKind.Local, out varType); - CastingStore(varAddress, toStore, varType, $"Variable{index}_"); + CastingStore(varAddress, toStore, varType, false, $"Variable{index}_"); } private void ImportStoreHelper(LLVMValueRef toStore, LLVMTypeRef valueType, LLVMValueRef basePtr, uint offset, string name = null, LLVMBuilderRef builder = default(LLVMBuilderRef)) @@ -1104,10 +1099,76 @@ private LLVMValueRef CastToPointerToTypeDesc(LLVMValueRef source, TypeDesc type, return CastIfNecessary(source, LLVMTypeRef.CreatePointer(GetLLVMTypeForTypeDesc(type), 0), (name ?? 
"") + type.ToString()); } - private void CastingStore(LLVMValueRef address, StackEntry value, TypeDesc targetType, string targetName = null) + private void CastingStore(LLVMValueRef address, StackEntry value, TypeDesc targetType, bool withGCBarrier, string targetName = null) { - var typedStoreLocation = CastToPointerToTypeDesc(address, targetType, targetName); - _builder.BuildStore(value.ValueAsType(targetType, _builder), typedStoreLocation); + if (withGCBarrier && targetType.IsGCPointer) + { + CallRuntime(_method.Context, "InternalCalls", "RhpAssignRef", new StackEntry[] + { + new ExpressionEntry(StackValueKind.Int32, "address", address), value + }); + } + else + { + var typedStoreLocation = CastToPointerToTypeDesc(address, targetType, targetName); + var llvmValue = value.ValueAsType(targetType, _builder); + if (withGCBarrier && IsStruct(targetType)) + { + StoreStruct(address, llvmValue, targetType, typedStoreLocation); + } + else + { + _builder.BuildStore(llvmValue, typedStoreLocation); + } + } + } + + private static bool IsStruct(TypeDesc typeDesc) + { + return typeDesc.IsValueType && !typeDesc.IsPrimitive && !typeDesc.IsEnum; + } + + private void StoreStruct(LLVMValueRef address, LLVMValueRef llvmValue, TypeDesc targetType, LLVMValueRef typedStoreLocation, bool childStruct = false) + { + // TODO: if this is used for anything multithreaded, this foreach and the subsequent BuildStore are susceptible to a race condition + foreach (FieldDesc f in targetType.GetFields()) + { + if (f.IsStatic) continue; + if (IsStruct(f.FieldType) && llvmValue.TypeOf.IsPackedStruct) + { + LLVMValueRef targetAddress = _builder.BuildGEP(address, new[] { BuildConstInt32(f.Offset.AsInt) }); + uint index = LLVMSharpInterop.ElementAtOffset(_compilation.TargetData, llvmValue.TypeOf, (ulong)f.Offset.AsInt); + LLVMValueRef fieldValue = _builder.BuildExtractValue(llvmValue, index); + //recurse into struct + StoreStruct(targetAddress, fieldValue, f.FieldType, CastToPointerToTypeDesc(targetAddress, f.FieldType), true); + } + else if (f.FieldType.IsGCPointer) + { + LLVMValueRef targetAddress = _builder.BuildGEP(address, new[] {BuildConstInt32(f.Offset.AsInt)}); + LLVMValueRef fieldValue; + if (llvmValue.TypeOf.IsPackedStruct) + { + uint index = LLVMSharpInterop.ElementAtOffset(_compilation.TargetData, llvmValue.TypeOf, (ulong) f.Offset.AsInt); + fieldValue = _builder.BuildExtractValue(llvmValue, index); + Debug.Assert(fieldValue.TypeOf.Kind == LLVMTypeKind.LLVMPointerTypeKind, "expected an LLVM pointer type"); + } + else + { + // single field IL structs are not LLVM structs + fieldValue = llvmValue; + } + CallRuntime(_method.Context, "InternalCalls", "RhpAssignRef", + new StackEntry[] + { + new ExpressionEntry(StackValueKind.Int32, "targetAddress", targetAddress), + new ExpressionEntry(StackValueKind.ObjRef, "sourceAddress", fieldValue) + }); + } + } + if (!childStruct) + { + _builder.BuildStore(llvmValue, typedStoreLocation); // just copy all the fields again for simplicity, if all the fields were set using RhpAssignRef then a possible optimisation would be to skip this line + } } private LLVMValueRef CastIfNecessary(LLVMValueRef source, LLVMTypeRef valueType, string name = null, bool unsigned = false) @@ -1219,6 +1280,8 @@ internal static LLVMTypeRef GetLLVMTypeForTypeDesc(TypeDesc type) case TypeFlags.Pointer: return LLVMTypeRef.CreatePointer(type.GetParameterType().IsVoid ? 
LLVMTypeRef.Int8 : GetLLVMTypeForTypeDesc(type.GetParameterType()), 0); + case TypeFlags.FunctionPointer: + return LLVMTypeRef.CreatePointer(LLVMTypeRef.Int8, 0); case TypeFlags.Int64: case TypeFlags.UInt64: @@ -3578,7 +3641,6 @@ private void ImportLoadIndirect(TypeDesc type) { var pointer = _stack.Pop(); Debug.Assert(pointer is ExpressionEntry || pointer is ConstantEntry); - var expressionPointer = pointer as ExpressionEntry; if (type == null) { type = GetWellKnownType(WellKnownType.Object); @@ -3600,19 +3662,36 @@ private void ImportStoreIndirect(TypeDesc type) StackEntry destinationPointer = _stack.Pop(); LLVMValueRef typedValue; LLVMValueRef typedPointer; + bool requireWriteBarrier; if (type != null) { - typedValue = value.ValueAsType(type, _builder); typedPointer = destinationPointer.ValueAsType(type.MakePointerType(), _builder); + typedValue = value.ValueAsType(type, _builder); + if (IsStruct(type)) + { + StoreStruct(typedPointer, typedValue, type, typedPointer); + return; + } + requireWriteBarrier = type.IsGCPointer; } else { typedPointer = destinationPointer.ValueAsType(LLVMTypeRef.CreatePointer(LLVMTypeRef.Int32, 0), _builder); typedValue = value.ValueAsInt32(_builder, false); + requireWriteBarrier = (value is ExpressionEntry) && !((ExpressionEntry)value).RawLLVMValue.IsNull && value.Type.IsGCPointer; + } + if (requireWriteBarrier) + { + CallRuntime(_method.Context, "InternalCalls", "RhpAssignRef", new StackEntry[] + { + new ExpressionEntry(StackValueKind.Int32, "typedPointer", typedPointer), value + }); + } + else + { + _builder.BuildStore(typedValue, typedPointer); } - - _builder.BuildStore(typedValue, typedPointer); } private void ImportBinaryOperation(ILOpcode opcode) @@ -3729,8 +3808,8 @@ private void ImportBinaryOperation(ILOpcode opcode) break; case ILOpcode.add_ovf: - Debug.Assert(type.Category == TypeFlags.Int32 || type.Category == TypeFlags.Int64); - if (type.Category == TypeFlags.Int32) + Debug.Assert(CanPerformSignedOverflowOperations(op1.Kind)); + if (Is32BitStackValue(op1.Kind)) { BuildAddOverflowChecksForSize(ref AddOvf32Function, left, right, LLVMTypeRef.Int32, BuildConstInt32(int.MaxValue), BuildConstInt32(int.MinValue), true); } @@ -3742,8 +3821,8 @@ private void ImportBinaryOperation(ILOpcode opcode) result = _builder.BuildAdd(left, right, "add"); break; case ILOpcode.add_ovf_un: - Debug.Assert(type.Category == TypeFlags.UInt32 || type.Category == TypeFlags.Int32 || type.Category == TypeFlags.UInt64 || type.Category == TypeFlags.Int64 || type.Category == TypeFlags.Pointer); - if (type.Category == TypeFlags.UInt32 || type.Category == TypeFlags.Int32 || type.Category == TypeFlags.Pointer) + Debug.Assert(CanPerformUnsignedOverflowOperations(op1.Kind)); + if (Is32BitStackValue(op1.Kind)) { BuildAddOverflowChecksForSize(ref AddOvfUn32Function, left, right, LLVMTypeRef.Int32, BuildConstUInt32(uint.MaxValue), BuildConstInt32(0), false); } @@ -3755,8 +3834,8 @@ private void ImportBinaryOperation(ILOpcode opcode) result = _builder.BuildAdd(left, right, "add"); break; case ILOpcode.sub_ovf: - Debug.Assert(type.Category == TypeFlags.Int32 || type.Category == TypeFlags.Int64); - if (type.Category == TypeFlags.Int32) + Debug.Assert(CanPerformSignedOverflowOperations(op1.Kind)); + if (Is32BitStackValue(op1.Kind)) { BuildSubOverflowChecksForSize(ref SubOvf32Function, left, right, LLVMTypeRef.Int32, BuildConstInt32(int.MaxValue), BuildConstInt32(int.MinValue), true); } @@ -3768,8 +3847,8 @@ private void ImportBinaryOperation(ILOpcode opcode) result = _builder.BuildSub(left, 
right, "sub"); break; case ILOpcode.sub_ovf_un: - Debug.Assert(type.Category == TypeFlags.UInt32 || type.Category == TypeFlags.Int32 || type.Category == TypeFlags.UInt64 || type.Category == TypeFlags.Int64 || type.Category == TypeFlags.Pointer); - if (type.Category == TypeFlags.UInt32 || type.Category == TypeFlags.Int32 || type.Category == TypeFlags.Pointer) + Debug.Assert(CanPerformUnsignedOverflowOperations(op1.Kind)); + if (Is32BitStackValue(op1.Kind)) { BuildSubOverflowChecksForSize(ref SubOvfUn32Function, left, right, LLVMTypeRef.Int32, BuildConstUInt32(uint.MaxValue), BuildConstInt32(0), false); } @@ -3800,6 +3879,21 @@ private void ImportBinaryOperation(ILOpcode opcode) PushExpression(kind, "binop", result, type); } + bool CanPerformSignedOverflowOperations(StackValueKind kind) + { + return kind == StackValueKind.Int32 || kind == StackValueKind.Int64; + } + + bool CanPerformUnsignedOverflowOperations(StackValueKind kind) + { + return CanPerformSignedOverflowOperations(kind) || kind == StackValueKind.ByRef || + kind == StackValueKind.ObjRef || kind == StackValueKind.NativeInt; + } + + bool Is32BitStackValue(StackValueKind kind) + { + return kind == StackValueKind.Int32 || kind == StackValueKind.ByRef || kind == StackValueKind.ObjRef || kind == StackValueKind.NativeInt; + } LLVMValueRef StartOverflowCheckFunction(LLVMTypeRef sizeTypeRef, bool signed, string throwFuncName, out LLVMValueRef leftOp, out LLVMValueRef rightOp, out LLVMBuilderRef builder, out LLVMBasicBlockRef elseBlock, @@ -4720,7 +4814,7 @@ private void ImportStoreField(int token, bool isStatic) StackEntry valueEntry = _stack.Pop(); LLVMValueRef fieldAddress = GetFieldAddress(runtimeDeterminedField, field, isStatic); - CastingStore(fieldAddress, valueEntry, field.FieldType); + CastingStore(fieldAddress, valueEntry, field.FieldType, true); } // Loads symbol address. Address is represented as a i32* @@ -4859,9 +4953,18 @@ private void ImportNewArray(int token) else { arguments = new StackEntry[] { new LoadExpressionEntry(StackValueKind.ValueType, "eeType", GetEETypePointerForTypeDesc(runtimeDeterminedArrayType, true), eeTypeDesc), sizeOfArray }; - //TODO: call GetNewArrayHelperForType from JitHelper.cs (needs refactoring) } - PushNonNull(CallRuntime(_compilation.TypeSystemContext, InternalCalls, "RhpNewArray", arguments, runtimeDeterminedArrayType)); + var helper = GetNewArrayHelperForType(runtimeDeterminedArrayType); + PushNonNull(CallRuntime(_compilation.TypeSystemContext, InternalCalls, helper, arguments, runtimeDeterminedArrayType)); + } + + //TODO: copy of the same method in JitHelper.cs but that is internal + public static string GetNewArrayHelperForType(TypeDesc type) + { + if (type.RequiresAlign8()) + return "RhpNewArrayAlign8"; + + return "RhpNewArray"; } LLVMValueRef GetGenericContext() @@ -4936,7 +5039,7 @@ private void ImportStoreElement(TypeDesc elementType) StackEntry arrayReference = _stack.Pop(); var nullSafeElementType = elementType ?? 
GetWellKnownType(WellKnownType.Object); LLVMValueRef elementAddress = GetElementAddress(index.ValueAsInt32(_builder, true), arrayReference.ValueAsType(LLVMTypeRef.CreatePointer(LLVMTypeRef.Int8, 0), _builder), nullSafeElementType); - CastingStore(elementAddress, value, nullSafeElementType); + CastingStore(elementAddress, value, nullSafeElementType, true); } private void ImportLoadLength() diff --git a/src/ILCompiler.WebAssembly/src/CodeGen/LLVMSharpInterop.cs b/src/ILCompiler.WebAssembly/src/CodeGen/LLVMSharpInterop.cs new file mode 100644 index 00000000000..e4a2fb9c1b0 --- /dev/null +++ b/src/ILCompiler.WebAssembly/src/CodeGen/LLVMSharpInterop.cs @@ -0,0 +1,15 @@ +using LLVMSharp.Interop; + +namespace Internal.IL +{ + /// + /// Workaround while waiting for https://github.com/microsoft/LLVMSharp/pull/141 + /// + internal class LLVMSharpInterop + { + internal static unsafe uint ElementAtOffset(LLVMTargetDataRef targetDataRef, LLVMTypeRef structTypeRef, ulong offset) + { + return LLVM.ElementAtOffset(targetDataRef, structTypeRef, offset); + } + } +} diff --git a/src/ILCompiler.WebAssembly/src/CodeGen/WebAssemblyObjectWriter.cs b/src/ILCompiler.WebAssembly/src/CodeGen/WebAssemblyObjectWriter.cs index 113229192a9..430e703db43 100644 --- a/src/ILCompiler.WebAssembly/src/CodeGen/WebAssemblyObjectWriter.cs +++ b/src/ILCompiler.WebAssembly/src/CodeGen/WebAssemblyObjectWriter.cs @@ -272,6 +272,14 @@ private void EmitNativeMain(LLVMContextRef context) }, "returnValue"); + LLVMValueRef RhpReversePInvokeReturn2 = Module.GetNamedFunction("RhpReversePInvokeReturn2"); + LLVMTypeRef reversePInvokeFunctionType = LLVMTypeRef.CreateFunction(LLVMTypeRef.Void, new LLVMTypeRef[] { LLVMTypeRef.CreatePointer(reversePInvokeFrameType, 0) }, false); + if (RhpReversePInvoke2.Handle == IntPtr.Zero) + { + RhpReversePInvokeReturn2 = Module.AddFunction("RhpReversePInvokeReturn2", reversePInvokeFunctionType); + } + builder.BuildCall(RhpReversePInvokeReturn2, new LLVMValueRef[] { reversePinvokeFrame }, ""); + builder.BuildRet(mainReturn); mainFunc.Linkage = LLVMLinkage.LLVMExternalLinkage; } diff --git a/src/ILCompiler.WebAssembly/src/Compiler/WebAssemblyCodegenCompilation.cs b/src/ILCompiler.WebAssembly/src/Compiler/WebAssemblyCodegenCompilation.cs index 0fcb2982101..ae2075f7e8e 100644 --- a/src/ILCompiler.WebAssembly/src/Compiler/WebAssemblyCodegenCompilation.cs +++ b/src/ILCompiler.WebAssembly/src/Compiler/WebAssemblyCodegenCompilation.cs @@ -17,6 +17,7 @@ public sealed class WebAssemblyCodegenCompilation : Compilation { internal WebAssemblyCodegenConfigProvider Options { get; } internal LLVMModuleRef Module { get; } + internal LLVMTargetDataRef TargetData { get; } public new WebAssemblyCodegenNodeFactory NodeFactory { get; } internal LLVMDIBuilderRef DIBuilder { get; } internal Dictionary DebugMetadataMap { get; } @@ -32,7 +33,7 @@ internal WebAssemblyCodegenCompilation( { NodeFactory = nodeFactory; LLVMModuleRef m = LLVMModuleRef.CreateWithName("netscripten"); - m.Target = "wasm32-unknown-unknown-wasm"; + m.Target = "wasm32-unknown-emscripten"; // https://llvm.org/docs/LangRef.html#langref-datalayout // e litte endian, mangled names // m:e ELF mangling @@ -41,8 +42,8 @@ internal WebAssemblyCodegenCompilation( // n:32:64 native widths // S128 natural alignment of stack m.DataLayout = "e-m:e-p:32:32-i64:64-n32:64-S128"; - Module = m; - + Module = m; + TargetData = m.CreateExecutionEngine().TargetData; Options = options; DIBuilder = Module.CreateDIBuilder(); DebugMetadataMap = new Dictionary(); diff --git 
a/src/ILCompiler.WebAssembly/src/ILCompiler.WebAssembly.csproj b/src/ILCompiler.WebAssembly/src/ILCompiler.WebAssembly.csproj index 63257e819bd..401bf46abb2 100644 --- a/src/ILCompiler.WebAssembly/src/ILCompiler.WebAssembly.csproj +++ b/src/ILCompiler.WebAssembly/src/ILCompiler.WebAssembly.csproj @@ -40,6 +40,7 @@ + diff --git a/src/ILCompiler/src/ConfigurablePInvokePolicy.cs b/src/ILCompiler/src/ConfigurablePInvokePolicy.cs index f656e06ab23..75304120732 100644 --- a/src/ILCompiler/src/ConfigurablePInvokePolicy.cs +++ b/src/ILCompiler/src/ConfigurablePInvokePolicy.cs @@ -70,7 +70,8 @@ public override bool GenerateDirectCall(string importModule, string methodName) else { // Account for System.Private.CoreLib.Native / System.Globalization.Native / System.Native / etc - return importModule.StartsWith("libSystem."); + // TODO: Remove "System." prefix - temporary workaround for https://github.com/dotnet/corert/issues/8241 + return importModule.StartsWith("libSystem.") || importModule.StartsWith("System."); } } } diff --git a/src/ILCompiler/src/Program.cs b/src/ILCompiler/src/Program.cs index 484924561df..9638e56eb6d 100644 --- a/src/ILCompiler/src/Program.cs +++ b/src/ILCompiler/src/Program.cs @@ -403,6 +403,14 @@ private int Run(string[] args) optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2"); } } + else if (_targetArchitecture == TargetArchitecture.ARM64) + { + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("aes"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("crc"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha1"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha2"); + optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lse"); + } optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(out var optimisticInstructionSet, out _, (string specifiedInstructionSet, string impliedInstructionSet) => throw new NotSupportedException()); diff --git a/src/JitInterface/src/CorInfoImpl.cs b/src/JitInterface/src/CorInfoImpl.cs index 2be99788f34..ffef463bfaf 100644 --- a/src/JitInterface/src/CorInfoImpl.cs +++ b/src/JitInterface/src/CorInfoImpl.cs @@ -43,8 +43,16 @@ private enum ImageFileMachine ARM = 0x01c4, ARM64 = 0xaa64, } + private enum CFI_OPCODE + { + CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. + CFI_DEF_CFA_REGISTER, // New register is used to compute CFA + CFI_REL_OFFSET, // Register is saved at offset from the current CFA + CFI_DEF_CFA // Take address from register and add offset to it. 
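+        // Each record handled by CompressARM64CFI below is a fixed 8-byte blob:
+        //   [byte codeOffset][byte CFI_OPCODE][short dwarfReg][int offset]
+        // (presumably mirroring the native CFI_CODE struct in src/Native/ObjWriter/cfi.h);
+        // the compressed output is re-emitted in the same layout.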
+ }; - internal const string JitLibrary = "clrjitilc"; + //internal const string JitLibrary = "clrjitilc"; + internal const string JitLibrary = "protononjit"; #if SUPPORT_JIT private const string JitSupportLibrary = "*"; @@ -2596,7 +2604,11 @@ private bool getTailCallHelpers(ref CORINFO_RESOLVED_TOKEN callToken, CORINFO_SI { // Slow tailcalls are not supported yet // https://github.com/dotnet/runtime/issues/35423 +#if READYTORUN throw new NotImplementedException(nameof(getTailCallHelpers)); +#else + return false; +#endif } private byte[] _code; @@ -2687,9 +2699,146 @@ private void allocUnwindInfo(byte* pHotCode, byte* pColdCode, uint startOffset, blobData[i] = pUnwindBlock[i]; } + var target = _compilation.TypeSystemContext.Target; + + if (target.Architecture == TargetArchitecture.ARM64 && target.OperatingSystem == TargetOS.Linux) + { + blobData = CompressARM64CFI(blobData); + } + _frameInfos[_usedFrameInfos++] = new FrameInfo(flags, (int)startOffset, (int)endOffset, blobData); } + private byte[] CompressARM64CFI(byte[] blobData) + { + if (blobData == null || blobData.Length == 0) + { + return blobData; + } + + Debug.Assert(blobData.Length % 8 == 0); + + short spReg = -1; + + int codeOffset = 0; + short cfaRegister = spReg; + int cfaOffset = 0; + int spOffset = 0; + + int[] registerOffset = new int[96]; + + for (int i = 0; i < registerOffset.Length; i++) + { + registerOffset[i] = int.MinValue; + } + + int offset = 0; + while (offset < blobData.Length) + { + codeOffset = Math.Max(codeOffset, blobData[offset++]); + CFI_OPCODE opcode = (CFI_OPCODE)blobData[offset++]; + short dwarfReg = BitConverter.ToInt16(blobData, offset); + offset += sizeof(short); + int cfiOffset = BitConverter.ToInt32(blobData, offset); + offset += sizeof(int); + + switch (opcode) + { + case CFI_OPCODE.CFI_DEF_CFA_REGISTER: + cfaRegister = dwarfReg; + + if (spOffset != 0) + { + for (int i = 0; i < registerOffset.Length; i++) + { + if (registerOffset[i] != int.MinValue) + { + registerOffset[i] -= spOffset; + } + } + + cfaOffset += spOffset; + spOffset = 0; + } + + break; + + case CFI_OPCODE.CFI_REL_OFFSET: + Debug.Assert(cfaRegister == spReg); + registerOffset[dwarfReg] = cfiOffset; + break; + + case CFI_OPCODE.CFI_ADJUST_CFA_OFFSET: + if (cfaRegister != spReg) + { + cfaOffset += cfiOffset; + } + else + { + spOffset += cfiOffset; + + for (int i = 0; i < registerOffset.Length; i++) + { + if (registerOffset[i] != int.MinValue) + { + registerOffset[i] += cfiOffset; + } + } + } + break; + } + } + + using (MemoryStream cfiStream = new MemoryStream()) + { + int storeOffset = 0; + + using (BinaryWriter cfiWriter = new BinaryWriter(cfiStream)) + { + if (cfaRegister != -1) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write(cfaOffset != 0 ? 
(byte)CFI_OPCODE.CFI_DEF_CFA : (byte)CFI_OPCODE.CFI_DEF_CFA_REGISTER); + cfiWriter.Write(cfaRegister); + cfiWriter.Write(cfaOffset); + storeOffset = cfaOffset; + } + else + { + if (cfaOffset != 0) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_ADJUST_CFA_OFFSET); + cfiWriter.Write((short)-1); + cfiWriter.Write(cfaOffset); + } + + if (spOffset != 0) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_DEF_CFA); + cfiWriter.Write((short)31); + cfiWriter.Write(spOffset); + //storeOffset = -spOffset; + } + } + + for (int i = registerOffset.Length - 1; i >= 0; i--) + { + if (registerOffset[i] != int.MinValue) + { + cfiWriter.Write((byte)codeOffset); + cfiWriter.Write((byte)CFI_OPCODE.CFI_REL_OFFSET); + cfiWriter.Write((short)i); + cfiWriter.Write(registerOffset[i] + storeOffset); + } + } + } + + return cfiStream.ToArray(); + } + } + private void* allocGCInfo(UIntPtr size) { _gcInfo = new byte[(int)size]; @@ -2811,11 +2960,14 @@ private static RelocType GetRelocType(TargetArchitecture targetArchitecture, ush if (targetArchitecture != TargetArchitecture.ARM64) return (RelocType)fRelocType; + const ushort IMAGE_REL_ARM64_BRANCH26 = 3; const ushort IMAGE_REL_ARM64_PAGEBASE_REL21 = 4; const ushort IMAGE_REL_ARM64_PAGEOFFSET_12A = 6; switch (fRelocType) { + case IMAGE_REL_ARM64_BRANCH26: + return RelocType.IMAGE_REL_BASED_ARM64_BRANCH26; case IMAGE_REL_ARM64_PAGEBASE_REL21: return RelocType.IMAGE_REL_BASED_ARM64_PAGEBASE_REL21; case IMAGE_REL_ARM64_PAGEOFFSET_12A: diff --git a/src/Native/CMakeLists.txt b/src/Native/CMakeLists.txt index d1b27073ee4..f2257fe1c6a 100644 --- a/src/Native/CMakeLists.txt +++ b/src/Native/CMakeLists.txt @@ -50,6 +50,7 @@ elseif(CLR_CMAKE_TARGET_ARCH STREQUAL wasm) set(CLR_CMAKE_PLATFORM_ARCH_WASM 1) add_definitions(-DTARGET_WASM=1) add_definitions(-DHOST_WASM=1) + add_definitions(-DFEATURE_64BIT_ALIGNMENT=1) else() clr_unknown_arch() endif() diff --git a/src/Native/ObjWriter/cfi.h b/src/Native/ObjWriter/cfi.h index 4d5a3648207..c161b260adb 100644 --- a/src/Native/ObjWriter/cfi.h +++ b/src/Native/ObjWriter/cfi.h @@ -13,7 +13,8 @@ enum CFI_OPCODE { CFI_ADJUST_CFA_OFFSET, // Offset is adjusted relative to the current one. CFI_DEF_CFA_REGISTER, // New register is used to compute CFA - CFI_REL_OFFSET // Register is saved at offset from the current CFA + CFI_REL_OFFSET, // Register is saved at offset from the current CFA + CFI_DEF_CFA // Take address from register and add offset to it. }; struct CFI_CODE diff --git a/src/Native/ObjWriter/llvm.patch b/src/Native/ObjWriter/llvm.patch index 67ae23ec956..a3eca0ecbe3 100644 --- a/src/Native/ObjWriter/llvm.patch +++ b/src/Native/ObjWriter/llvm.patch @@ -1,5 +1,5 @@ diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h -index 7c1189e..d1d77c9 100644 +index 7c1189e46ab..d1d77c97311 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -101,6 +101,11 @@ public: @@ -15,7 +15,7 @@ index 7c1189e..d1d77c9 100644 /// can change its size during relaxation. virtual void EmitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &); diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h -index 5390e79..5b258e7 100644 +index 5390e794242..5b258e76787 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -115,6 +115,7 @@ public: @@ -37,7 +37,7 @@ index 5390e79..5b258e7 100644 /// method uses .byte directives instead of .ascii or .asciz for readability. 
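The CompressARM64CFI helper above walks the JIT-emitted unwind records and, in the common frame-pointer case, folds the prolog's incremental CFA adjustments into a single CFA-defining record (`CFI_DEF_CFA` or `CFI_DEF_CFA_REGISTER`) followed by one `CFI_REL_OFFSET` per saved register; tracing the code by hand, a standard `stp fp, lr, [sp, #-16]!` / `mov fp, sp` prolog comes out as `CFI_DEF_CFA(fp, 16)` plus a `CFI_REL_OFFSET` for lr and fp. A minimal decoder for the 8-byte record format it consumes and produces, as a standalone sketch (the `CfiRecord` type and `Decode` helper are illustrative, not part of the change):

```c#
using System;
using System.Collections.Generic;

// Illustrative sketch only: mirrors the byte/byte/short/int layout that
// CompressARM64CFI parses and re-emits; names below are hypothetical.
struct CfiRecord
{
    public byte CodeOffset;   // instruction offset within the prolog
    public byte Opcode;       // one of the CFI_OPCODE values
    public short DwarfReg;    // DWARF register number (e.g. 29 = fp, 30 = lr)
    public int Offset;        // CFA offset or register save offset

    public static List<CfiRecord> Decode(byte[] blob)
    {
        var records = new List<CfiRecord>();
        for (int i = 0; i < blob.Length; i += 8)   // blob length is a multiple of 8
        {
            records.Add(new CfiRecord
            {
                CodeOffset = blob[i],
                Opcode = blob[i + 1],
                DwarfReg = BitConverter.ToInt16(blob, i + 2),
                Offset = BitConverter.ToInt32(blob, i + 4)
            });
        }
        return records;
    }
}
```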
virtual void EmitBinaryData(StringRef Data); diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp -index 174397e..ef7161f 100644 +index 174397e2739..ef7161fb56c 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -122,7 +122,7 @@ void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) { @@ -68,7 +68,7 @@ index 174397e..ef7161f 100644 // We need to create a local symbol to avoid relocations. Frame.Begin = getContext().createTempSymbol(); diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp -index 2bfb9a6..a710098 100644 +index 2bfb9a63eed..a710098e798 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -830,6 +830,7 @@ void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, @@ -79,8 +79,56 @@ index 2bfb9a6..a710098 100644 void MCStreamer::EmitBinaryData(StringRef Data) { EmitBytes(Data); } void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) { visitUsedExpr(*Value); +diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +index 2bd0cbf9f7c..e7643d5f66d 100644 +--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp ++++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +@@ -41,6 +41,15 @@ public: + return AArch64::NumTargetFixupKinds; + } + ++ Optional getFixupKind(StringRef Name) const { ++ return StringSwitch>(Name) ++ .Case("R_AARCH64_JUMP26", (MCFixupKind)AArch64::fixup_aarch64_pcrel_call26) ++ .Case("R_AARCH64_ADR_PREL_LO21",(MCFixupKind)AArch64::fixup_aarch64_pcrel_adrp_imm21) ++ .Case("R_AARCH64_ADD_ABS_LO12_NC", (MCFixupKind)AArch64::fixup_aarch64_add_imm12) ++ .Default(MCAsmBackend::getFixupKind(Name)); ++ } ++ ++ + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { + const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds are defined +@@ -126,6 +135,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { + case AArch64::fixup_aarch64_pcrel_call26: + case FK_Data_4: + case FK_SecRel_4: ++ case FK_PCRel_4: + return 4; + + case FK_Data_8: +@@ -222,6 +232,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + case FK_Data_8: + case FK_SecRel_2: + case FK_SecRel_4: ++ case FK_PCRel_4: + return Value; + } + } +diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +index 89c3e5b4c76..ba105365d74 100644 +--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp ++++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +@@ -129,6 +129,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, + case FK_Data_2: + return R_CLS(PREL16); + case FK_Data_4: ++ case FK_PCRel_4: + return R_CLS(PREL32); + case FK_Data_8: + if (IsILP32) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp -index a77df7a..e1aa752 100644 +index a77df7a2598..e1aa7526f9b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -48,6 +48,14 @@ public: @@ -118,7 +166,7 @@ index a77df7a..e1aa752 100644 return 2; case FK_SecRel_4: diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h -index 0237496..01676a0 100644 +index 02374966daf..01676a01683 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -36,6 
+36,7 @@ public: @@ -130,7 +178,7 @@ index 0237496..01676a0 100644 bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp -index 59f31be..9b95598 100644 +index 59f31be69d5..9b95598f99f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -103,6 +103,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, @@ -144,7 +192,7 @@ index 59f31be..9b95598 100644 case ARM::fixup_arm_uncondbl: switch (Modifier) { diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp -index 93f4006..81e4caa 100644 +index 93f4006cee8..108e9c51e13 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -396,6 +396,7 @@ private: @@ -245,7 +293,7 @@ index 93f4006..81e4caa 100644 const SmallVectorImpl &Opcodes) { FlushPendingOffset(); diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp -index 4a94318..f4f5aa1 100644 +index 4a943187ab6..f4f5aa11bf8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -61,6 +61,7 @@ void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {} @@ -257,7 +305,7 @@ index 4a94318..f4f5aa1 100644 const SmallVectorImpl &Opcodes) { } diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt -index b654b8c..58d2515 100644 +index b654b8c5cb8..58d25159af8 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -46,6 +46,7 @@ add_llvm_external_project(clang) diff --git a/src/Native/ObjWriter/objwriter.cpp b/src/Native/ObjWriter/objwriter.cpp index b9c31e62c54..a95a781ab64 100644 --- a/src/Native/ObjWriter/objwriter.cpp +++ b/src/Native/ObjWriter/objwriter.cpp @@ -50,6 +50,7 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/Win64EH.h" #include "llvm/Target/TargetMachine.h" +#include "..\..\..\lib\Target\AArch64\MCTargetDesc\AArch64MCExpr.h" using namespace llvm; using namespace llvm::codeview; @@ -309,7 +310,13 @@ void ObjectWriter::SetCodeSectionAttribute(const char *SectionName, } void ObjectWriter::EmitAlignment(int ByteAlignment) { - Streamer->EmitValueToAlignment(ByteAlignment, 0x90 /* Nop */); + int64_t fillValue = 0x90; //x86 nop + + if (TMachine->getTargetTriple().getArch() == llvm::Triple::ArchType::aarch64) { + fillValue = 0; // ARM64 bad + } + + Streamer->EmitValueToAlignment(ByteAlignment, fillValue); } void ObjectWriter::EmitBlob(int BlobSize, const char *Blob) { @@ -333,15 +340,28 @@ void ObjectWriter::EmitSymbolDef(const char *SymbolName, bool global) { Streamer->EmitSymbolAttribute(Sym, MCSA_Local); } + Triple TheTriple = TMachine->getTargetTriple(); + // A Thumb2 function symbol should be marked with an appropriate ELF // attribute to make later computation of a relocation address value correct - if (GetTriple().getArch() == Triple::thumb && - GetTriple().getObjectFormat() == Triple::ELF && + + if (TheTriple.getObjectFormat() == Triple::ELF && Streamer->getCurrentSectionOnly()->getKind().isText()) { - Streamer->EmitSymbolAttribute(Sym, MCSA_ELF_TypeFunction); + switch (TheTriple.getArch()) { + case Triple::thumb: + case Triple::aarch64: + Streamer->EmitSymbolAttribute(Sym, MCSA_ELF_TypeFunction); + break; + + default: + break; + } } - Streamer->EmitLabel(Sym); + if (Sym->isUndefined()) + { + 
Streamer->EmitLabel(Sym); + } } const MCSymbolRefExpr * @@ -353,6 +373,8 @@ ObjectWriter::GetSymbolRefExpr(const char *SymbolName, return MCSymbolRefExpr::create(T, Kind, *OutContext); } + + unsigned ObjectWriter::GetDFSize() { return Streamer->getOrCreateDataFragment()->getContents().size(); } @@ -398,15 +420,16 @@ int ObjectWriter::EmitSymbolRef(const char *SymbolName, case RelocType::IMAGE_REL_BASED_DIR64: Size = 8; break; - case RelocType::IMAGE_REL_BASED_REL32: + case RelocType::IMAGE_REL_BASED_REL32: { Size = 4; - IsPCRel = true; + IsPCRel = true; if (ObjFileInfo->getObjectFileType() == ObjFileInfo->IsELF) { - // PLT is valid only for code symbols, - // but there shouldn't be references to global data symbols - Kind = MCSymbolRefExpr::VK_PLT; + // PLT is valid only for code symbols, + // but there shouldn't be references to global data symbols + Kind = MCSymbolRefExpr::VK_PLT; } break; + } case RelocType::IMAGE_REL_BASED_RELPTR32: Size = 4; IsPCRel = true; @@ -424,6 +447,25 @@ int ObjectWriter::EmitSymbolRef(const char *SymbolName, EmitRelocDirective(GetDFSize(), "R_ARM_THM_JUMP24", TargetExpr); return 4; } + case RelocType::IMAGE_REL_BASED_ARM64_BRANCH26: { + const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta); + EmitRelocDirective(GetDFSize(), "R_AARCH64_JUMP26", TargetExpr); + return 4; + } + case RelocType::IMAGE_REL_BASED_ARM64_PAGEBASE_REL21: { + const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta); + TargetExpr = + AArch64MCExpr::create(TargetExpr, AArch64MCExpr::VK_CALL, *OutContext); + EmitRelocDirective(GetDFSize(), "R_AARCH64_ADR_PREL_LO21", TargetExpr); + return 4; + } + case RelocType::IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A: { + const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta); + TargetExpr = + AArch64MCExpr::create(TargetExpr, AArch64MCExpr::VK_LO12, *OutContext); + EmitRelocDirective(GetDFSize(), "R_AARCH64_ADD_ABS_LO12_NC", TargetExpr); + return 4; + } } const MCExpr *TargetExpr = GenTargetExpr(SymbolName, Kind, Delta, IsPCRel, Size); @@ -510,6 +552,11 @@ void ObjectWriter::EmitCFICode(int Offset, const char *Blob) { "Unexpected Offset Value for OpDefCfaRegister"); Streamer->EmitCFIDefCfaRegister(CfiCode->DwarfReg); break; + case CFI_DEF_CFA: + assert(CfiCode->Offset != 0 && + "Unexpected Offset Value for OpDefCfa"); + Streamer->EmitCFIDefCfa(CfiCode->DwarfReg, CfiCode->Offset); + break; default: assert(false && "Unrecognized CFI"); break; diff --git a/src/Native/ObjWriter/objwriter.h b/src/Native/ObjWriter/objwriter.h index 925ae5068d1..6a41613a4e3 100644 --- a/src/Native/ObjWriter/objwriter.h +++ b/src/Native/ObjWriter/objwriter.h @@ -53,7 +53,10 @@ enum class RelocType { IMAGE_REL_BASED_DIR64 = 0x0A, IMAGE_REL_BASED_REL32 = 0x10, IMAGE_REL_BASED_THUMB_BRANCH24 = 0x13, + IMAGE_REL_BASED_ARM64_BRANCH26 = 0x15, IMAGE_REL_BASED_RELPTR32 = 0x7C, + IMAGE_REL_BASED_ARM64_PAGEBASE_REL21 = 0x81, + IMAGE_REL_BASED_ARM64_PAGEOFFSET_12A = 0x82, }; class ObjectWriter { diff --git a/src/Native/Runtime/GCHelpers.cpp b/src/Native/Runtime/GCHelpers.cpp index 53b90cd3913..66f04a1b0a2 100644 --- a/src/Native/Runtime/GCHelpers.cpp +++ b/src/Native/Runtime/GCHelpers.cpp @@ -214,14 +214,76 @@ COOP_PINVOKE_HELPER(Int64, RhGetAllocatedBytesForCurrentThread, ()) return currentAllocated; } -COOP_PINVOKE_HELPER(void, RhGetMemoryInfo, ( - UInt64* highMemLoadThresholdBytes, UInt64* totalAvailableMemoryBytes, - UInt64* lastRecordedMemLoadBytes, UInt32* lastRecordedMemLoadPct, - size_t* lastRecordedHeapSizeBytes, size_t* lastRecordedFragmentationBytes)) 
-{ - return GCHeapUtilities::GetGCHeap()->GetMemoryInfo(highMemLoadThresholdBytes, totalAvailableMemoryBytes, - lastRecordedMemLoadBytes, lastRecordedMemLoadPct, - lastRecordedHeapSizeBytes, lastRecordedFragmentationBytes); +struct RH_GC_GENERATION_INFO +{ + UInt64 sizeBefore; + UInt64 fragmentationBefore; + UInt64 sizeAfter; + UInt64 fragmentationAfter; +}; + +#if defined(TARGET_X86) && !defined(TARGET_UNIX) +#include "pshpack4.h" +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable:4121) // alignment of a member was sensitive to packing +#endif +#endif +struct RH_GH_MEMORY_INFO +{ +public: + UInt64 highMemLoadThresholdBytes; + UInt64 totalAvailableMemoryBytes; + UInt64 lastRecordedMemLoadBytes; + UInt64 lastRecordedHeapSizeBytes; + UInt64 lastRecordedFragmentationBytes; + UInt64 totalCommittedBytes; + UInt64 promotedBytes; + UInt64 pinnedObjectCount; + UInt64 finalizationPendingCount; + UInt64 index; + UInt32 generation; + UInt32 pauseTimePercent; + UInt8 isCompaction; + UInt8 isConcurrent; + RH_GC_GENERATION_INFO generationInfo0; + RH_GC_GENERATION_INFO generationInfo1; + RH_GC_GENERATION_INFO generationInfo2; + RH_GC_GENERATION_INFO generationInfo3; + RH_GC_GENERATION_INFO generationInfo4; + UInt64 pauseDuration0; + UInt64 pauseDuration1; +}; +#if defined(TARGET_X86) && !defined(TARGET_UNIX) +#ifdef _MSC_VER +#pragma warning(pop) +#endif +#include "poppack.h" +#endif + +COOP_PINVOKE_HELPER(void, RhGetMemoryInfo, (RH_GH_MEMORY_INFO* pData, int kind)) +{ + UInt64* genInfoRaw = (UInt64*)&(pData->generationInfo0); + UInt64* pauseInfoRaw = (UInt64*)&(pData->pauseDuration0); + + return GCHeapUtilities::GetGCHeap()->GetMemoryInfo( + &(pData->highMemLoadThresholdBytes), + &(pData->totalAvailableMemoryBytes), + &(pData->lastRecordedMemLoadBytes), + &(pData->lastRecordedHeapSizeBytes), + &(pData->lastRecordedFragmentationBytes), + &(pData->totalCommittedBytes), + &(pData->promotedBytes), + &(pData->pinnedObjectCount), + &(pData->finalizationPendingCount), + &(pData->index), + &(pData->generation), + &(pData->pauseTimePercent), + (bool*)&(pData->isCompaction), + (bool*)&(pData->isConcurrent), + genInfoRaw, + pauseInfoRaw, + kind); } COOP_PINVOKE_HELPER(Int64, RhGetTotalAllocatedBytes, ()) diff --git a/src/Native/Runtime/PalRedhawk.h b/src/Native/Runtime/PalRedhawk.h index 7789c05346b..27e1aff03a1 100644 --- a/src/Native/Runtime/PalRedhawk.h +++ b/src/Native/Runtime/PalRedhawk.h @@ -852,6 +852,27 @@ REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport(); REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled(); #endif // defined(HOST_X86) || defined(HOST_AMD64) +#if defined(HOST_ARM64) +// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.Aot.cs +enum ARM64IntrinsicConstants +{ + ARM64IntrinsicConstants_ArmBase = 0x0001, + ARM64IntrinsicConstants_ArmBase_Arm64 = 0x0002, + ARM64IntrinsicConstants_AdvSimd = 0x0004, + ARM64IntrinsicConstants_AdvSimd_Arm64 = 0x0008, + ARM64IntrinsicConstants_Aes = 0x0010, + ARM64IntrinsicConstants_Crc32 = 0x0020, + ARM64IntrinsicConstants_Crc32_Arm64 = 0x0040, + ARM64IntrinsicConstants_Sha1 = 0x0080, + ARM64IntrinsicConstants_Sha256 = 0x0100, + ARM64IntrinsicConstants_Atomics = 0x0200, + ARM64IntrinsicConstants_Vector64 = 0x0400, + ARM64IntrinsicConstants_Vector128 = 0x0800 +}; + +REDHAWK_PALIMPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags); +#endif //defined(HOST_ARM64) + #include "PalRedhawkInline.h" #endif // !PAL_REDHAWK_INCLUDED diff --git a/src/Native/Runtime/arm64/AllocFast.S 
b/src/Native/Runtime/arm64/AllocFast.S index 876f2dfbcb8..07e68455950 100644 --- a/src/Native/Runtime/arm64/AllocFast.S +++ b/src/Native/Runtime/arm64/AllocFast.S @@ -1,4 +1,293 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: Implement +#include +#include "AsmOffsets.inc" + +// GC type flags +GC_ALLOC_FINALIZE = 1 +GC_ALLOC_ALIGN8_BIAS = 4 +GC_ALLOC_ALIGN8 = 8 + +// +// Rename fields of nested structs +// +OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + + + +// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// x0 == EEType + LEAF_ENTRY RhpNewFast, _TEXT + + // x1 = GetThread() + INLINE_GETTHREAD x1 + + // + // x0 contains EEType pointer + // + ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize] + + // + // x0: EEType pointer + // x1: Thread pointer + // x2: base size + // + + // Load potential new object address into x12. + ldr x12, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x13, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x13 + bhi RhpNewFast_RarePath + + // Update the alloc pointer to account for the allocation. + str x2, [x1, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new objects EEType pointer + str x0, [x12, #OFFSETOF__Object__m_pEEType] + + mov x0, x12 + ret + +RhpNewFast_RarePath: + mov x1, #0 + b RhpNewObject + LEAF_END RhpNewFast, _TEXT + +// Allocate non-array object with finalizer. +// x0 == EEType + LEAF_ENTRY RhpNewFinalizable, _TEXT + mov x1, #GC_ALLOC_FINALIZE + b RhpNewObject + LEAF_END RhpNewFinalizable, _TEXT + +// Allocate non-array object. +// x0 == EEType +// x1 == alloc flags + NESTED_ENTRY RhpNewObject, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME x3 + + // x3: transition frame + + // Preserve the EEType in x19 + mov x19, x0 + + ldr w2, [x0, #OFFSETOF__EEType__m_uBaseSize] + + // Call the rest of the allocation helper. + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + bl RhpGcAlloc + + // Set the new objects EEType pointer on success. + cbz x0, NewOutOfMemory + str x19, [x0, #OFFSETOF__Object__m_pEEType] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + ldr w1, [x19, #OFFSETOF__EEType__m_uBaseSize] + movk x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16 + cmp x1, x2 + blo New_SkipPublish + + // x0: object + // x1: already contains object size + bl RhpPublishObject // x0: this function returns the object that was passed-in + +New_SkipPublish: + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +NewOutOfMemory: + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov x0, x19 // EEType pointer + mov x1, 0 // Indicate that we should throw OOM. 
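RhpNewFast above is a thread-local bump allocation: take the thread's alloc_ptr, add the object's base size, and fall back to RhpNewObject only when the result would cross alloc_limit. A managed sketch of the same fast path, with hypothetical type and field names standing in for the real alloc_context:

```c#
// Sketch only: the real fast path is the hand-written assembly above.
// AllocContext and its fields are hypothetical stand-ins for
// Thread::m_alloc_context (alloc_ptr / alloc_limit).
struct AllocContext
{
    public ulong AllocPtr;
    public ulong AllocLimit;
}

static class BumpAllocatorSketch
{
    // Returns the address of the new object, or 0 to signal "take the slow path".
    public static ulong TryAllocFast(ref AllocContext ctx, uint baseSize)
    {
        ulong obj = ctx.AllocPtr;
        ulong newPtr = obj + baseSize;
        if (newPtr > ctx.AllocLimit)      // cmp x2, x13 / bhi RhpNewFast_RarePath
            return 0;                     // doesn't fit: RhpNewObject / RhpGcAlloc path

        ctx.AllocPtr = newPtr;            // bump the allocation pointer
        // The assembly then stores the EEType pointer into the object header:
        //   str x0, [x12, #OFFSETOF__Object__m_pEEType]
        return obj;
    }
}
```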
+ + POP_COOP_PINVOKE_FRAME + b RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewObject, _TEXT + +// Allocate a string. +// x0 == EEType +// x1 == element/character count + LEAF_ENTRY RhNewString, _TEXT + // Make sure computing the overall allocation size wont overflow + // TODO: this should be actually MAX_STRING_LENGTH + mov x2, 0x7FFFFFFF + cmp x1, x2 + bhi StringSizeOverflow + + // Compute overall allocation size (align(base size + (element size * elements), 8)). + mov w2, #STRING_COMPONENT_SIZE + mov x3, #(STRING_BASE_SIZE + 7) + umaddl x2, w1, w2, x3 // x2 = w1 * w2 + x3 + and x2, x2, #-8 + + // x0 == EEType + // x1 == element count + // x2 == string size + + INLINE_GETTHREAD x3 + + // Load potential new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x12 + bhi RhpNewArrayRare + + // Reload new object address into r12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Update the alloc pointer to account for the allocation. + str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new objects EEType pointer and element count. + str x0, [x12, #OFFSETOF__Object__m_pEEType] + str x1, [x12, #OFFSETOF__Array__m_Length] + + // Return the object allocated in x0. + mov x0, x12 + + ret + +StringSizeOverflow: + // We get here if the length of the final string object can not be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an OOM exception that the caller of this allocator understands. + + // x0 holds EEType pointer already + mov x1, #1 // Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + LEAF_END RhNewString, _Text + +// Allocate one dimensional, zero based array (SZARRAY). +// x0 == EEType +// x1 == element count + LEAF_ENTRY RhpNewArray, _Text + + // We want to limit the element count to the non-negative 32-bit int range. + // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component + // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst + // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. + mov x2, #0x7FFFFFFF + cmp x1, x2 + bhi ArraySizeOverflow + + ldrh w2, [x0, #OFFSETOF__EEType__m_usComponentSize] + umull x2, w1, w2 + ldr w3, [x0, #OFFSETOF__EEType__m_uBaseSize] + add x2, x2, x3 + add x2, x2, #7 + and x2, x2, #-8 + + // x0 == EEType + // x1 == element count + // x2 == array size + + INLINE_GETTHREAD x3 + + // Load potential new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add x2, x2, x12 + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_limit] + cmp x2, x12 + bhi RhpNewArrayRare + + // Reload new object address into x12. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Update the alloc pointer to account for the allocation. 
+ str x2, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + + // Set the new objects EEType pointer and element count. + str x0, [x12, #OFFSETOF__Object__m_pEEType] + str x1, [x12, #OFFSETOF__Array__m_Length] + + // Return the object allocated in r0. + mov x0, x12 + + ret + +ArraySizeOverflow: + // We get here if the size of the final array object can not be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // x0 holds EEType pointer already + mov x1, #1 // Indicate that we should throw OverflowException + b RhExceptionHandling_FailedAllocation + LEAF_END RhpNewArray, _TEXT + +// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. +// x0 == EEType +// x1 == element count +// x2 == array size + Thread::m_alloc_context::alloc_ptr +// x3 == Thread + NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler + + // Recover array size by subtracting the alloc_ptr from x2. + ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr] + sub x2, x2, x12 + + PUSH_COOP_PINVOKE_FRAME x3 + + // Preserve data we will need later into the callee saved registers + mov x19, x0 // Preserve EEType + mov x20, x1 // Preserve element count + mov x21, x2 // Preserve array size + + mov x1, #0 + + // void* RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame) + bl RhpGcAlloc + + // Set the new objects EEType pointer and length on success. + cbz x0, ArrayOutOfMemory + + // Success, set the array type and element count in the new object. + str x19, [x0, #OFFSETOF__Object__m_pEEType] + str x20, [x0, #OFFSETOF__Array__m_Length] + + // If the object is bigger than RH_LARGE_OBJECT_SIZE, we must publish it to the BGC + movk x2, #(RH_LARGE_OBJECT_SIZE & 0xFFFF) + movk x2, #(RH_LARGE_OBJECT_SIZE >> 16), lsl #16 + cmp x21, x2 + blo NewArray_SkipPublish + + // x0 = newly allocated array. x1 = size + mov x1, x21 + bl RhpPublishObject + +NewArray_SkipPublish: + + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + +ArrayOutOfMemory: + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + mov x0, x19 // EEType Pointer + mov x1, 0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + b RhExceptionHandling_FailedAllocation + + NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/Native/Runtime/arm64/CallDescrWorker.S b/src/Native/Runtime/arm64/CallDescrWorker.S index 876f2dfbcb8..24f9fdf2aab 100644 --- a/src/Native/Runtime/arm64/CallDescrWorker.S +++ b/src/Native/Runtime/arm64/CallDescrWorker.S @@ -1,4 +1,140 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: Implement +#include +#include "AsmOffsets.inc" + +//----------------------------------------------------------------------------- +// This helper routine enregisters the appropriate arguments and makes the +// actual call. 
+// +// INPUT: x0: pointer to CallDescrData struct +// +//----------------------------------------------------------------------------- +//void RhCallDescrWorker(CallDescrData * pCallDescrData); + NESTED_ENTRY RhCallDescrWorker, _TEXT, NoHandler + + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-32 + PROLOG_SAVE_REG_PAIR x19, x20, #16 + + // Save the value of SP before we start pushing any arguments + mov x20, sp + + mov x19, x0 // save pCallDescrData in x19 + + ldr w1, [x19, #OFFSETOF__CallDescrData__numStackSlots] + cbz w1, Ldonestack + + // Add frame padding to ensure frame size is a multiple of 16 (a requirement of the OS ABI). + // We push two registers (above) and numStackSlots arguments (below). If this comes to an odd number + // of slots we must pad with another. This simplifies to "if the low bit of numStackSlots is set, + // extend the stack another eight bytes". + ldr x0, [x19, #OFFSETOF__CallDescrData__pSrc] + add x0, x0, x1, lsl #3 // pSrcEnd=pSrc+8*numStackSlots + ands x2, x1, #1 + beq Lstackloop + + // This loop copies numStackSlots words + // from [pSrcEnd-8,pSrcEnd-16,...] to [sp-8,sp-16,...] + + // Pad and store one stack slot as number of slots are odd + ldr x4, [x0,#-8]! + str x4, [sp,#-16]! + subs x1, x1, #1 + beq Ldonestack +Lstackloop: + ldp x2, x4, [x0,#-16]! + stp x2, x4, [sp,#-16]! + subs x1, x1, #2 + bne Lstackloop +Ldonestack: + + // If FP arguments are supplied in registers (x9 != NULL) then initialize all of them from the pointer + // given in x9. + ldr x9, [x19, #OFFSETOF__CallDescrData__pFloatArgumentRegisters] + cbz x9, LNoFloatingPoint + ldp d0, d1, [x9] + ldp d2, d3, [x9, #16] + ldp d4, d5, [x9, #32] + ldp d6, d7, [x9, #48] +LNoFloatingPoint: + + // Copy [pArgumentRegisters, ..., pArgumentRegisters + 64] + // into x0, ..., x7, x8 + + ldr x9, [x19, #OFFSETOF__CallDescrData__pArgumentRegisters] + ldp x0, x1, [x9] + ldp x2, x3, [x9, #16] + ldp x4, x5, [x9, #32] + ldp x6, x7, [x9, #48] + ldr x8, [x9, #64] + + // call pTarget + ldr x9, [x19, #OFFSETOF__CallDescrData__pTarget] + blr x9 + + EXPORT_POINTER_TO_ADDRESS PointerToReturnFromCallDescrThunk + + // Symbol used to identify thunk call to managed function so the special + // case unwinder can unwind through this function. Sadly we cannot directly + // export this symbol right now because it confuses DIA unwinder to believe + // it's the beginning of a new method, therefore we export the address + // of an auxiliary variable holding the address instead. 
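The return-value handling that follows dispatches on CallDescrData::fpReturnSize: 0 means an integer return copied from x0/x1 into the return buffer, 4 or 8 mean a float/double returned in d0, and 16 or 32 mean an HFA returned in d0-d3; anything else hits EMIT_BREAKPOINT. A sketch of that dispatch in managed code (the method and buffer names are hypothetical):

```c#
using System;

static class CallDescrReturnSketch
{
    // Sketch of the fpReturnSize dispatch implemented by the assembly below.
    // "returnBuffer" stands in for CallDescrData::pReturnBuffer.
    static void StoreReturnValue(uint fpReturnSize, Span<byte> returnBuffer,
                                 ulong x0, ulong x1, ReadOnlySpan<byte> d0toD3)
    {
        switch (fpReturnSize)
        {
            case 0:           // integer return: stp x0, x1 into the buffer
                BitConverter.TryWriteBytes(returnBuffer, x0);
                BitConverter.TryWriteBytes(returnBuffer.Slice(8), x1);
                break;
            case 4:           // float  (returned in d0)
            case 8:           // double (returned in d0)
                d0toD3.Slice(0, 8).CopyTo(returnBuffer);
                break;
            case 16:          // HFA: d0-d3 are stored in both cases
            case 32:
                d0toD3.Slice(0, 32).CopyTo(returnBuffer);
                break;
            default:
                throw new InvalidOperationException("unreachable (EMIT_BREAKPOINT)");
        }
    }
}
```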
+ + ldr w3, [x19, #OFFSETOF__CallDescrData__fpReturnSize] + + // Unlike desktop returnValue is a pointer to a return buffer, not the buffer itself + ldr x19, [x19, #OFFSETOF__CallDescrData__pReturnBuffer] + + // Int return case + cbz w3, LIntReturn + + // Float return case + cmp w3, #4 + beq LFloatOrDoubleReturn + + // Double return case + cmp w3, #8 + bne LCheckHFAReturn + +LFloatOrDoubleReturn: + str d0, [x19] + b LReturnDone + +LCheckHFAReturn: + cmp w3, #16 + beq LFloatOrDoubleHFAReturn + cmp w3, #32 + beq LFloatOrDoubleHFAReturn + b LNoHFAReturn + +LFloatOrDoubleHFAReturn: + //Single/Double HFAReturn return case + stp d0, d1, [x19, #00] + stp d2, d3, [x19, #16] + b LReturnDone + +LNoHFAReturn: + + EMIT_BREAKPOINT // Unreachable + +LIntReturn: + // Save return value(s) into retbuf for int + stp x0, x1, [x19] + +LReturnDone: + +#ifdef _DEBUG + // Trash the floating point registers to ensure that the HFA return values + // won't survive by accident + ldp d0, d1, [sp] + ldp d2, d3, [sp, #16] +#endif + // Restore the value of SP + mov sp, x20 + + EPILOG_RESTORE_REG_PAIR x19, x20, #16 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #32 + EPILOG_RETURN + + NESTED_END RhCallDescrWorker diff --git a/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S b/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S index 876f2dfbcb8..de6ff7319ed 100644 --- a/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S +++ b/src/Native/Runtime/arm64/CallingConventionConverterHelpers.S @@ -1,4 +1,61 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: Implement +#include +#include "AsmOffsets.inc" + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; CallingConventionCoverter Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +POINTER_SIZE = 0x08 + +// Note: The "__jmpstub__" prefix is used to indicate to debugger +// that it must step-through this stub when it encounters it while +// stepping. 
+ + + // + // void CallingConventionConverter_ReturnThunk() + // + LEAF_ENTRY CallingConventionConverter_ReturnThunk, _TEXT + ret + LEAF_END CallingConventionConverter_ReturnThunk, _TEXT + + // + // __jmpstub__CallingConventionConverter_CommonCallingStub + // + // struct CallingConventionConverter_CommonCallingStub_PointerData + // { + // void *ManagedCallConverterThunk; + // void *UniversalThunk; + // } + // + // struct CommonCallingStubInputData + // { + // ULONG_PTR CallingConventionId; + // CallingConventionConverter_CommonCallingStub_PointerData *commonData; // Only the ManagedCallConverterThunk field is used + // // However, it is specified just like other platforms, so the behavior of the common + // // calling stub is easier to debug + // } + // + // xip0 - Points at CommonCallingStubInputData + // + // + LEAF_ENTRY __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + ldr xip1, [xip0] // put CallingConventionId into xip1 as "parameter" to universal transition thunk + ldr xip0, [xip0, #POINTER_SIZE] // get pointer to CallingConventionConverter_CommonCallingStub_PointerData into xip0 + ldr x12, [xip0, #POINTER_SIZE] // get address of UniversalTransitionThunk (which we'll tailcall to later) + ldr xip0, [xip0] // get address of ManagedCallConverterThunk (target for universal thunk to call) + br x12 + LEAF_END __jmpstub__CallingConventionConverter_CommonCallingStub, _TEXT + + // + // void CallingConventionConverter_GetStubs(IntPtr *returnVoidStub, IntPtr *returnIntegerStub, IntPtr *commonCallingStub) + // + LEAF_ENTRY CallingConventionConverter_GetStubs, _TEXT + ldr x12, =CallingConventionConverter_ReturnThunk + str x12, [x0] // ARM doesn't need different return thunks. + str x12, [x1] + ldr x12, =__jmpstub__CallingConventionConverter_CommonCallingStub + str x12, [x2] + ret + LEAF_END CallingConventionConverter_GetStubs, _TEXT diff --git a/src/Native/Runtime/arm64/ExceptionHandling.S b/src/Native/Runtime/arm64/ExceptionHandling.S index 876f2dfbcb8..1dc4c83cbc0 100644 --- a/src/Native/Runtime/arm64/ExceptionHandling.S +++ b/src/Native/Runtime/arm64/ExceptionHandling.S @@ -1,4 +1,617 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-// TODO: Implement +#include +#include "AsmOffsets.inc" + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15)) + +#define HARDWARE_EXCEPTION 1 +#define SOFTWARE_EXCEPTION 0 + +.global RhpTrapThreads + +// ----------------------------------------------------------------------------- +// Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx) + .macro ALLOC_THROW_FRAME exceptionType + + mov x3, sp + + // Setup a PAL_LIMITED_CONTEXT on the stack { + .if \exceptionType == HARDWARE_EXCEPTION + sub sp,sp,#0x50 + stp x3, x1, [sp] // x3 is the SP and x1 is the IP of the fault site + // TODO PROLOG_PUSH_MACHINE_FRAME + .else + PROLOG_STACK_ALLOC 0x50 + stp x3, lr, [sp] // x3 is the SP and lr is the IP of the fault site + .endif + stp d8, d9, [sp, #0x10] + stp d10, d11, [sp, #0x20] + stp d12, d13, [sp, #0x30] + stp d14, d15, [sp, #0x40] + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x70 + stp xzr, xzr, [sp, #0x10] // locations reserved for return value, not used for exception handling + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + .endm + +// ----------------------------------------------------------------------------- +// Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet) +// extraStackSize - extra stack space that the user of the macro can use to +// store additional registers + .macro ALLOC_CALL_FUNCLET_FRAME extraStackSize + + // Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,lr, #-60! + // is intentional. Above statement would also emit instruction to save + // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body + // of method. However, this method needs to be able to change fp before calling funclet. + // This is required to access locals in funclet. + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp,lr, #-0x60 + PROLOG_SAVE_REG_PAIR x19, x20, #0x10 + PROLOG_SAVE_REG_PAIR x21, x22, #0x20 + PROLOG_SAVE_REG_PAIR x23, x24, #0x30 + PROLOG_SAVE_REG_PAIR x25, x26, #0x40 + PROLOG_SAVE_REG_PAIR x27, x28, #0x50 + mov fp, sp + + .if \extraStackSize != 0 + PROLOG_STACK_ALLOC \extraStackSize + .endif + .endm + +// ----------------------------------------------------------------------------- +// Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet) +// extraStackSize - extra stack space that the user of the macro can use to +// store additional registers. +// It needs to match the value passed to the corresponding +// ALLOC_CALL_FUNCLET_FRAME. 
+ .macro FREE_CALL_FUNCLET_FRAME extraStackSize + + .if \extraStackSize != 0 + EPILOG_STACK_FREE \extraStackSize + .endif + + EPILOG_RESTORE_REG_PAIR x19, x20, #0x10 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x20 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x30 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x40 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x50 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x60 + .endm + + +// ----------------------------------------------------------------------------- +// Macro used to restore preserved general purpose and FP registers from REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro RESTORE_PRESERVED_REGISTERS regdisplayReg + + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + ldr x19, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + ldr x20, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + ldr x21, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + ldr x22, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + ldr x23, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + ldr x24, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + ldr x25, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + ldr x26, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + ldr x27, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + ldr x28, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + ldr fp, [x12] + // + // load FP preserved regs + // + add x12, \regdisplayReg, #OFFSETOF__REGDISPLAY__D + ldp d8, d9, [x12, #0x00] + ldp d10, d11, [x12, #0x10] + ldp d12, d13, [x12, #0x20] + ldp d14, d15, [x12, #0x30] + .endm + +// ----------------------------------------------------------------------------- +// Macro used to save preserved general purpose and FP registers to REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro SAVE_PRESERVED_REGISTERS regdisplayReg + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + str x19, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + str x20, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + str x21, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + str x22, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + str x23, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + str x24, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + str x25, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + str x26, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + str x27, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + str x28, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + str fp, [x12] + // + // store vfp preserved regs + // + add x12, \regdisplayReg, #OFFSETOF__REGDISPLAY__D + stp d8, d9, [x12, #0x00] + stp d10, d11, [x12, #0x10] + stp d12, d13, [x12, #0x20] + stp d14, d15, [x12, #0x30] + .endm + + +// ----------------------------------------------------------------------------- +// Macro used to thrash preserved general purpose registers in REGDISPLAY +// to make sure nobody uses them +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro TRASH_PRESERVED_REGISTERS_STORAGE regdisplayReg + +#if 0 // def _DEBUG // @TODO: temporarily removed because trashing the frame pointer breaks the debugger + movz x3, #0xbaad, LSL #48 + movk x3, #0xdeed, LSL #32 + movk x3, #0xbaad, LSL #16 + movk x3, #0xdeed + ldr x12, 
[\regdisplayReg, #OFFSETOF__REGDISPLAY__pX19] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX20] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX21] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX22] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX23] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX24] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX25] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX26] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX27] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pX28] + str x3, [x12] + ldr x12, [\regdisplayReg, #OFFSETOF__REGDISPLAY__pFP] + str x3, [x12] +#endif // _DEBUG + .endm + + + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + +// +// RhpThrowHwEx +// +// INPUT: W0: exception code of fault +// X1: faulting IP +// +// OUTPUT: +// + NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME HARDWARE_EXCEPTION + + // x2 = GetThread() + INLINE_GETTHREAD x2 + + add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + mov w3, #2 + strb w3, [x1, #OFFSETOF__ExInfo__m_kind] // pExInfo->m_kind = ExKind.HardwareFault + + // link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context // x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] // pExInfo->m_pExContext = pContext + + // w0: exception code + // x1: ExInfo* + bl RhThrowHwEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowHwEx2 + + // no return + EMIT_BREAKPOINT + + NESTED_END RhpThrowHwEx, _TEXT + +// +// RhpThrowEx +// +// INPUT: X0: exception object +// +// OUTPUT: +// + + NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + // x2 = GetThread() + INLINE_GETTHREAD x2 + + // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + // address could have been hijacked when we were in that C# code and we must remove the hijack and + // reflect the correct return address in our exception context record. The other throw helpers don't + // need this because they cannot be tail-called from C#. + + // NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + // where the tail-calling thread had saved LR, which may not match where we have saved LR. 
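RhpThrowHwEx above (and RhpThrowEx and RhpRethrow below) fill in a stack-allocated ExInfo and link it onto the thread's chain before calling the managed dispatcher (RhThrowHwEx / RhThrowEx / RhRethrow). Written out as a managed sketch, with field names following the OFFSETOF__ExInfo__* symbols and illustrative types:

```c#
// Illustrative sketch of the ExInfo setup performed by the throw helpers.
// ExKind values mirror the constants stored by the assembly (1 = Throw, 2 = HardwareFault).
enum ExKind : byte { None = 0, Throw = 1, HardwareFault = 2 }

unsafe struct ExInfoSketch
{
    public void* Exception;          // m_exception
    public ExInfoSketch* PrevExInfo; // m_pPrevExInfo
    public void* ExContext;          // m_pExContext (PAL_LIMITED_CONTEXT*)
    public uint IdxCurClause;        // m_idxCurClause
    public byte PassNumber;          // m_passNumber
    public ExKind Kind;              // m_kind

    public static void Push(ExInfoSketch* pExInfo, ExInfoSketch** ppStackHead,
                            void* pContext, ExKind kind)
    {
        pExInfo->Exception = null;           // pExInfo->m_exception = null
        pExInfo->PassNumber = 1;             // first pass
        pExInfo->IdxCurClause = 0xFFFFFFFF;  // MaxTryRegionIdx
        pExInfo->Kind = kind;
        pExInfo->PrevExInfo = *ppStackHead;  // link into the thread's ExInfo chain
        *ppStackHead = pExInfo;              // m_pExInfoStackHead = pExInfo
        pExInfo->ExContext = pContext;       // PAL_LIMITED_CONTEXT saved on the stack
    }
}
```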
+ + ldr x1, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz x1, NotHijacked + + ldr x3, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + + // x0: exception object + // x1: hijacked return address + // x2: pThread + // x3: hijacked return address location + + add x12, sp, #(STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) // re-compute SP at callsite + cmp x3, x12 // if (m_ppvHijackedReturnAddressLocation < SP at callsite) + blo TailCallWasHijacked + + // normal case where a valid return address location is hijacked + str x1, [x3] + b ClearThreadState + +TailCallWasHijacked: + + // Abnormal case where the return address location is now invalid because we ended up here via a tail + // call. In this case, our hijacked return address should be the correct caller of this method. + + // stick the previous return address in LR as well as in the right spots in our PAL_LIMITED_CONTEXT. + mov lr, x1 + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__LR)] + str lr, [sp, #(rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP)] + +ClearThreadState: + + // clear the Thread's hijack state + str xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str xzr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + +NotHijacked: + + add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_kind] // pExInfo->m_kind = ExKind.Throw + + // link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context // x2 <- PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] // pExInfo->m_pExContext = pContext + + // x0: exception object + // x1: ExInfo* + bl RhThrowEx + + EXPORT_POINTER_TO_ADDRESS PointerToRhpThrowEx2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpThrowEx, _TEXT + + +// +// void FASTCALL RhpRethrow() +// +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +// +// INPUT: +// +// OUTPUT: +// + + NESTED_ENTRY RhpRethrow, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + // x2 = GetThread() + INLINE_GETTHREAD x2 + + add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo* + str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null + strb wzr, [x1, #OFFSETOF__ExInfo__m_kind] // init to a deterministic value (ExKind.None) + mov w3, #1 + strb w3, [x1, #OFFSETOF__ExInfo__m_passNumber] // pExInfo->m_passNumber = 1 + mov w3, #0xFFFFFFFF + str w3, [x1, #OFFSETOF__ExInfo__m_idxCurClause] // pExInfo->m_idxCurClause = MaxTryRegionIdx + + // link the ExInfo into the thread's ExInfo chain + ldr x3, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] + mov x0, x3 // x0 <- current ExInfo + str x3, [x1, #OFFSETOF__ExInfo__m_pPrevExInfo] // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + str x1, [x2, #OFFSETOF__Thread__m_pExInfoStackHead] // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + add x2, sp, #rsp_offsetof_Context // x2 <- 
PAL_LIMITED_CONTEXT* + str x2, [x1, #OFFSETOF__ExInfo__m_pExContext] // pExInfo->m_pExContext = pContext + + // x0 contains the currently active ExInfo + // x1 contains the address of the new ExInfo + bl RhRethrow + + EXPORT_POINTER_TO_ADDRESS PointerToRhpRethrow2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpRethrow, _TEXT + +// +// void* FASTCALL RhpCallCatchFunclet(RtuObjectRef exceptionObj, void* pHandlerIP, REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo) +// +// INPUT: X0: exception object +// X1: handler funclet address +// X2: REGDISPLAY* +// X3: ExInfo* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler + + ALLOC_CALL_FUNCLET_FRAME 0x60 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + stp x0, x2, [sp, #0x40] // x0, x2 & x3 are saved so we have the exception object, REGDISPLAY and + stp x3, xzr, [sp, #0x50] // ExInfo later, xzr makes space for the local "is_not_handling_thread_abort" + +#define rsp_offset_is_not_handling_thread_abort 0x58 +#define rsp_offset_x2 0x48 +#define rsp_offset_x3 0x50 + + // + // clear the DoNotTriggerGc flag, trashes x4-x6 + // + INLINE_GETTHREAD x5 // x5 <- Thread* + + ldr x4, [x5, #OFFSETOF__Thread__m_threadAbortException] + sub x4, x4, x0 + str x4, [sp, #rsp_offset_is_not_handling_thread_abort] // Non-zero if the exception is not ThreadAbortException + + add x12, x5, #OFFSETOF__Thread__m_ThreadStateFlags + +ClearRetry_Catch: + ldxr w4, [x12] + bic w4, w4, #TSF_DoNotTriggerGc + stxr w6, w4, [x12] + cbz w6, ClearSuccess_Catch + b ClearRetry_Catch +ClearSuccess_Catch: + + // + // set preserved regs to the values expected by the funclet + // + RESTORE_PRESERVED_REGISTERS x2 + // + // trash the values at the old homes to make sure nobody uses them + // + TRASH_PRESERVED_REGISTERS_STORAGE x2 + + // + // call the funclet + // + // x0 still contains the exception object + blr x1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallCatchFunclet2 + + // x0 contains resume IP + + ldr x2, [sp, #rsp_offset_x2] // x2 <- REGDISPLAY* + +// @TODO: add debug-only validation code for ExInfo pop + + INLINE_GETTHREAD x1 // x1 <- Thread* + + // We must unhijack the thread at this point because the section of stack where the hijack is applied + // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
+ INLINE_THREAD_UNHIJACK x1, x3, x12 // Thread in x1, trashes x3 and x12 + + ldr x3, [sp, #rsp_offset_x3] // x3 <- current ExInfo* + ldr x2, [x2, #OFFSETOF__REGDISPLAY__SP] // x2 <- resume SP value + +PopExInfoLoop: + ldr x3, [x3, #OFFSETOF__ExInfo__m_pPrevExInfo] // x3 <- next ExInfo + cbz x3, DonePopping // if (pExInfo == null) { we're done } + cmp x3, x2 + blt PopExInfoLoop // if (pExInfo < resume SP} { keep going } + +DonePopping: + str x3, [x1, #OFFSETOF__Thread__m_pExInfoStackHead] // store the new head on the Thread + + adrp x3, RhpTrapThreads + add x3, x3, :lo12:RhpTrapThreads + ldr w3, [x3] + tbz x3, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort + + ldr x3, [sp, #rsp_offset_is_not_handling_thread_abort] + cbnz x3, NoAbort + + // It was the ThreadAbortException, so rethrow it + // reset SP + mov x1, x0 // x1 <- continuation address as exception PC + mov w0, #STATUS_REDHAWK_THREAD_ABORT + mov sp, x2 + b RhpThrowHwEx + +NoAbort: + // reset SP and jump to continuation address + mov sp, x2 + br x0 + + NESTED_END RhpCallCatchFunclet, _Text + +// +// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) +// +// INPUT: X0: handler funclet address +// X1: REGDISPLAY* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler + + ALLOC_CALL_FUNCLET_FRAME 0x50 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + stp x0, x1, [sp, #0x40] // x1 is saved so we have the REGDISPLAY later, x0 is just alignment padding + +#define rsp_offset_x1 0x48 + + + // We want to suppress hijacking between invocations of subsequent finallys. We do this because we + // cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the + // method) and then been popped off the stack, leaving behind no trace of its effect. + // + // So we clear the state before and set it after invocation of the handler. 
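The ldxr/bic/stxr and ldxr/orr/stxr loops that follow are lock-free read-modify-write updates of Thread::m_ThreadStateFlags. In managed terms, the clear-before / set-after bracket around the funclet call in RhpCallFinallyFunclet looks like this (a sketch; the numeric value of TSF_DoNotTriggerGc is a placeholder, only the flag name comes from the assembly):

```c#
using System;
using System.Threading;

static class FinallyFuncletSketch
{
    const int TSF_DoNotTriggerGc = 0x10; // hypothetical value; the assembly only names the flag

    // Clear the flag, run the funclet, then set it again, mirroring
    // the ClearRetry/ClearSuccess and SetRetry/SetSuccess loops below.
    static void CallFinally(ref int threadStateFlags, Action funclet)
    {
        InterlockedAnd(ref threadStateFlags, ~TSF_DoNotTriggerGc); // ldxr / bic / stxr
        funclet();                                                 // blr x0
        InterlockedOr(ref threadStateFlags, TSF_DoNotTriggerGc);   // ldxr / orr / stxr
    }

    static void InterlockedAnd(ref int location, int value)
    {
        int current, desired;
        do
        {
            current = Volatile.Read(ref location);
            desired = current & value;
        } while (Interlocked.CompareExchange(ref location, desired, current) != current);
    }

    static void InterlockedOr(ref int location, int value)
    {
        int current, desired;
        do
        {
            current = Volatile.Read(ref location);
            desired = current | value;
        } while (Interlocked.CompareExchange(ref location, desired, current) != current);
    }
}
```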
+ // + + // + // clear the DoNotTriggerGc flag, trashes x2-x4 + // + INLINE_GETTHREAD x2 // x2 <- Thread* + + add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags + +ClearRetry: + ldxr w4, [x12] + bic w4, w4, #TSF_DoNotTriggerGc + stxr w3, w4, [x12] + cbz w3, ClearSuccess + b ClearRetry +ClearSuccess: + + // + // set preserved regs to the values expected by the funclet + // + RESTORE_PRESERVED_REGISTERS x1 + // + // trash the values at the old homes to make sure nobody uses them + // + TRASH_PRESERVED_REGISTERS_STORAGE x1 + + // + // call the funclet + // + blr x0 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFinallyFunclet2 + + ldr x1, [sp, #rsp_offset_x1] // reload REGDISPLAY pointer + + // + // save new values of preserved regs into REGDISPLAY + // + SAVE_PRESERVED_REGISTERS x1 + + // + // set the DoNotTriggerGc flag, trashes x1-x3 + // + INLINE_GETTHREAD x2 // x2 <- Thread* + + add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags +SetRetry: + ldxr w1, [x12] + orr w1, w1, #TSF_DoNotTriggerGc + stxr w3, w1, [x12] + cbz w3, SetSuccess + b SetRetry +SetSuccess: + + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + FREE_CALL_FUNCLET_FRAME 0x50 + EPILOG_RETURN + + NESTED_END RhpCallFinallyFunclet, _Text + + +// +// void* FASTCALL RhpCallFilterFunclet(RtuObjectRef exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) +// +// INPUT: X0: exception object +// X1: filter funclet address +// X2: REGDISPLAY* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler + ALLOC_CALL_FUNCLET_FRAME 0x40 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + + ldr x12, [x2, #OFFSETOF__REGDISPLAY__pFP] + ldr fp, [x12] + + // + // call the funclet + // + // x0 still contains the exception object + blr x1 + + EXPORT_POINTER_TO_ADDRESS PointerToRhpCallFilterFunclet2 + + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] + + FREE_CALL_FUNCLET_FRAME 0x40 + EPILOG_RETURN + + NESTED_END RhpCallFilterFunclet, Text diff --git a/src/Native/Runtime/arm64/InteropThunksHelpers.S b/src/Native/Runtime/arm64/InteropThunksHelpers.S index 876f2dfbcb8..8c6e4198bd7 100644 --- a/src/Native/Runtime/arm64/InteropThunksHelpers.S +++ b/src/Native/Runtime/arm64/InteropThunksHelpers.S @@ -1,4 +1,60 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: Implement +#include + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +POINTER_SIZE = 0x08 + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // + // RhCommonStub + // + // INPUT: xip0: thunk's data block + // + // TRASHES: x9, x10, xip0 + // + LEAF_ENTRY RhCommonStub, _TEXT + // There are arbitrary callers passing arguments with arbitrary signatures. + // Custom calling convention: + // xip0 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + + INLINE_GET_TLS_VAR x9, tls_thunkData + + // x9 = base address of TLS data + // xip0 = address of context cell in thunk's data + + // store thunk address in thread static + ldr x10, [xip0] + str x10, [x9] + + // Now load the target address and jump to it. 
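RhCommonStub above (its final load-and-branch continues just below) consumes a two-pointer data block: it stashes the first pointer, the thunk's context, in a thread-local slot and tail-jumps to the second pointer, the real target. A hedged C++ picture of that contract (the thread_local slot name and the function-pointer type are illustrative; the real stub is a naked routine that leaves every argument register of the eventual callee untouched, which plain C++ cannot express):

```cpp
// Layout of the per-thunk data block addressed by xip0: two pointers.
struct ThunkData
{
    void* Context;   // later retrieved via RhGetCurrentThunkContext
    void* Target;    // address the stub tail-jumps to
};

// Thread-local slot the stub writes (tls_thunkData in the assembly).
thread_local void* t_thunkData = nullptr;

// Conceptual behavior of RhCommonStub.
void CommonStubConcept(ThunkData* pData)
{
    t_thunkData = pData->Context;                    // str x10, [x9]
    reinterpret_cast<void (*)()>(pData->Target)();   // ldr xip0, [xip0, #8] / br xip0
}

// RhGetCurrentThunkContext simply reads the slot back.
void* GetCurrentThunkContextConcept() { return t_thunkData; }
```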
+ ldr xip0, [xip0, #POINTER_SIZE] + br xip0 + + LEAF_END RhCommonStub, _TEXT + + // + // IntPtr RhGetCommonStubAddress() + // + LEAF_ENTRY RhGetCommonStubAddress, _TEXT + adrp x0, RhCommonStub + add x0, x0, :lo12:RhCommonStub + ret + LEAF_END RhGetCommonStubAddress, _TEXT + + + // + // IntPtr RhGetCurrentThunkContext() + // + LEAF_ENTRY RhGetCurrentThunkContext, _TEXT + + INLINE_GET_TLS_VAR x0, tls_thunkData + + ldr x0, [x0] + + ret + + LEAF_END RhGetCurrentThunkContext, _TEXT diff --git a/src/Native/Runtime/arm64/MiscStubs.S b/src/Native/Runtime/arm64/MiscStubs.S index 53616c22696..a4130dc1af6 100644 --- a/src/Native/Runtime/arm64/MiscStubs.S +++ b/src/Native/Runtime/arm64/MiscStubs.S @@ -1,2 +1,241 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. + +#include +#include "AsmOffsets.inc" + + .global memcpy + .global memcpyGCRefs + .global memcpyGCRefsWithWriteBarrier + .global memcpyAnyWithWriteBarrier + .global GetClasslibCCtorCheck + +// +// Checks whether the static class constructor for the type indicated by the context structure has been +// executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +// execute the cctor and update the context to record this fact. +// +// Input: +// x0 : Address of StaticClassConstructionContext structure +// +// Output: +// All volatile registers and the condition codes may be trashed. +// + LEAF_ENTRY RhpCheckCctor, _TEXT + + // Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + // initial state is 0 and the remaining values are reserved for classlib use). This check is + // unsynchronized; if we go down the slow path and call the classlib then it is responsible for + // synchronizing with other threads and re-checking the value. + ldr w12, [x0, #OFFSETOF__StaticClassConstructionContext__m_initialized] + cmp w12, #1 + bne RhpCheckCctor__SlowPath + ret +RhpCheckCctor__SlowPath: + mov x1, x0 + b RhpCheckCctor2 // tail-call the check cctor helper that actually has an implementation to call + // the cctor + + LEAF_END RhpCheckCctor, _TEXT + +// +// Checks whether the static class constructor for the type indicated by the context structure has been +// executed yet. If not the classlib is called via their CheckStaticClassConstruction callback which will +// execute the cctor and update the context to record this fact. +// +// Input: +// x0 : Value that must be preserved in this register across the cctor check. +// x1 : Address of StaticClassConstructionContext structure +// +// Output: +// All volatile registers other than x0 may be trashed and the condition codes may also be trashed. +// + LEAF_ENTRY RhpCheckCctor2, _TEXT + + // Check the m_initialized field of the context. The cctor has been run only if this equals 1 (the + // initial state is 0 and the remaining values are reserved for classlib use). This check is + // unsynchronized; if we go down the slow path and call the classlib then it is responsible for + // synchronizing with other threads and re-checking the value. + ldr w12, [x1, #OFFSETOF__StaticClassConstructionContext__m_initialized] + cmp w12, #1 + bne RhpCheckCctor2__SlowPath + ret + + LEAF_END RhpCheckCctor2 + +// +// Slow path helper for RhpCheckCctor. +// +// Input: +// x0 : Value that must be preserved in this register across the cctor check. 
+// x1 : Address of StaticClassConstructionContext structure +// +// Output: +// All volatile registers other than x0 may be trashed and the condition codes may also be trashed. +// + NESTED_ENTRY RhpCheckCctor2__SlowPath, _TEXT, NoHandler + + // Need to preserve x0, x1 and lr across helper call. fp is also pushed to keep the stack 16 byte aligned. + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x20 + stp x0, x1, [sp, #0x10] + + // Call a C++ helper to retrieve the address of the classlib callback. The caller's return address is + // passed as the argument to the helper; it's an address in the module and is used by the helper to + // locate the classlib. + mov x0, lr + bl GetClasslibCCtorCheck + + // X0 now contains the address of the classlib method to call. The single argument is the context + // structure address currently in stashed on the stack. Clean up and tail call to the classlib + // callback so we're not on the stack should a GC occur (so we don't need to worry about transition + // frames). + mov x12, x0 + ldp x0, x1, [sp, #0x10] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x20 + // tail-call the class lib cctor check function. This function is required to return its first + // argument, so that x0 can be preserved. + br x12 + + NESTED_END RhpCheckCctor__SlowPath2, _TEXT + + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// +// void* RhpCopyMultibyteNoGCRefs(void*, void*, size_t) +// +// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +// it to managed code. +// + + LEAF_ENTRY RhpCopyMultibyteNoGCRefs, _TEXT + + // x0 dest + // x1 src + // x2 count + + cbz x2, NothingToCopy_NoGCRefs // check for a zero-length copy + + // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + // translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteNoGCRefsSrcAVLocation + ldrb wzr, [x1] + + // tail-call to plain-old-memcpy + b memcpy + +NothingToCopy_NoGCRefs: + // dest is already in x0 + ret + + LEAF_END RhpCopyMultibyteNoGCRefs, _TEXT + + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// +// void* RhpCopyMultibyte(void*, void*, size_t) +// +// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +// it to managed code. +// + + LEAF_ENTRY RhpCopyMultibyte, _TEXT + + // x0 dest + // x1 src + // x2 count + + // check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyMultibyte + + // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + // translated to a managed exception as usual. 
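The pointer probes used by these copy helpers exist only so that a bad destination or source faults at a known, labeled instruction before the real copy starts, letting the EH machinery attribute the access violation to the managed caller. A C++ sketch of the same pattern (plain memcpy stands in for the runtime's memcpyGCRefs family):

```cpp
#include <cstddef>
#include <cstring>

// Touch the first byte of dest and src before copying, so a null or invalid
// pointer faults here -- at a predictable spot -- rather than somewhere inside
// the optimized copy loop.
void* CopyMultibyteNoGCRefsConcept(void* dest, const void* src, size_t count)
{
    if (count == 0)
        return dest;                                                    // cbz x2, NothingToCopy

    volatile char probeDest = *static_cast<volatile char*>(dest);       // ldrb wzr, [x0]
    volatile char probeSrc  = *static_cast<const volatile char*>(src);  // ldrb wzr, [x1]
    (void)probeDest;
    (void)probeSrc;

    return std::memcpy(dest, src, count);                               // tail-call to memcpy
}
```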
+ ALTERNATE_ENTRY RhpCopyMultibyteDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteSrcAVLocation + ldrb wzr, [x1] + + // tail-call to the GC-safe memcpy implementation + b memcpyGCRefs + +NothingToCopy_RhpCopyMultibyte: + // dest is already still in x0 + ret + + LEAF_END RhpCopyMultibyte, _TEXT + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// +// void* RhpCopyMultibyteWithWriteBarrier(void*, void*, size_t) +// +// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +// it to managed code. +// Runs a card table update via RhpBulkWriteBarrier after the copy +// + + LEAF_ENTRY RhpCopyMultibyteWithWriteBarrier, _TEXT + + // x0 dest + // x1 src + // x2 count + + // check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyMultibyteWithWriteBarrier + + // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + // translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyMultibyteWithWriteBarrierSrcAVLocation + ldrb wzr, [x1] + + // tail-call to the GC-safe memcpy implementation + b memcpyGCRefsWithWriteBarrier + +NothingToCopy_RhpCopyMultibyteWithWriteBarrier: + // dest is already still in x0 + ret + LEAF_END RhpCopyMultibyteWithWriteBarrier, _TEXT + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +// +// void* RhpCopyAnyWithWriteBarrier(void*, void*, size_t) +// +// The purpose of this wrapper is to hoist the potential null reference exceptions of copying memory up to a place where +// the stack unwinder and exception dispatch can properly transform the exception into a managed exception and dispatch +// it to managed code. +// Runs a card table update via RhpBulkWriteBarrier after the copy if it contained GC pointers +// + + LEAF_ENTRY RhpCopyAnyWithWriteBarrier, _TEXT + + // x0 dest + // x1 src + // x2 count + + // check for a zero-length copy + cbz x2, NothingToCopy_RhpCopyAnyWithWriteBarrier + + // Now check the dest and src pointers. If they AV, the EH subsystem will recognize the address of the AV, + // unwind the frame, and fixup the stack to make it look like the (managed) caller AV'ed, which will be + // translated to a managed exception as usual. + ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierDestAVLocation + ldrb wzr, [x0] + ALTERNATE_ENTRY RhpCopyAnyWithWriteBarrierSrcAVLocation + ldrb wzr, [x1] + + // tail-call to the GC-safe memcpy implementation + b memcpyAnyWithWriteBarrier + +NothingToCopy_RhpCopyAnyWithWriteBarrier: + // dest is already still in x0 + ret + + LEAF_END RhpCopyAnyWithWriteBarrier, _TEXT diff --git a/src/Native/Runtime/arm64/PInvoke.S b/src/Native/Runtime/arm64/PInvoke.S index 876f2dfbcb8..508127601fb 100644 --- a/src/Native/Runtime/arm64/PInvoke.S +++ b/src/Native/Runtime/arm64/PInvoke.S @@ -1,4 +1,355 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-// TODO: Implement + +#include +#include "AsmOffsets.inc" + +.global RhpTrapThreads + +// Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h +PTFF_SAVE_X19 = 0x00000001 +PTFF_SAVE_X20 = 0x00000002 +PTFF_SAVE_X21 = 0x00000004 +PTFF_SAVE_X22 = 0x00000008 +PTFF_SAVE_X23 = 0x00000010 +PTFF_SAVE_X24 = 0x00000020 +PTFF_SAVE_X25 = 0x00000040 +PTFF_SAVE_X26 = 0x00000080 +PTFF_SAVE_X27 = 0x00000100 +PTFF_SAVE_X28 = 0x00000200 +PTFF_SAVE_SP = 0x00000400 +PTFF_SAVE_ALL_PRESERVED = 0x000003FF // NOTE: x19-x28 +PTFF_SAVE_X0 = 0x00000800 +PTFF_SAVE_X1 = 0x00001000 +PTFF_SAVE_X2 = 0x00002000 +PTFF_SAVE_X3 = 0x00004000 +PTFF_SAVE_X4 = 0x00008000 +PTFF_SAVE_X5 = 0x00010000 +PTFF_SAVE_X6 = 0x00020000 +PTFF_SAVE_X7 = 0x00040000 +PTFF_SAVE_X8 = 0x00080000 +PTFF_SAVE_X9 = 0x00100000 +PTFF_SAVE_X10 = 0x00200000 +PTFF_SAVE_X11 = 0x00400000 +PTFF_SAVE_X12 = 0x00800000 +PTFF_SAVE_X13 = 0x01000000 +PTFF_SAVE_X14 = 0x02000000 +PTFF_SAVE_X15 = 0x04000000 +PTFF_SAVE_X16 = 0x08000000 +PTFF_SAVE_X17 = 0x10000000 +PTFF_SAVE_X18 = 0x20000000 +PTFF_SAVE_ALL_SCRATCH = 0x3FFFF800 // NOTE: X0-X18 +PTFF_SAVE_FP = 0x40000000 +PTFF_SAVE_LR = 0x80000000 + +// Bit position for the flags above, to be used with tbz / tbnz instructions +PTFF_THREAD_ABORT_BIT = 36 + +// Bit position for the flags above, to be used with tbz/tbnz instructions +TSF_Attached_Bit = 0 +TSF_SuppressGcStress_Bit = 3 +TSF_DoNotTriggerGc_Bit = 4 + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpWaitForSuspend -- rare path for RhpPInvoke and RhpReversePInvokeReturn +// +// +// INPUT: none +// +// TRASHES: none +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + NESTED_ENTRY RhpWaitForSuspend, _TEXT, NoHandler + + // FP and LR registers + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0xA0 // Push down stack pointer and store FP and LR + + // Need to save argument registers x0-x7 and the return buffer register x8 + // Also save x9 which may be used for saving indirect call target + stp x0, x1, [sp, #0x10] + stp x2, x3, [sp, #0x20] + stp x4, x5, [sp, #0x30] + stp x6, x7, [sp, #0x40] + stp x8, x9, [sp, #0x50] + + // Save float argument registers as well since they are volatile + stp d0, d1, [sp, #0x60] + stp d2, d3, [sp, #0x70] + stp d4, d5, [sp, #0x80] + stp d6, d7, [sp, #0x90] + + bl RhpWaitForSuspend2 + + // Restore floating point registers + ldp d0, d1, [sp, #0x60] + ldp d2, d3, [sp, #0x70] + ldp d4, d5, [sp, #0x80] + ldp d6, d7, [sp, #0x90] + + // Restore the argument registers + ldp x0, x1, [sp, #0x10] + ldp x2, x3, [sp, #0x20] + ldp x4, x5, [sp, #0x30] + ldp x6, x7, [sp, #0x40] + ldp x8, x9, [sp, #0x50] + + // Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0xA0 + EPILOG_RETURN + + NESTED_END RhpWaitForSuspend, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpWaitForGCNoAbort +// +// +// INPUT: x9: transition frame +// +// TRASHES: None +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT, NoHandler + + // FP and LR registers + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x40 // Push down stack pointer and store FP and LR + + // Save the integer return registers, as well as the floating return registers + stp x0, x1, [sp, #0x10] + stp d0, d1, [sp, #0x20] + 
stp d2, d3, [sp, #0x30] + + ldr x0, [x9, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + ldr w0, [x0, #OFFSETOF__Thread__m_ThreadStateFlags] + tbnz x0, #TSF_DoNotTriggerGc_Bit, Done + + mov x0, x9 // passing transition frame in x0 + bl RhpWaitForGC2 + +Done: + ldp x0, x1, [sp, #0x10] + ldp d0, d1, [sp, #0x20] + ldp d2, d3, [sp, #0x30] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x40 + EPILOG_RETURN + + NESTED_END RhpWaitForGCNoAbort + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpWaitForGC +// +// +// INPUT: x9: transition frame +// +// TRASHES: x0, x1, x10 +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler + + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x10 + + adrp x10, RhpTrapThreads + add x10, x10, :lo12:RhpTrapThreads + ldr w10, [x10] + tbz x10, #TrapThreadsFlags_TrapThreads_Bit, NoWait + bl RhpWaitForGCNoAbort +NoWait: + tbz x10, #TrapThreadsFlags_AbortInProgress_Bit, NoAbort + ldr x10, [x9, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + tbz x10, #PTFF_THREAD_ABORT_BIT, NoAbort + + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x10 + mov w0, #STATUS_REDHAWK_THREAD_ABORT + mov x1, lr // hijack target address as exception PC + b RhpThrowHwEx + +NoAbort: + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x10 + EPILOG_RETURN + + NESTED_END RhpWaitForGC, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpReversePInvoke +// +// IN: x9: address of reverse pinvoke frame +// 0: save slot for previous M->U transition frame +// 8: save slot for thread pointer to avoid re-calc in epilog sequence +// +// PRESERVES: x0 - x8 -- need to preserve these because the caller assumes they are not trashed +// +// TRASHES: x10, x11 +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + LEAF_ENTRY RhpReversePInvoke, _TEXT + + INLINE_GETTHREAD x10 // x10 = Thread + str x10, [x9, #8] // save Thread pointer for RhpReversePInvokeReturn + + // x9 = reverse pinvoke frame + // x10 = thread + // x11 = scratch + + ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags] + tbz x11, #TSF_Attached_Bit, AttachThread + +ThreadAttached: + // + // Check for the correct mode. This is accessible via various odd things that we cannot completely + // prevent such as : + // 1) Registering a reverse pinvoke entrypoint as a vectored exception handler + // 2) Performing a managed delegate invoke on a reverse pinvoke delegate. + // + ldr x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + cbz x11, CheckBadTransition + + // Save previous TransitionFrame prior to making the mode transition so that it is always valid + // whenever we might attempt to hijack this thread. + str x11, [x9] + + str xzr, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + dmb ish + + adrp x11, RhpTrapThreads + add x11, x11, :lo12:RhpTrapThreads + ldr w11, [x11] + tbnz x11, #TrapThreadsFlags_TrapThreads_Bit, TrapThread + + ret + +CheckBadTransition: + // Allow 'bad transitions' in when the TSF_DoNotTriggerGc mode is set. This allows us to have + // [NativeCallable] methods that are called via the "restricted GC callouts" as well as from native, + // which is necessary because the methods are CCW vtable methods on interfaces passed to native. 
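The fast path of RhpReversePInvoke, above and just below, is a handful of loads and stores on a small two-slot frame. A simplified C++ rendering of that bookkeeping (field names mirror the offsets used in the assembly, the slow paths are only declared, and the trap-flag test is simplified to "any flag set"; the real code tests the TrapThreads bit specifically):

```cpp
#include <atomic>
#include <cstdint>

struct Thread
{
    std::atomic<void*>    m_pTransitionFrame;
    std::atomic<uint32_t> m_ThreadStateFlags;
};

// Two-slot frame passed in x9: previous transition frame, then the Thread*.
struct ReversePInvokeFrame
{
    void*   m_savedPInvokeTransitionFrame;
    Thread* m_savedThread;
};

constexpr uint32_t TSF_Attached = 1u << 0;             // TSF_Attached_Bit above

std::atomic<uint32_t> RhpTrapThreadsStandIn{0};        // stand-in for RhpTrapThreads
Thread* GetCurrentThreadConcept();                     // INLINE_GETTHREAD, provided by the runtime
void AttachOrTrapThreadSlowPath(ReversePInvokeFrame*); // RhpReversePInvokeAttachOrTrapThread

void ReversePInvokeConcept(ReversePInvokeFrame* pFrame)
{
    Thread* pThread = GetCurrentThreadConcept();
    pFrame->m_savedThread = pThread;                   // str x10, [x9, #8]

    if (!(pThread->m_ThreadStateFlags.load() & TSF_Attached))
        return AttachOrTrapThreadSlowPath(pFrame);     // first call on this thread

    void* pPrevFrame = pThread->m_pTransitionFrame.load();
    if (pPrevFrame == nullptr)
    {
        // Already in cooperative mode: only legal for DoNotTriggerGc callouts
        // (the CheckBadTransition path, elided here).
        return;
    }

    pFrame->m_savedPInvokeTransitionFrame = pPrevFrame;   // str x11, [x9]
    pThread->m_pTransitionFrame.store(nullptr);           // enter cooperative mode
    std::atomic_thread_fence(std::memory_order_seq_cst);  // dmb ish

    if (RhpTrapThreadsStandIn.load() != 0)
    {
        // TrapThread: put the previous frame back (re-enter preemptive mode)
        // and take the slow path, which blocks until the suspension is over.
        pThread->m_pTransitionFrame.store(pPrevFrame);
        std::atomic_thread_fence(std::memory_order_seq_cst);
        AttachOrTrapThreadSlowPath(pFrame);
    }
}
```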
+ ldr w11, [x10, #OFFSETOF__Thread__m_ThreadStateFlags] + tbz x11, #TSF_DoNotTriggerGc_Bit, BadTransition + + // zero-out our 'previous transition frame' save slot + mov x11, #0 + str x11, [x9] + + // nothing more to do + ret + +TrapThread: + // put the previous frame back (sets us back to preemptive mode) + ldr x11, [x9] + str x11, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + dmb ish + +AttachThread: + // passing address of reverse pinvoke frame in x9 + b RhpReversePInvokeAttachOrTrapThread + +BadTransition: + mov x0, lr // arg <- return address + b RhpReversePInvokeBadTransition + + LEAF_END RhpReversePInvoke, _TEXT + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpReversePInvokeAttachOrTrapThread -- rare path for RhpPInvoke +// +// +// INPUT: x9: address of reverse pinvoke frame +// +// PRESERVES: x0-x8 -- need to preserve these because the caller assumes they are not trashed +// +// TRASHES: none +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// + NESTED_ENTRY RhpReversePInvokeAttachOrTrapThread, _TEXT, NoHandler + + // FP and LR registers + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0xA0 // Push down stack pointer and store FP and LR + + // Need to save argument registers x0-x7 and the return buffer register x8 (twice for 16B alignment) + stp x0, x1, [sp, #0x10] + stp x2, x3, [sp, #0x20] + stp x4, x5, [sp, #0x30] + stp x6, x7, [sp, #0x40] + stp x8, x8, [sp, #0x50] + + // Save float argument registers as well since they are volatile + stp d0, d1, [sp, #0x60] + stp d2, d3, [sp, #0x70] + stp d4, d5, [sp, #0x80] + stp d6, d7, [sp, #0x90] + + mov x0, x9 // passing reverse pinvoke frame pointer in x0 + bl RhpReversePInvokeAttachOrTrapThread2 + + // Restore floating point registers + ldp d0, d1, [sp, #0x60] + ldp d2, d3, [sp, #0x70] + ldp d4, d5, [sp, #0x80] + ldp d6, d7, [sp, #0x90] + + // Restore the argument registers + ldp x0, x1, [sp, #0x10] + ldp x2, x3, [sp, #0x20] + ldp x4, x5, [sp, #0x30] + ldp x6, x7, [sp, #0x40] + ldr x8, [sp, #0x50] + + // Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0xA0 + EPILOG_RETURN + + NESTED_END RhpReversePInvokeTrapThread + +// +// RhpPInvoke +// +// IN: X0: address of pinvoke frame +// +// This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite. +// The codegenerator must treat the callsite of this helper as GC triggering and generate the GC info for it. +// Also, the codegenerator must ensure that there are no live GC references in callee saved registers. 
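RhpPInvoke, whose body follows, publishes a PInvokeTransitionFrame describing the caller and then polls the trap flag. Roughly, in C++ (the field order is illustrative; the names mirror the OFFSETOF__PInvokeTransitionFrame__* offsets, and PTFF_SAVE_SP comes from the flag table earlier in this file):

```cpp
#include <atomic>
#include <cstdint>

constexpr uint64_t PTFF_SAVE_SP = 0x00000400;   // from the PTFF_* table above

struct Thread;

// Fields mirror the offsets used by the helper below; order is illustrative.
struct PInvokeTransitionFrame
{
    void*     m_RIP;               // return address back into managed code
    void*     m_FramePointer;
    Thread*   m_pThread;
    uint64_t  m_Flags;             // which registers the frame preserves
    uintptr_t m_PreservedRegs[1];  // here only SP, hence PTFF_SAVE_SP
};

struct Thread { std::atomic<PInvokeTransitionFrame*> m_pTransitionFrame; };

std::atomic<uint32_t> RhpTrapThreadsStandIn{0};     // stand-in for RhpTrapThreads
Thread* GetCurrentThreadConcept();                  // INLINE_GETTHREAD, provided by the runtime
void WaitForSuspendSlowPath(PInvokeTransitionFrame*);

// Conceptual body of RhpPInvoke: record where managed code left off, publish
// the frame so a suspending thread can see it, then poll the trap flag.
void PInvokeConcept(PInvokeTransitionFrame* pFrame,
                    void* callerFP, void* callerReturnAddress, uintptr_t callerSP)
{
    pFrame->m_FramePointer     = callerFP;
    pFrame->m_RIP              = callerReturnAddress;
    pFrame->m_PreservedRegs[0] = callerSP;
    pFrame->m_Flags            = PTFF_SAVE_SP;

    Thread* pThread   = GetCurrentThreadConcept();
    pFrame->m_pThread = pThread;
    pThread->m_pTransitionFrame.store(pFrame);      // now in preemptive mode

    if (RhpTrapThreadsStandIn.load() != 0)          // TrapThreadsFlags_None == 0
        WaitForSuspendSlowPath(pFrame);
}
```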
+// + +NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler + str fp, [x0, #OFFSETOF__PInvokeTransitionFrame__m_FramePointer] + str lr, [x0, #OFFSETOF__PInvokeTransitionFrame__m_RIP] + mov x9, SP + str x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs] + mov x9, #PTFF_SAVE_SP + str x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_Flags] + + // get TLS global variable address + // r0 = GetThread() + INLINE_GETTHREAD x10 + str x10, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + str x0, [x10, #OFFSETOF__Thread__m_pTransitionFrame] + + adrp x9, RhpTrapThreads + add x9, x9, :lo12:RhpTrapThreads + ldr w9, [x9] + cbnz w9, InvokeRareTrapThread // TrapThreadsFlags_None = 0 + ret + +InvokeRareTrapThread: + b C_FUNC(RhpWaitForSuspend2) +NESTED_END RhpPInvoke, _TEXT + + +LEAF_ENTRY RhpPInvokeReturn, _TEXT + ldr x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread] + mov x10, 0 + str x10, [x9, #OFFSETOF__Thread__m_pTransitionFrame] + + adrp x9, RhpTrapThreads + add x9, x9, :lo12:RhpTrapThreads + ldr w9, [x9] + cbnz w9, 0f // TrapThreadsFlags_None = 0 + ret +0: + // passing transition frame pointer in x0 + b RhpWaitForGC +LEAF_END RhpPInvokeReturn, _TEXT + diff --git a/src/Native/Runtime/arm64/StubDispatch.S b/src/Native/Runtime/arm64/StubDispatch.S index 01ed602a761..25aae897289 100644 --- a/src/Native/Runtime/arm64/StubDispatch.S +++ b/src/Native/Runtime/arm64/StubDispatch.S @@ -2,5 +2,109 @@ // The .NET Foundation licenses this file to you under the MIT license. #include +#include "AsmOffsets.inc" -// TODO: Implement Arm64 support +#define __tls_array 0 + +#ifdef FEATURE_CACHED_INTERFACE_DISPATCH + + .extern RhpCidResolve + .extern RhpUniversalTransition_DebugStepTailCall + + // Macro that generates code to check a single cache entry. + .macro CHECK_CACHE_ENTRY entry + // Check a single entry in the cache. + // x9 : Cache data structure. Also used for target address jump. + // x10 : Instance EEType* + // x11 : x11 still contains the indirection cell address. do not trash + // x12 : Trashed + ldr x12, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16))] + cmp x10, x12 + bne 0f + ldr x9, [x9, #(OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8)] + br x9 +0: + .endm + +// +// Macro that generates a stub consuming a cache with the given number of entries. +// + .macro DEFINE_INTERFACE_DISPATCH_STUB entries + + NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler + + // x11 currently holds the indirection cell address. We need to get the cache structure instead. + ldr x9, [x11, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the EEType from the object instance in x0. + ldr x10, [x0] + + .global CurrentEntry + .set CurrentEntry, 0 + + .rept \entries + CHECK_CACHE_ENTRY CurrentEntry + .set CurrentEntry, CurrentEntry + 1 + .endr + + // x11 still contains the indirection cell address. + b RhpInterfaceDispatchSlow + + NESTED_END "RhpInterfaceDispatch\entries", _TEXT + + .endm + +// +// Define all the stub routines we currently need. +// + DEFINE_INTERFACE_DISPATCH_STUB 1 + DEFINE_INTERFACE_DISPATCH_STUB 2 + DEFINE_INTERFACE_DISPATCH_STUB 4 + DEFINE_INTERFACE_DISPATCH_STUB 8 + DEFINE_INTERFACE_DISPATCH_STUB 16 + DEFINE_INTERFACE_DISPATCH_STUB 32 + DEFINE_INTERFACE_DISPATCH_STUB 64 + +// +// Initial dispatch on an interface when we dont have a cache yet. +// + LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT + // Just tail call to the cache miss helper. 
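Each RhpInterfaceDispatchN stub generated above is an unrolled linear probe over an N-entry cache of (EEType, target) pairs keyed by the receiver's type, falling back to the resolver on a miss. The same logic in C++ (the types are skeletal; entry layout follows the 16-byte stride used by OFFSETOF__InterfaceDispatchCache__m_rgEntries):

```cpp
#include <cstddef>

struct EEType;   // the receiver's type handle, first pointer-sized field of every object

// 16-byte cache entries: the expected EEType and the code address to call.
struct InterfaceDispatchCacheEntry
{
    EEType* m_pInstanceType;
    void*   m_pTargetCode;
};

struct InterfaceDispatchCache
{
    // ... header fields elided ...
    InterfaceDispatchCacheEntry m_rgEntries[1];   // really N entries, N in {1,2,4,...,64}
};

void* ResolveCacheMissConcept(void* pCell, EEType* pInstanceType);   // RhpCidResolve path

// What the unrolled CHECK_CACHE_ENTRY sequence computes for an N-entry stub.
void* InterfaceDispatchConcept(InterfaceDispatchCache* pCache, EEType* pInstanceType,
                               void* pCell, size_t cacheEntryCount)
{
    for (size_t i = 0; i < cacheEntryCount; i++)            // .rept \entries
    {
        if (pCache->m_rgEntries[i].m_pInstanceType == pInstanceType)
            return pCache->m_rgEntries[i].m_pTargetCode;    // hit: br x9
    }
    return ResolveCacheMissConcept(pCell, pInstanceType);   // miss: b RhpInterfaceDispatchSlow
}
```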
+ b RhpInterfaceDispatchSlow + LEAF_END RhpInitialInterfaceDispatch, _TEXT + +// +// Stub dispatch routine for dispatch to a vtable slot +// + LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // xip1 has the interface dispatch cell address in it. + // load x12 to point to the vtable offset (which is stored in the m_pCache field). + ldr x12, [xip1, #OFFSETOF__InterfaceDispatchCell__m_pCache] + + // Load the EEType from the object instance in x0, and add it to the vtable offset + // to get the address in the vtable of what we want to dereference + ldr x13, [x0] + add x12, x12, x13 + + // Load the target address of the vtable into x12 + ldr x12, [x12] + + br x12 + LEAF_END RhpVTableOffsetDispatch, _TEXT + +// +// Cache miss case, call the runtime to resolve the target and update the cache. +// + LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + // xip1 has the interface dispatch cell address in it. + // Calling convention of the universal thunk is: + // xip0: contains target address for the thunk to call + // xip1: contains parameter of the thunks target + adrp xip0, RhpCidResolve + add xip0, xip0, :lo12:RhpCidResolve + mov xip1, x11 + b RhpUniversalTransition_DebugStepTailCall + LEAF_END RhpInterfaceDispatchSlow, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/Native/Runtime/arm64/UniversalTransition.S b/src/Native/Runtime/arm64/UniversalTransition.S index 876f2dfbcb8..81fd8ca3385 100644 --- a/src/Native/Runtime/arm64/UniversalTransition.S +++ b/src/Native/Runtime/arm64/UniversalTransition.S @@ -1,4 +1,159 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: Implement +#include + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + // TODO .extern RhpIntegerTrashValues + // TODO .extern RhpFpTrashValues +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + +// Padding to account for the odd number of saved integer registers +#define ALIGNMENT_PADDING_SIZE (8) + +#define COUNT_ARG_REGISTERS (9) +#define INTEGER_REGISTER_SIZE (8) +#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) + +// Largest return block is 4 doubles +#define RETURN_BLOCK_SIZE (32) + +#define COUNT_FLOAT_ARG_REGISTERS (8) +#define FLOAT_REGISTER_SIZE (8) +#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define PUSHED_LR_SIZE (8) +#define PUSHED_FP_SIZE (8) + +// +// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +// +// ALIGNMENT_PADDING_SIZE +// ARGUMENT_REGISTERS_SIZE +// RETURN_BLOCK_SIZE +// FLOAT_ARG_REGISTERS_SIZE +// PUSHED_LR_SIZE +// PUSHED_FP_SIZE +// + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_LR_SIZE + FLOAT_ARG_REGISTERS_SIZE) + +#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_LR_SIZE + PUSHED_FP_SIZE) + +#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_LR_SIZE) +#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE) + +// +// RhpUniversalTransition +// +// At input to this function, x0-8, d0-7 and the stack may contain any number of arguments. 
+// +// In addition, there are 2 extra arguments passed in the intra-procedure-call scratch register: +// xip0 will contain the managed function that is to be called by this transition function +// xip1 will contain the pointer sized extra argument to the managed function +// +// When invoking the callee: +// +// x0 shall contain a pointer to the TransitionBlock +// x1 shall contain the value that was in xip1 at entry to this function +// +// Frame layout is: +// +// {StackPassedArgs} ChildSP+0C0 CallerSP+000 +// {AlignmentPad (0x8 bytes)} ChildSP+0B8 CallerSP-008 +// {IntArgRegs (x0-x8) (0x48 bytes)} ChildSP+070 CallerSP-050 +// {ReturnBlock (0x20 bytes)} ChildSP+050 CallerSP-070 +// -- The base address of the Return block is the TransitionBlock pointer, the floating point args are +// in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +// layout of all pieces of the frame that lie at or above the pushed floating point registers. +// {FpArgRegs (d0-d7) (0x40 bytes)} ChildSP+010 CallerSP-0B0 +// {PushedLR} ChildSP+008 CallerSP-0B8 +// {PushedFP} ChildSP+000 CallerSP-0C0 +// +// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +// must be updated as well. +// +// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +// FpArgRegs. +// +// NOTE: The stack walker guarantees that conservative GC reporting will be applied to +// everything between the base of the ReturnBlock and the top of the StackPassedArgs. +// + + .text + + .macro UNIVERSAL_TRANSITION FunctionName + + NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler + + // FP and LR registers + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-STACK_SIZE // ;; Push down stack pointer and store FP and LR + + // Floating point registers + stp d0, d1, [sp, #(FLOAT_ARG_OFFSET )] + stp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)] + stp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)] + stp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)] + + // Space for return buffer data (0x40 bytes) + + // Save argument registers + stp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET )] + stp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)] + stp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)] + stp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)] + stp x8, xzr, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x40)] + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + // ARM64TODO +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + add x0, sp, #DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function is a pointer to the return block + mov x8, x0 // Arm64 calling convention: Address of return block shall be passed in x8 + mov x1, xip1 // Second parameter to target function + blr xip0 + + // We cannot make the label public as that tricks DIA stackwalker into thinking + // it's the beginning of a method. For this reason we export an auxiliary variable + // holding the address instead. + EXPORT_POINTER_TO_ADDRESS PointerToReturnFrom\FunctionName + + // Move the result (the target address) to x12 so it doesn't get overridden when we restore the + // argument registers. 
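The frame layout comment and the #define arithmetic above are easy to cross-check. A small, purely illustrative C++ fragment mirroring those macros confirms that the offsets in the ChildSP column are mutually consistent:

```cpp
#include <cstddef>

// Mirror the #defines above and check them against the ChildSP offsets
// given in the frame layout comment.
constexpr size_t AlignmentPaddingSize    = 8;
constexpr size_t ArgumentRegistersSize   = 9 * 8;   // x0-x8
constexpr size_t ReturnBlockSize         = 32;      // 4 doubles
constexpr size_t FloatArgRegistersSize   = 8 * 8;   // d0-d7
constexpr size_t PushedLrSize            = 8;
constexpr size_t PushedFpSize            = 8;

constexpr size_t FloatArgOffset          = PushedFpSize + PushedLrSize;
constexpr size_t ReturnBlockOffset       = FloatArgOffset + FloatArgRegistersSize;
constexpr size_t ArgumentRegistersOffset = ReturnBlockOffset + ReturnBlockSize;
constexpr size_t StackSize               = ArgumentRegistersOffset + ArgumentRegistersSize
                                           + AlignmentPaddingSize;

static_assert(FloatArgOffset          == 0x10, "FpArgRegs at ChildSP+010");
static_assert(ReturnBlockOffset       == 0x50, "ReturnBlock at ChildSP+050");
static_assert(ArgumentRegistersOffset == 0x70, "IntArgRegs at ChildSP+070");
static_assert(StackSize               == 0xC0, "CallerSP at ChildSP+0C0");
```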
+ mov x12, x0 + + // Restore floating point registers + ldp d0, d1, [sp, #(FLOAT_ARG_OFFSET )] + ldp d2, d3, [sp, #(FLOAT_ARG_OFFSET + 0x10)] + ldp d4, d5, [sp, #(FLOAT_ARG_OFFSET + 0x20)] + ldp d6, d7, [sp, #(FLOAT_ARG_OFFSET + 0x30)] + + // Restore the argument registers + ldp x0, x1, [sp, #(ARGUMENT_REGISTERS_OFFSET )] + ldp x2, x3, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x10)] + ldp x4, x5, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x20)] + ldp x6, x7, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x30)] + ldr x8, [sp, #(ARGUMENT_REGISTERS_OFFSET + 0x40)] + + // Restore FP and LR registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #STACK_SIZE + + // Tailcall to the target address. + // TODO EPILOG_NOP + br x12 + + NESTED_END Rhp\FunctionName, _TEXT + + .endm + + // To enable proper step-in behavior in the debugger, we need to have two instances + // of the thunk. For the first one, the debugger steps into the call in the function, + // for the other, it steps over it. + UNIVERSAL_TRANSITION UniversalTransition + UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall + diff --git a/src/Native/Runtime/arm64/WriteBarriers.S b/src/Native/Runtime/arm64/WriteBarriers.S index a14d99d7ef4..a1e3c103673 100644 --- a/src/Native/Runtime/arm64/WriteBarriers.S +++ b/src/Native/Runtime/arm64/WriteBarriers.S @@ -1,33 +1,368 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. -// TODO: Implement Unix write barriers #include -LEAF_ENTRY RhpAssignRef, _TEXT - str x1, [x0] - ret -LEAF_END RhpAssignRef, _TEXT +// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used +// during garbage collections to verify that object references where never written to the heap without using a +// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing +// new references to the real heap. Since this can not be solved perfectly without critical sections around the +// entire update process, we instead update the shadow location and then re-check the real location (as two +// ordered operations) and if there is a disparity we will re-write the shadow location with a special value +// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC +// time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the +// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. +#ifdef WRITE_BARRIER_CHECK -LEAF_ENTRY RhpCheckedAssignRef, _TEXT - str x1, [x0] - ret -LEAF_END RhpCheckedAssignRef, _TEXT +// TODO + + SETALIAS g_GCShadow, ?g_GCShadow@@3PEAEEA + SETALIAS g_GCShadowEnd, ?g_GCShadowEnd@@3PEAEEA + EXTERN $g_GCShadow + EXTERN $g_GCShadowEnd + +INVALIDGCVALUE EQU 0xCCCCCCCD + + MACRO + // On entry: + // $destReg: location to be updated + // $refReg: objectref to be stored + // + // On exit: + // x9,x10: trashed + // other registers are preserved + // + UPDATE_GC_SHADOW $destReg, $refReg + + // If g_GCShadow is 0, don't perform the check. + adrp x9, $g_GCShadow + ldr x9, [x9, $g_GCShadow] + cbz x9, %ft1 + + // Save $destReg since we're about to modify it (and we need the original value both within the macro and + // once we exit the macro). + mov x10, $destReg + + // Transform $destReg into the equivalent address in the shadow heap. 
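The UPDATE_GC_SHADOW macro being defined here (its body continues below) implements the scheme the file header describes: mirror the store into the shadow heap, re-read the real location under an ordering barrier, and if another thread won the race, poison the shadow slot with INVALIDGCVALUE so the GC-time verification skips it. A hedged C++ rendering of that debug-only logic (the globals stand in for g_GCShadow, g_GCShadowEnd and g_lowest_address):

```cpp
#include <atomic>
#include <cstdint>

constexpr uintptr_t INVALIDGCVALUE = 0xCCCCCCCD;

// Placeholders for the runtime globals the macro consults.
uint8_t* g_GCShadowStandIn      = nullptr;   // base of the shadow heap; null => checking disabled
uint8_t* g_GCShadowEndStandIn   = nullptr;
uint8_t* g_lowestAddressStandIn = nullptr;   // base of the real GC heap

void UpdateGCShadowConcept(uintptr_t* dest, uintptr_t ref)
{
    if (g_GCShadowStandIn == nullptr)
        return;                                              // checking not enabled

    ptrdiff_t heapOffset = reinterpret_cast<uint8_t*>(dest) - g_lowestAddressStandIn;
    if (heapOffset < 0)
        return;                                              // not a heap location

    uintptr_t* shadow = reinterpret_cast<uintptr_t*>(g_GCShadowStandIn + heapOffset);
    if (reinterpret_cast<uint8_t*>(shadow) > g_GCShadowEndStandIn)
        return;

    *shadow = ref;                                           // mirror the store
    std::atomic_thread_fence(std::memory_order_seq_cst);     // dmb ish

    // If the real location changed under us, we cannot tell whose shadow write
    // should win -- poison the slot so the GC-time check ignores it.
    if (*reinterpret_cast<volatile uintptr_t*>(dest) != ref)
        *shadow = INVALIDGCVALUE;
}
```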
+ adrp x9, g_lowest_address + ldr x9, [x9, g_lowest_address] + subs $destReg, $destReg, x9 + blt %ft0 + + adrp x9, $g_GCShadow + ldr x9, [x9, $g_GCShadow] + add $destReg, $destReg, x9 + + adrp x9, $g_GCShadowEnd + ldr x9, [x9, $g_GCShadowEnd] + cmp $destReg, x9 + bgt %ft0 + + // Update the shadow heap. + str $refReg, [$destReg] + + // The following read must be strongly ordered wrt to the write we have just performed in order to + // prevent race conditions. + dmb ish + + // Now check that the real heap location still contains the value we just wrote into the shadow heap. + mov x9, x10 + ldr x9, [x9] + cmp x9, $refReg + beq %ft0 + + // Someone went and updated the real heap. We need to invalidate the shadow location since we can not + // guarantee whose shadow update won. + MOVL64 x9, INVALIDGCVALUE, 0 + str x9, [$destReg] + +0 + // Restore original $destReg value + mov $destReg, x10 + +1 + MEND + +#else // WRITE_BARRIER_CHECK + + .macro UPDATE_GC_SHADOW destReg, refReg + .endm + +#endif // WRITE_BARRIER_CHECK + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +// name of the register that points to the location to be updated and the name of the register that holds the +// object reference (this should be in upper case as it is used in the definition of the name of the helper). + +// Define a sub-macro first that expands to the majority of the barrier implementation. This is used below for +// some interlocked helpers that need an inline barrier. + + // On entry: + // destReg: location to be updated + // refReg: objectref to be stored + // trash: register nr than can be trashed + // trash2: register than can be trashed + // + // On exit: + // destReg: trashed + // + .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg, trash, trash2 + // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless + // we are in a debug build and write barrier checking has been enabled). + UPDATE_GC_SHADOW \destReg, \refReg + + // We can skip the card table write if the reference is to + // an object not on the epehemeral segment. + adrp x\trash, g_ephemeral_low + add x\trash, x\trash, :lo12:g_ephemeral_low + ldr x\trash, [x\trash] + cmp \refReg, x\trash + blt 0f + + adrp x\trash, g_ephemeral_high + add x\trash, x\trash, :lo12:g_ephemeral_high + ldr x\trash, [x\trash] + cmp \refReg, x\trash + bge 0f + + // Set this objects card, if it has not already been set. + + adrp x\trash, g_card_table + add x\trash, x\trash, :lo12:g_card_table + ldr x\trash, [x\trash] + add \trash2, x\trash, \destReg, lsr #11 + + // Check that this card has not already been written. Avoiding useless writes is a big win on + // multi-proc systems since it avoids cache thrashing. + ldrb w\trash, [\trash2] + cmp x\trash, 0xFF + beq 0f + + mov x\trash, 0xFF + strb w\trash, [\trash2] +0: + // Exit label + .endm + + // On entry: + // destReg: location to be updated + // refReg: objectref to be stored + // trash: register nr than can be trashed + // trash2: register than can be trashed + // + // On exit: + // destReg: trashed + // + .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg, trash, trash2 + + // The "check" of this checked write barrier - is destReg + // within the heap? if no, early out. 
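INSERT_UNCHECKED_WRITE_BARRIER_CORE above only dirties a card when the stored reference points into the ephemeral generation, and it skips the store when the card byte is already 0xFF to avoid needless cache traffic. The same logic in C++ (the globals stand in for g_ephemeral_low/high and g_card_table; the >>11 shift, i.e. 2 KB cards, is taken directly from the assembly):

```cpp
#include <cstdint>

// Placeholders for the GC globals the barrier consults.
uint8_t* g_ephemeralLowStandIn  = nullptr;
uint8_t* g_ephemeralHighStandIn = nullptr;
uint8_t* g_cardTableStandIn     = nullptr;

void WriteBarrierCoreConcept(void* dest, void* ref)
{
    // Only references into the ephemeral segment can create the old->young
    // pointers that the next ephemeral GC has to discover via the card table.
    uintptr_t refAddr = reinterpret_cast<uintptr_t>(ref);
    if (refAddr <  reinterpret_cast<uintptr_t>(g_ephemeralLowStandIn) ||
        refAddr >= reinterpret_cast<uintptr_t>(g_ephemeralHighStandIn))
        return;

    // One card byte covers 2 KB of heap (the lsr #11 above).
    uint8_t* pCard = g_cardTableStandIn + (reinterpret_cast<uintptr_t>(dest) >> 11);

    // Avoid redundant writes: re-dirtying an already-dirty card just causes
    // cache-line ping-pong on multiprocessor machines.
    if (*pCard != 0xFF)
        *pCard = 0xFF;
}
```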
+ adrp x\trash, g_lowest_address + add x\trash, x\trash, :lo12:g_lowest_address + ldr x\trash, [x\trash] + cmp \destReg, x\trash + blt 0f + + adrp x\trash, g_highest_address + add x\trash, x\trash, :lo12:g_highest_address + ldr x\trash, [x\trash] + cmp \destReg, x\trash + bgt 0f + + INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg, \trash, \trash2 + +0: + // Exit label + .endm + +// RhpCheckedAssignRef(Object** dst, Object* src) // -// RhpByRefAssignRef simulates movs instruction for object references. +// Write barrier for writes to objects that may reside +// on the managed heap. // // On entry: -// x0: address of ref-field (assigned to) -// x1: address of the data (source) -// x3: be trashed +// x0 : the destination address (LHS of the assignment). +// May not be an object reference (hence the checked). +// x1 : the object reference (RHS of the assignment). +// On exit: +// x1 : trashed +// x9 : trashed + LEAF_ENTRY RhpCheckedAssignRef, _TEXT + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + ALTERNATE_ENTRY RhpCheckedAssignRefX1 + ALTERNATE_ENTRY RhpCheckedAssignRefX1AVLocation + + stlr x1, [x0] + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x1 + + ret + + LEAF_END RhpCheckedAssignRef, _TEXT + +// RhpAssignRef(Object** dst, Object* src) // +// Write barrier for writes to objects that are known to +// reside on the managed heap. +// +// On entry: +// x0 : the destination address (LHS of the assignment). +// x1 : the object reference (RHS of the assignment). // On exit: -// x0, x1 are incremented by 8, -// x3: trashed +// x1 : trashed +// x9 : trashed + LEAF_ENTRY RhpAssignRef, _TEXT + ALTERNATE_ENTRY RhpAssignRefAVLocation + ALTERNATE_ENTRY RhpAssignRefX1 + ALTERNATE_ENTRY RhpAssignRefX1AVLocation + + stlr x1, [x0] + + INSERT_UNCHECKED_WRITE_BARRIER_CORE x0, x1, 9, x1 + + ret + + LEAF_END RhpAssignRef, _TEXT + +// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon +// successful updates. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + +// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) +// +// Interlocked compare exchange on objectref. +// +// On entry: +// x0: pointer to objectref +// x1: exchange value +// x2: comparand +// +// On exit: +// x0: original value of objectref +// x9: trashed +// x10: trashed +// + LEAF_ENTRY RhpCheckedLockCmpXchg + ALTERNATE_ENTRY RhpCheckedLockCmpXchgAVLocation + +CmpXchgRetry: + // Check location value is what we expect. + ldaxr x10, [x0] + cmp x10, x2 + bne CmpXchgNoUpdate + + // Current value matches comparand, attempt to update with the new value. + stlxr w9, x1, [x0] + cbnz w9, CmpXchgRetry + + // We have successfully updated the value of the objectref so now we need a GC write barrier. + // The following barrier code takes the destination in x0 and the value in x1 so the arguments are + // already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x0 + +CmpXchgNoUpdate: + // x10 still contains the original value. 
+ mov x0, x10 + ret lr + + LEAF_END RhpCheckedLockCmpXchg, _TEXT + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation +// - Function "UnwindWriteBarrierToCaller" assumes no registers where pushed and LR contains the return address + +// RhpCheckedXchg(Object** destination, Object* value) +// +// Interlocked exchange on objectref. +// +// On entry: +// x0: pointer to objectref +// x1: exchange value +// +// On exit: +// x0: original value of objectref +// x9: trashed +// x10: trashed +// + LEAF_ENTRY RhpCheckedXchg, _TEXT + ALTERNATE_ENTRY RhpCheckedXchgAVLocation + +ExchangeRetry: + // Read the existing memory location. + ldaxr x10, [x0] + + // Attempt to update with the new value. + stlxr w9, x1, [x0] + cbnz w9, ExchangeRetry + + // We have successfully updated the value of the objectref so now we need a GC write barrier. + // The following barrier code takes the destination in x0 and the value in x1 so the arguments are + // already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE x0, x1, 9, x0 + + // x10 still contains the original value. + mov x0, x10 + ret + + LEAF_END RhpCheckedXchg, _TEXT + +LEAF_ENTRY RhpAssignRefArm64, _TEXT + stlr x15, [x14] + + INSERT_UNCHECKED_WRITE_BARRIER_CORE x14, x15, 12, X14 + + ret +LEAF_END RhpAssignRefArm64, _TEXT + +// void JIT_CheckedWriteBarrier(Object** dst, Object* src) +// On entry: +// x14 : the destination address (LHS of the assignment) +// x15 : the object reference (RHS of the assignment) +// +// On exit: +// x12 : trashed +// x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract) +// x15 : trashed +// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP // -LEAF_ENTRY RhpByRefAssignRef, _TEXT - ldr x3, [x1], #8 - str x3, [x0], #8 +LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT + + stlr x15, [x14] + + INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12, X15 + + add x14, x14, #8 + + ret +LEAF_END RhpCheckedAssignRefArm64, _TEXT + +// void JIT_ByRefWriteBarrier +// On entry: +// x13 : the source address (points to object reference to write) +// x14 : the destination address (object reference written here) +// +// On exit: +// x12 : trashed +// x13 : incremented by 8 +// x14 : incremented by 8 +// x15 : trashed +// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP +// +LEAF_ENTRY RhpByRefAssignRefArm64, _TEXT + ldr x15, [x13] + str x15, [x14] + + INSERT_CHECKED_WRITE_BARRIER_CORE x14, x15, 12, X15 + + add X13, x13, #8 + add x14, x14, #8 + ret -LEAF_END RhpByRefAssignRef, _TEXT +LEAF_END RhpByRefAssignRefArm64, _TEXT diff --git a/src/Native/Runtime/portable.cpp b/src/Native/Runtime/portable.cpp index 4372925d0dd..797cc968834 100644 --- a/src/Native/Runtime/portable.cpp +++ b/src/Native/Runtime/portable.cpp @@ -36,7 +36,6 @@ #include "GCMemoryHelpers.inl" #if defined(USE_PORTABLE_HELPERS) - EXTERN_C REDHAWK_API void* REDHAWK_CALLCONV RhpGcAlloc(EEType *pEEType, UInt32 uFlags, UIntNative cbSize, void * pTransitionFrame); EXTERN_C REDHAWK_API void* REDHAWK_CALLCONV RhpPublishObject(void* pObject, UIntNative cbSize); @@ -88,7 +87,9 @@ COOP_PINVOKE_HELPER(Object *, RhpNewFast, (EEType* pEEType)) return pObject; } -#define GC_ALLOC_FINALIZE 0x1 // TODO: Defined in gc.h +#define GC_ALLOC_FINALIZE 0x1 // TODO: Defined in gc.h +#define GC_ALLOC_ALIGN8_BIAS 0x4 // TODO: Defined in gc.h +#define GC_ALLOC_ALIGN8 0x8 // TODO: Defined in 
gc.h COOP_PINVOKE_HELPER(Object *, RhpNewFinalizable, (EEType* pEEType)) { @@ -180,36 +181,149 @@ COOP_PINVOKE_HELPER(String *, RhNewString, (EEType * pArrayEEType, int numElemen #endif #if defined(USE_PORTABLE_HELPERS) +#if defined(FEATURE_64BIT_ALIGNMENT) + +GPTR_DECL(EEType, g_pFreeObjectEEType); -#ifdef HOST_ARM COOP_PINVOKE_HELPER(Object *, RhpNewFinalizableAlign8, (EEType* pEEType)) { Object * pObject = nullptr; - /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + /* Not reachable as finalizable types are never align8 */ ASSERT_UNCONDITIONALLY("UNREACHABLE"); return pObject; } -COOP_PINVOKE_HELPER(Object *, RhpNewFastMisalign, (EEType* pEEType)) +COOP_PINVOKE_HELPER(Object *, RhpNewFastAlign8, (EEType* pEEType)) { - Object * pObject = nullptr; - /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + ASSERT(pEEType->RequiresAlign8()); + ASSERT(!pEEType->HasFinalizer()); + + Thread* pCurThread = ThreadStore::GetCurrentThread(); + gc_alloc_context* acontext = pCurThread->GetAllocContext(); + Object* pObject; + + size_t size = pEEType->get_BaseSize(); + size = (size + (sizeof(UIntNative) - 1)) & ~(sizeof(UIntNative) - 1); + + UInt8* result = acontext->alloc_ptr; + + int requiresPadding = ((uint32_t)result) & 7; + if (requiresPadding) size += 12; + UInt8* advance = result + size; + if (advance <= acontext->alloc_limit) + { + acontext->alloc_ptr = advance; + if (requiresPadding) + { + Object* dummy = (Object*)result; + dummy->set_EEType(g_pFreeObjectEEType); + result += 12; + } + pObject = (Object*)result; + pObject->set_EEType(pEEType); + + return pObject; + } + + pObject = (Object*)RhpGcAlloc(pEEType, GC_ALLOC_ALIGN8, size, NULL); + if (pObject == nullptr) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw OOM + } + pObject->set_EEType(pEEType); + + if (size >= RH_LARGE_OBJECT_SIZE) + RhpPublishObject(pObject, size); + return pObject; } -COOP_PINVOKE_HELPER(Object *, RhpNewFastAlign8, (EEType* pEEType)) +COOP_PINVOKE_HELPER(Object*, RhpNewFastMisalign, (EEType* pEEType)) { - Object * pObject = nullptr; - /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + size_t size = pEEType->get_BaseSize(); + Object* pObject = (Object*)RhpGcAlloc(pEEType, GC_ALLOC_ALIGN8_BIAS, size, NULL); + if (pObject == nullptr) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw OOM + } + pObject->set_EEType(pEEType); + + if (size >= RH_LARGE_OBJECT_SIZE) + RhpPublishObject(pObject, size); + return pObject; } COOP_PINVOKE_HELPER(Array *, RhpNewArrayAlign8, (EEType * pArrayEEType, int numElements)) { - Array * pObject = nullptr; - /* TODO */ ASSERT_UNCONDITIONALLY("NYI"); + ASSERT_MSG(pArrayEEType->RequiresAlign8(), "RhpNewArrayAlign8 called for a type that is not aligned 8"); + + Thread* pCurThread = ThreadStore::GetCurrentThread(); + gc_alloc_context* acontext = pCurThread->GetAllocContext(); + Array* pObject; + + if (numElements < 0) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw overflow + } + + size_t size; + + UInt32 baseSize = pArrayEEType->get_BaseSize(); +#ifndef HOST_64BIT + // if the element count is <= 0x10000, no overflow is possible because the component size is + // <= 0xffff, and thus the product is <= 0xffff0000, and the base size is only ~12 bytes + if (numElements > 0x10000) + { + // Perform the size computation using 64-bit integeres to detect overflow + uint64_t size64 = (uint64_t)baseSize + ((uint64_t)numElements * (uint64_t)pArrayEEType->get_ComponentSize()); + size64 = (size64 + (sizeof(UIntNative) - 1)) & ~(sizeof(UIntNative) - 1); + + size = (size_t)size64; + if (size != size64) + { + 
ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw overflow + } + } + else +#endif // !HOST_64BIT + { + size = (size_t)baseSize + ((size_t)numElements * (size_t)pArrayEEType->get_ComponentSize()); + size = ALIGN_UP(size, sizeof(UIntNative)); + } + UInt8* result = acontext->alloc_ptr; + int requiresAlignObject = ((uint32_t)result) & 7; + if (requiresAlignObject) size += 12; + + UInt8* advance = result + size; + if (advance <= acontext->alloc_limit) + { + acontext->alloc_ptr = advance; + if (requiresAlignObject) + { + Object* dummy = (Object*)result; + dummy->set_EEType(g_pFreeObjectEEType); + result += 12; + } + pObject = (Array*)result; + pObject->set_EEType(pArrayEEType); + pObject->InitArrayLength((UInt32)numElements); + return pObject; + } + + pObject = (Array*)RhpGcAlloc(pArrayEEType, GC_ALLOC_ALIGN8, size, NULL); + if (pObject == nullptr) + { + ASSERT_UNCONDITIONALLY("NYI"); // TODO: Throw OOM + } + pObject->set_EEType(pArrayEEType); + pObject->InitArrayLength((UInt32)numElements); + + if (size >= RH_LARGE_OBJECT_SIZE) + RhpPublishObject(pObject, size); + return pObject; } -#endif +#endif // defined(HOST_ARM) || defined(HOST_WASM) COOP_PINVOKE_HELPER(void, RhpInitialDynamicInterfaceDispatch, ()) { diff --git a/src/Native/Runtime/startup.cpp b/src/Native/Runtime/startup.cpp index 4ed81423af9..a7faeccd263 100644 --- a/src/Native/Runtime/startup.cpp +++ b/src/Native/Runtime/startup.cpp @@ -49,7 +49,7 @@ EXTERN_C bool g_fHasFastFxsave = false; CrstStatic g_CastCacheLock; CrstStatic g_ThunkPoolLock; -#if defined(HOST_X86) || defined(HOST_AMD64) +#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) // This field is inspected from the generated code to determine what intrinsics are available. EXTERN_C int g_cpuFeatures = 0; // This field is defined in the generated code and sets the ISA expectations. 
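Looking back at the portable allocation helpers added above: RhpNewFastAlign8 and RhpNewArrayAlign8 achieve 8-byte alignment on a 32-bit heap by, whenever the bump pointer is only 4-aligned, planting a 12-byte dummy free object so the real object starts on an 8-byte boundary (adding 12 flips the pointer's alignment modulo 8, and 12 bytes is the usual minimum object size on a 32-bit GC heap). A condensed C++ sketch of just that padding step (the allocation context and object stubs are illustrative; the slow RhpGcAlloc fallback is elided):

```cpp
#include <cstddef>
#include <cstdint>

struct EEType;
EEType* g_pFreeObjectEETypeStandIn = nullptr;   // placeholder for the runtime's free-object type

struct ObjectStub
{
    EEType* m_pEEType;
    void set_EEType(EEType* pEEType) { m_pEEType = pEEType; }
};

struct AllocContext { uint8_t* alloc_ptr; uint8_t* alloc_limit; };

// Bump-allocate `size` bytes with 8-byte alignment when the allocation pointer
// is only guaranteed to be 4-aligned.
void* AllocAlign8Concept(AllocContext* acontext, EEType* pEEType, size_t size)
{
    uint8_t* result = acontext->alloc_ptr;

    bool requiresPadding = (reinterpret_cast<uintptr_t>(result) & 7) != 0;
    if (requiresPadding)
        size += 12;

    uint8_t* advance = result + size;
    if (advance > acontext->alloc_limit)
        return nullptr;   // take the slower RhpGcAlloc path instead (elided here)

    acontext->alloc_ptr = advance;
    if (requiresPadding)
    {
        // Fill the gap with a free object so the heap stays walkable.
        reinterpret_cast<ObjectStub*>(result)->set_EEType(g_pFreeObjectEETypeStandIn);
        result += 12;
    }

    reinterpret_cast<ObjectStub*>(result)->set_EEType(pEEType);
    return result;
}
```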
@@ -155,7 +155,9 @@ static void CheckForPalFallback() } #ifndef USE_PORTABLE_HELPERS -// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.cs + +#if defined(HOST_X86) || defined(HOST_AMD64) +// Should match the constants defined in the compiler in HardwareIntrinsicHelpers.Aot.cs enum XArchIntrinsicConstants { XArchIntrinsicConstants_Aes = 0x0001, @@ -173,8 +175,12 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Lzcnt = 0x1000, }; +#endif + bool DetectCPUFeatures() { +#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) + #if defined(HOST_X86) || defined(HOST_AMD64) unsigned char buffer[16]; @@ -293,12 +299,17 @@ bool DetectCPUFeatures() g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt; } } +#endif // HOST_X86 || HOST_AMD64 + +#if defined(HOST_ARM64) + PAL_GetCpuCapabilityFlags (&g_cpuFeatures); +#endif if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures) { return false; } -#endif // HOST_X86 || HOST_AMD64 +#endif // HOST_X86 || HOST_AMD64 || HOST_ARM64 return true; } diff --git a/src/Native/Runtime/unix/UnixContext.cpp b/src/Native/Runtime/unix/UnixContext.cpp index 458214bbe56..08b98ef6365 100644 --- a/src/Native/Runtime/unix/UnixContext.cpp +++ b/src/Native/Runtime/unix/UnixContext.cpp @@ -301,7 +301,7 @@ bool GetUnwindProcInfo(PCODE ip, unw_proc_info_t *procInfo) #elif HOST_ARM ((uint32_t*)(unwContext.data))[15] = ip; #elif HOST_ARM64 - ((uint32_t*)(unwContext.data))[32] = ip; + unwContext.data[32] = ip; #elif HOST_WASM ASSERT(false); #elif HOST_X86 @@ -618,7 +618,7 @@ bool FindProcInfo(UIntNative controlPC, UIntNative* startAddress, UIntNative* ls assert((procInfo.start_ip <= controlPC) && (controlPC < procInfo.end_ip)); -#if defined(HOST_ARM) || defined(HOST_ARM64) +#if defined(HOST_ARM) // libunwind fills by reference not by value for ARM *lsda = *((UIntNative *)procInfo.lsda); #else diff --git a/src/Native/Runtime/unix/UnixNativeCodeManager.cpp b/src/Native/Runtime/unix/UnixNativeCodeManager.cpp index e03a85e3205..a2639f6078c 100644 --- a/src/Native/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/Native/Runtime/unix/UnixNativeCodeManager.cpp @@ -388,8 +388,9 @@ bool UnixNativeCodeManager::EHEnumNext(EHEnumState * pEHEnumState, EHClause * pE { // @TODO: CORERT: Compress EHInfo using type table index scheme // https://github.com/dotnet/corert/issues/972 - Int32 typeRelAddr = *((PTR_Int32&)pEnumState->pEHInfo)++; + Int32 typeRelAddr = *((PTR_Int32&)pEnumState->pEHInfo); pEHClauseOut->m_pTargetType = dac_cast(pEnumState->pEHInfo + typeRelAddr); + pEnumState->pEHInfo += 4; } break; case EH_CLAUSE_FAULT: diff --git a/src/Native/Runtime/unix/UnwindHelpers.cpp b/src/Native/Runtime/unix/UnwindHelpers.cpp index ced22cc272c..9ed75c6a1d7 100644 --- a/src/Native/Runtime/unix/UnwindHelpers.cpp +++ b/src/Native/Runtime/unix/UnwindHelpers.cpp @@ -475,229 +475,284 @@ void Registers_arm_rt::setRegister(int num, uint32_t value, uint32_t location) #if defined(TARGET_ARM64) -class Registers_arm64_rt: public libunwind::Registers_arm64 { -public: - Registers_arm64_rt() { abort(); }; - Registers_arm64_rt(const void *registers); +// Shim that implements methods required by libunwind over REGDISPLAY +struct Registers_REGDISPLAY : REGDISPLAY +{ + inline static int getArch() { return libunwind::REGISTERS_ARM64; } + inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } + + bool validRegister(int num) const; + bool validFloatRegister(int num) { return false; }; + bool validVectorRegister(int num) const; - bool 
validRegister(int num) {abort();}; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value, uint64_t location); - bool validFloatRegister(int num) {abort();}; + double getFloatRegister(int num) {abort();} void setFloatRegister(int num, double value) {abort();} - bool validVectorRegister(int num) const {abort();} - libunwind::v128 getVectorRegister(int num) const {abort();}; - void setVectorRegister(int num, libunwind::v128 value) {abort();}; - void jumpto() { abort();}; + + libunwind::v128 getVectorRegister(int num) const; + void setVectorRegister(int num, libunwind::v128 value); - uint64_t getSP() const { return regs->SP;} - void setSP(uint64_t value, uint64_t location) { regs->SP = value;} - uint64_t getIP() const { return regs->IP;} + uint64_t getSP() const { return SP;} + void setSP(uint64_t value, uint64_t location) { SP = value;} + uint64_t getIP() const { return IP;} void setIP(uint64_t value, uint64_t location) - { regs->IP = value; regs->pIP = (PTR_UIntNative)location; } - void saveVFPAsX() {abort();}; -private: - REGDISPLAY *regs; + { IP = value; pIP = (PTR_UIntNative)location; } }; -inline Registers_arm64_rt::Registers_arm64_rt(const void *registers) { - regs = (REGDISPLAY *)registers; +inline bool Registers_REGDISPLAY::validRegister(int num) const { + if (num == UNW_REG_SP || num == UNW_ARM64_SP) + return true; + + if (num == UNW_ARM64_FP) + return true; + + if (num == UNW_ARM64_LR) + return true; + + if (num == UNW_REG_IP) + return true; + + if (num >= UNW_ARM64_X0 && num <= UNW_ARM64_X28) + return true; + + return false; +} + +bool Registers_REGDISPLAY::validVectorRegister(int num) const +{ + if (num >= UNW_ARM64_D8 && num <= UNW_ARM64_D15) + return true; + + return false; } -inline uint64_t Registers_arm64_rt::getRegister(int regNum) const { +inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { if (regNum == UNW_REG_SP || regNum == UNW_ARM64_SP) - return regs->SP; + return SP; + + if (regNum == UNW_ARM64_FP) + return *pFP; if (regNum == UNW_ARM64_LR) - return *regs->pLR; + return *pLR; if (regNum == UNW_REG_IP) - return regs->IP; + return IP; switch (regNum) { case (UNW_ARM64_X0): - return *regs->pX0; + return *pX0; case (UNW_ARM64_X1): - return *regs->pX1; + return *pX1; case (UNW_ARM64_X2): - return *regs->pX2; + return *pX2; case (UNW_ARM64_X3): - return *regs->pX3; + return *pX3; case (UNW_ARM64_X4): - return *regs->pX4; + return *pX4; case (UNW_ARM64_X5): - return *regs->pX5; + return *pX5; case (UNW_ARM64_X6): - return *regs->pX6; + return *pX6; case (UNW_ARM64_X7): - return *regs->pX7; + return *pX7; case (UNW_ARM64_X8): - return *regs->pX8; + return *pX8; case (UNW_ARM64_X9): - return *regs->pX9; + return *pX9; case (UNW_ARM64_X10): - return *regs->pX10; + return *pX10; case (UNW_ARM64_X11): - return *regs->pX11; + return *pX11; case (UNW_ARM64_X12): - return *regs->pX12; + return *pX12; case (UNW_ARM64_X13): - return *regs->pX13; + return *pX13; case (UNW_ARM64_X14): - return *regs->pX14; + return *pX14; case (UNW_ARM64_X15): - return *regs->pX15; + return *pX15; case (UNW_ARM64_X16): - return *regs->pX16; + return *pX16; case (UNW_ARM64_X17): - return *regs->pX17; + return *pX17; case (UNW_ARM64_X18): - return *regs->pX18; + return *pX18; case (UNW_ARM64_X19): - return *regs->pX19; + return *pX19; case (UNW_ARM64_X20): - return *regs->pX20; + return *pX20; case (UNW_ARM64_X21): - return *regs->pX21; + return *pX21; case (UNW_ARM64_X22): - return *regs->pX22; + return *pX22; case (UNW_ARM64_X23): - return *regs->pX23; + return 
*pX23; case (UNW_ARM64_X24): - return *regs->pX24; + return *pX24; case (UNW_ARM64_X25): - return *regs->pX25; + return *pX25; case (UNW_ARM64_X26): - return *regs->pX26; + return *pX26; case (UNW_ARM64_X27): - return *regs->pX27; + return *pX27; case (UNW_ARM64_X28): - return *regs->pX28; + return *pX28; } PORTABILITY_ASSERT("unsupported arm64 register"); } -void Registers_arm64_rt::setRegister(int num, uint64_t value, uint64_t location) +void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t location) { - if (num == UNW_REG_SP || num == UNW_ARM64_SP) { - regs->SP = (UIntNative )value; + SP = (UIntNative )value; + return; + } + + if (num == UNW_ARM64_FP) { + pFP = (PTR_UIntNative)location; return; } if (num == UNW_ARM64_LR) { - regs->pLR = (PTR_UIntNative)location; + pLR = (PTR_UIntNative)location; return; } if (num == UNW_REG_IP) { - regs->IP = value; - /* the location could be NULL, we could try to recover - pointer to value in stack from pLR */ - if ((!location) && (regs->pLR) && (*regs->pLR == value)) - regs->pIP = regs->pLR; - else - regs->pIP = (PTR_UIntNative)location; + IP = value; return; } switch (num) { case (UNW_ARM64_X0): - regs->pX0 = (PTR_UIntNative)location; + pX0 = (PTR_UIntNative)location; break; case (UNW_ARM64_X1): - regs->pX1 = (PTR_UIntNative)location; + pX1 = (PTR_UIntNative)location; break; case (UNW_ARM64_X2): - regs->pX2 = (PTR_UIntNative)location; + pX2 = (PTR_UIntNative)location; break; case (UNW_ARM64_X3): - regs->pX3 = (PTR_UIntNative)location; + pX3 = (PTR_UIntNative)location; break; case (UNW_ARM64_X4): - regs->pX4 = (PTR_UIntNative)location; + pX4 = (PTR_UIntNative)location; break; case (UNW_ARM64_X5): - regs->pX5 = (PTR_UIntNative)location; + pX5 = (PTR_UIntNative)location; break; case (UNW_ARM64_X6): - regs->pX6 = (PTR_UIntNative)location; + pX6 = (PTR_UIntNative)location; break; case (UNW_ARM64_X7): - regs->pX7 = (PTR_UIntNative)location; + pX7 = (PTR_UIntNative)location; break; case (UNW_ARM64_X8): - regs->pX8 = (PTR_UIntNative)location; + pX8 = (PTR_UIntNative)location; break; case (UNW_ARM64_X9): - regs->pX9 = (PTR_UIntNative)location; + pX9 = (PTR_UIntNative)location; break; case (UNW_ARM64_X10): - regs->pX10 = (PTR_UIntNative)location; + pX10 = (PTR_UIntNative)location; break; case (UNW_ARM64_X11): - regs->pX11 = (PTR_UIntNative)location; + pX11 = (PTR_UIntNative)location; break; case (UNW_ARM64_X12): - regs->pX12 = (PTR_UIntNative)location; + pX12 = (PTR_UIntNative)location; break; case (UNW_ARM64_X13): - regs->pX13 = (PTR_UIntNative)location; + pX13 = (PTR_UIntNative)location; break; case (UNW_ARM64_X14): - regs->pX14 = (PTR_UIntNative)location; + pX14 = (PTR_UIntNative)location; break; case (UNW_ARM64_X15): - regs->pX15 = (PTR_UIntNative)location; + pX15 = (PTR_UIntNative)location; break; case (UNW_ARM64_X16): - regs->pX16 = (PTR_UIntNative)location; + pX16 = (PTR_UIntNative)location; break; case (UNW_ARM64_X17): - regs->pX17 = (PTR_UIntNative)location; + pX17 = (PTR_UIntNative)location; break; case (UNW_ARM64_X18): - regs->pX18 = (PTR_UIntNative)location; + pX18 = (PTR_UIntNative)location; break; case (UNW_ARM64_X19): - regs->pX19 = (PTR_UIntNative)location; + pX19 = (PTR_UIntNative)location; break; case (UNW_ARM64_X20): - regs->pX20 = (PTR_UIntNative)location; + pX20 = (PTR_UIntNative)location; break; case (UNW_ARM64_X21): - regs->pX21 = (PTR_UIntNative)location; + pX21 = (PTR_UIntNative)location; break; case (UNW_ARM64_X22): - regs->pX22 = (PTR_UIntNative)location; + pX22 = (PTR_UIntNative)location; break; case 
(UNW_ARM64_X23): - regs->pX23 = (PTR_UIntNative)location; + pX23 = (PTR_UIntNative)location; break; case (UNW_ARM64_X24): - regs->pX24 = (PTR_UIntNative)location; + pX24 = (PTR_UIntNative)location; break; case (UNW_ARM64_X25): - regs->pX25 = (PTR_UIntNative)location; + pX25 = (PTR_UIntNative)location; break; case (UNW_ARM64_X26): - regs->pX26 = (PTR_UIntNative)location; + pX26 = (PTR_UIntNative)location; break; case (UNW_ARM64_X27): - regs->pX27 = (PTR_UIntNative)location; + pX27 = (PTR_UIntNative)location; break; case (UNW_ARM64_X28): - regs->pX28 = (PTR_UIntNative)location; + pX28 = (PTR_UIntNative)location; break; default: PORTABILITY_ASSERT("unsupported arm64 register"); } } +libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const +{ + num -= UNW_ARM64_D8; + + if (num < 0 || num >= sizeof(D) / sizeof(UInt64)) + { + PORTABILITY_ASSERT("unsupported arm64 vector register"); + } + + libunwind::v128 result; + + result.vec[0] = 0; + result.vec[1] = 0; + result.vec[2] = D[num] >> 32; + result.vec[3] = D[num] & 0xFFFFFFFF; + + return result; +} + +void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) +{ + num -= UNW_ARM64_D8; + + if (num < 0 || num >= sizeof(D) / sizeof(UInt64)) + { + PORTABILITY_ASSERT("unsupported arm64 vector register"); + } + + D[num] = (UInt64)value.vec[2] << 32 | (UInt64)value.vec[3]; +} + #endif // TARGET_ARM64 bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs) @@ -707,7 +762,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs #elif defined(TARGET_ARM) libunwind::UnwindCursor uc(_addressSpace, regs); #elif defined(TARGET_ARM64) - libunwind::UnwindCursor uc(_addressSpace, regs); + libunwind::UnwindCursor uc(_addressSpace, regs); #elif defined(HOST_X86) libunwind::UnwindCursor uc(_addressSpace, regs); #else @@ -724,10 +779,7 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs unw_proc_info_t procInfo; uc.getInfo(&procInfo); -#if defined(TARGET_ARM64) - DwarfInstructions dwarfInst; - int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm64_rt*)regs); -#elif defined(TARGET_ARM) +#if defined(TARGET_ARM) DwarfInstructions dwarfInst; int stepRet = dwarfInst.stepWithDwarf(_addressSpace, pc, procInfo.unwind_info, *(Registers_arm_rt*)regs); #else @@ -740,7 +792,12 @@ bool DoTheStep(uintptr_t pc, UnwindInfoSections uwInfoSections, REGDISPLAY *regs return false; } +#if defined(TARGET_ARM64) + regs->SetAddrOfIP(regs->pLR); +#else regs->pIP = PTR_PCODE(regs->SP - sizeof(TADDR)); +#endif + #elif defined(_LIBUNWIND_ARM_EHABI) uc.setInfoBasedOnIPRegister(true); int stepRet = uc.step(); diff --git a/src/Native/Runtime/unix/unixasmmacrosarm64.inc b/src/Native/Runtime/unix/unixasmmacrosarm64.inc index d031a77085e..3e3bbdbba86 100644 --- a/src/Native/Runtime/unix/unixasmmacrosarm64.inc +++ b/src/Native/Runtime/unix/unixasmmacrosarm64.inc @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +#include "AsmOffsets.inc" + .macro NESTED_ENTRY Name, Section, Handler LEAF_ENTRY \Name, \Section .ifnc \Handler, NoHandler @@ -69,6 +71,11 @@ C_FUNC(\Name): .endif .endm +.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ofs + stp \reg1, \reg2, [sp, \ofs]! 
+.endm + + .macro EPILOG_RESTORE_REG reg, ofs ldr \reg, [sp, \ofs] .endm @@ -137,3 +144,109 @@ C_FUNC(\Name): br \reg .endm + +#define xip0 x16 +#define xip1 x17 +#define xpr x18 + +.macro INLINE_GET_TLS_VAR target, var + mrs \target, tpidr_el0 + add \target, \target, #:tprel_hi12:\var, lsl #12 + add \target, \target, #:tprel_lo12_nc:\var +.endm + + +.macro PREPARE_INLINE_GETTHREAD +.global tls_CurrentThread +.endm + +.macro INLINE_GETTHREAD target + INLINE_GET_TLS_VAR \target, tls_CurrentThread +.endm + +.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 + // + // Thread::Unhijack() + // + ldr \trashReg1, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] + cbz \trashReg1, 0f + + ldr \trashReg2, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str \trashReg1, [\trashReg2] + str xzr, [\threadReg, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation] + str xzr, [\threadReg, #OFFSETOF__Thread__m_pvHijackedReturnAddress] +0: +.endm + + +.macro EXPORT_POINTER_TO_ADDRESS Name + +1: + + .data + .align 8 +C_FUNC(\Name): + .word 1b + .global C_FUNC(\Name) + .text +.endm + +// Note: these must match the defs in PInvokeTransitionFrameFlags +PTFF_SAVE_SP = 0x00000400 +PTFF_SAVE_ALL_PRESERVED = 0x000003FF // NOTE: x19-x28 + +DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP + +.macro PUSH_COOP_PINVOKE_FRAME trashReg + + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-0x80 // Push down stack pointer and store FP and LR + + // 0x10 bytes reserved for Thread* and flags + + // Save callee saved registers + PROLOG_SAVE_REG_PAIR x19, x20, #0x20 + PROLOG_SAVE_REG_PAIR x21, x22, #0x30 + PROLOG_SAVE_REG_PAIR x23, x24, #0x40 + PROLOG_SAVE_REG_PAIR x25, x26, #0x50 + PROLOG_SAVE_REG_PAIR x27, x28, #0x60 + + // Save the value of SP before stack allocation to the last slot in the frame (slot #15) + add \trashReg, sp, #0x80 + str \trashReg, [sp, #0x70] + + // Record the bitmask of saved registers in the frame (slot #3) + mov \trashReg, #DEFAULT_FRAME_SAVE_FLAGS + str \trashReg, [sp, #0x18] + + mov \trashReg, sp +.endm + +// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME +.macro POP_COOP_PINVOKE_FRAME + + EPILOG_RESTORE_REG_PAIR x19, x20, #0x20 + EPILOG_RESTORE_REG_PAIR x21, x22, #0x30 + EPILOG_RESTORE_REG_PAIR x23, x24, #0x40 + EPILOG_RESTORE_REG_PAIR x25, x26, #0x50 + EPILOG_RESTORE_REG_PAIR x27, x28, #0x60 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #0x80 +.endm + +// +// CONSTANTS -- INTEGER +// +#define TSF_Attached 0x01 +#define TSF_SuppressGcStress 0x08 +#define TSF_DoNotTriggerGc 0x10 + +// Bit position for the flags above, to be used with tbz / tbnz instructions +TrapThreadsFlags_AbortInProgress_Bit = 0 +TrapThreadsFlags_TrapThreads_Bit = 1 + +// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +#define STATUS_REDHAWK_THREAD_ABORT 0x43 + +// These must match the TrapThreadsFlags enum +#define TrapThreadsFlags_None 0 +#define TrapThreadsFlags_AbortInProgress 1 +#define TrapThreadsFlags_TrapThreads 2 diff --git a/src/Native/gc/env/gcenv.base.h b/src/Native/gc/env/gcenv.base.h index 3c1e59ef488..7132efae407 100644 --- a/src/Native/gc/env/gcenv.base.h +++ b/src/Native/gc/env/gcenv.base.h @@ -75,6 +75,7 @@ inline HRESULT HRESULT_FROM_WIN32(unsigned long x) #define S_OK 0x0 #define E_FAIL 0x80004005 #define E_OUTOFMEMORY 0x8007000E +#define E_INVALIDARG 0x80070057 #define COR_E_EXECUTIONENGINE 0x80131506 #define CLR_E_GC_BAD_AFFINITY_CONFIG 0x8013200A #define CLR_E_GC_BAD_AFFINITY_CONFIG_FORMAT 0x8013200B diff --git 
a/src/Native/gc/env/gcenv.object.h b/src/Native/gc/env/gcenv.object.h index 7a8976fe629..c5c6d42fd64 100644 --- a/src/Native/gc/env/gcenv.object.h +++ b/src/Native/gc/env/gcenv.object.h @@ -4,6 +4,13 @@ #ifndef __GCENV_OBJECT_H__ #define __GCENV_OBJECT_H__ +// ARM requires that 64-bit primitive types are aligned at 64-bit boundaries for interlocked-like operations. +// Additionally the platform ABI requires these types and composite type containing them to be similarly +// aligned when passed as arguments. +#ifdef TARGET_ARM +#define FEATURE_64BIT_ALIGNMENT +#endif + //------------------------------------------------------------------------------------------------- // // Low-level types describing GC object layouts. @@ -35,18 +42,25 @@ class ObjHeader static_assert(sizeof(ObjHeader) == sizeof(uintptr_t), "this assumption is made by the VM!"); -#define MTFlag_ContainsPointers 0x0100 -#define MTFlag_HasCriticalFinalizer 0x0800 -#define MTFlag_HasFinalizer 0x0010 -#define MTFlag_IsArray 0x0008 -#define MTFlag_Collectible 0x1000 -#define MTFlag_HasComponentSize 0x8000 +#define MTFlag_RequireAlign8 0x00001000 +#define MTFlag_Category_ValueType 0x00040000 +#define MTFlag_Category_ValueType_Mask 0x000C0000 +#define MTFlag_ContainsPointers 0x01000000 +#define MTFlag_HasCriticalFinalizer 0x08000000 +#define MTFlag_HasFinalizer 0x00100000 +#define MTFlag_IsArray 0x00080000 +#define MTFlag_Collectible 0x10000000 +#define MTFlag_HasComponentSize 0x80000000 class MethodTable { public: - uint16_t m_componentSize; - uint16_t m_flags; + union + { + uint16_t m_componentSize; + uint32_t m_flags; + }; + uint32_t m_baseSize; MethodTable * m_pRelatedType; @@ -55,8 +69,8 @@ class MethodTable void InitializeFreeObject() { m_baseSize = 3 * sizeof(void *); - m_componentSize = 1; m_flags = MTFlag_HasComponentSize | MTFlag_IsArray; + m_componentSize = 1; } uint32_t GetBaseSize() @@ -84,6 +98,16 @@ class MethodTable return ContainsPointers() || Collectible(); } + bool RequiresAlign8() + { + return (m_flags & MTFlag_RequireAlign8) != 0; + } + + bool IsValueType() + { + return (m_flags & MTFlag_Category_ValueType_Mask) == MTFlag_Category_ValueType; + } + bool HasComponentSize() { // Note that we can't just check m_componentSize != 0 here. The VM diff --git a/src/Native/gc/gc.cpp b/src/Native/gc/gc.cpp index de0add29b75..496f00a309a 100644 --- a/src/Native/gc/gc.cpp +++ b/src/Native/gc/gc.cpp @@ -80,8 +80,6 @@ int compact_ratio = 0; // See comments in reset_memory. 
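The gcenv.object.h hunk above overlays the 16-bit `m_componentSize` with a widened 32-bit `m_flags` word and moves the flag bits (for example `MTFlag_HasComponentSize` at 0x80000000) into the upper bits so the two fields no longer collide. The standalone sketch below illustrates that layout and why `InitializeFreeObject` now assigns `m_flags` before `m_componentSize`; `MiniMethodTable` and its `main()` are illustrative stand-ins, not the runtime's `MethodTable`, and the aliasing assumes a little-endian target.

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the widened flags/component-size overlay shown above.
// On a little-endian target, m_componentSize aliases the low 16 bits of m_flags,
// so the flag bits live in the upper bits of the 32-bit word.
struct MiniMethodTable
{
    union
    {
        uint16_t m_componentSize;   // element size, meaningful only when HasComponentSize is set
        uint32_t m_flags;           // full 32-bit flag word
    };
    uint32_t m_baseSize;

    static constexpr uint32_t HasComponentSize = 0x80000000;
    static constexpr uint32_t IsArray          = 0x00080000;

    void InitializeFreeObject()
    {
        m_baseSize = 3 * sizeof(void*);
        // Write the whole flag word first, then the low 16 bits: the reverse
        // order would wipe the component size when m_flags is assigned.
        m_flags = HasComponentSize | IsArray;
        m_componentSize = 1;
    }

    bool HasComponentSizeFlag() const { return (m_flags & HasComponentSize) != 0; }
    uint16_t ComponentSize() const { return HasComponentSizeFlag() ? m_componentSize : 0; }
};

int main()
{
    MiniMethodTable mt;
    mt.InitializeFreeObject();
    printf("flags=0x%08x componentSize=%u baseSize=%u\n",
           (unsigned)mt.m_flags, (unsigned)mt.ComponentSize(), (unsigned)mt.m_baseSize);
    return 0;
}
```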
BOOL reset_mm_p = TRUE; -bool g_fFinalizerRunOnShutDown = false; - #ifdef FEATURE_SVR_GC bool g_built_with_svr_gc = true; #else @@ -191,22 +189,41 @@ BOOL is_induced_blocking (gc_reason reason) (reason == reason_lowmemory_host_blocking)); } +gc_oh_num gen_to_oh(int gen) +{ + switch (gen) + { + case soh_gen0: + return gc_oh_num::soh; + case soh_gen1: + return gc_oh_num::soh; + case soh_gen2: + return gc_oh_num::soh; + case loh_generation: + return gc_oh_num::loh; + case poh_generation: + return gc_oh_num::poh; + default: + return gc_oh_num::none; + } +} + #ifndef DACCESS_COMPILE -int64_t qpf; -size_t start_time; +uint64_t qpf; +double qpf_ms; +double qpf_us; -size_t GetHighPrecisionTimeStamp() +uint64_t GetHighPrecisionTimeStamp() { int64_t ts = GCToOSInterface::QueryPerformanceCounter(); - return (size_t)(ts / (qpf / 1000)); + return (uint64_t)((double)ts * qpf_us); } uint64_t RawGetHighPrecisionTimeStamp() { return (uint64_t)GCToOSInterface::QueryPerformanceCounter(); } - #endif #ifdef BGC_SERVO_TUNING @@ -338,7 +355,6 @@ uint32_t bgc_alloc_spin_count = 140; uint32_t bgc_alloc_spin_count_loh = 16; uint32_t bgc_alloc_spin = 2; - inline void c_write (uint32_t& place, uint32_t value) { @@ -362,12 +378,12 @@ gc_heap::gc_history gc_heap::gchist_per_heap[max_history_count]; void gc_heap::add_to_history_per_heap() { -#ifdef GC_HISTORY +#if defined(GC_HISTORY) && defined(BACKGROUND_GC) gc_history* current_hist = &gchist_per_heap[gchist_index_per_heap]; current_hist->gc_index = settings.gc_index; current_hist->current_bgc_state = current_bgc_state; size_t elapsed = dd_gc_elapsed_time (dynamic_data_of (0)); - current_hist->gc_time_ms = (uint32_t)elapsed; + current_hist->gc_time_ms = (uint32_t)(elapsed / 1000); current_hist->gc_efficiency = (elapsed ? (total_promoted_bytes / elapsed) : total_promoted_bytes); current_hist->eph_low = generation_allocation_start (generation_of (max_generation-1)); current_hist->gen0_start = generation_allocation_start (generation_of (0)); @@ -386,12 +402,12 @@ void gc_heap::add_to_history_per_heap() { gchist_index_per_heap = 0; } -#endif //GC_HISTORY +#endif //GC_HISTORY && BACKGROUND_GC } void gc_heap::add_to_history() { -#ifdef GC_HISTORY +#if defined(GC_HISTORY) && defined(BACKGROUND_GC) gc_mechanisms_store* current_settings = &gchist[gchist_index]; current_settings->store (&settings); @@ -400,7 +416,7 @@ void gc_heap::add_to_history() { gchist_index = 0; } -#endif //GC_HISTORY +#endif //GC_HISTORY && BACKGROUND_GC } #endif //DACCESS_COMPILE @@ -1865,7 +1881,14 @@ const int max_snoop_level = 128; #define MH_TH_CARD_BUNDLE (180*1024*1024) #endif //CARD_BUNDLE -#define GC_EPHEMERAL_DECOMMIT_TIMEOUT 5000 +// min size to decommit to make the OS call worthwhile +#define MIN_DECOMMIT_SIZE (100*OS_PAGE_SIZE) + +// max size to decommit per millisecond +#define DECOMMIT_SIZE_PER_MILLISECOND (160*1024) + +// time in milliseconds between decommit steps +#define DECOMMIT_TIME_STEP_MILLISECONDS (100) inline size_t align_on_page (size_t add) @@ -1999,11 +2022,11 @@ static static_data static_data_table[latency_level_last - latency_level_first + // latency_level_memory_footprint { // gen0 - {0, 0, 40000, 0.5f, 9.0f, 20.0f, 1000, 1}, + {0, 0, 40000, 0.5f, 9.0f, 20.0f, (1000 * 1000), 1}, // gen1 - {160*1024, 0, 80000, 0.5f, 2.0f, 7.0f, 10000, 10}, + {160*1024, 0, 80000, 0.5f, 2.0f, 7.0f, (10 * 1000 * 1000), 10}, // gen2 - {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, 100000, 100}, + {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, (100 * 1000 * 1000), 100}, // loh 
{3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}, // poh @@ -2019,11 +2042,11 @@ static static_data static_data_table[latency_level_last - latency_level_first + #else 9.0f, 20.0f, #endif //MULTIPLE_HEAPS - 1000, 1}, + (1000 * 1000), 1}, // gen1 - {256*1024, 0, 80000, 0.5f, 2.0f, 7.0f, 10000, 10}, + {256*1024, 0, 80000, 0.5f, 2.0f, 7.0f, (10 * 1000 * 1000), 10}, // gen2 - {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, 100000, 100}, + {256*1024, SSIZE_T_MAX, 200000, 0.25f, 1.2f, 1.8f, (100 * 1000 * 1000), 100}, // loh {3*1024*1024, SSIZE_T_MAX, 0, 0.0f, 1.25f, 4.5f, 0, 0}, // poh @@ -2059,7 +2082,6 @@ void qsort1(uint8_t** low, uint8_t** high, unsigned int depth); void* virtual_alloc (size_t size); void* virtual_alloc (size_t size, bool use_large_pages_p); -void virtual_free (void* add, size_t size); /* per heap static initialization */ #if defined(BACKGROUND_GC) && !defined(MULTIPLE_HEAPS) @@ -2111,6 +2133,8 @@ int* gc_heap::g_mark_stack_busy; size_t* gc_heap::g_bpromoted; #endif //BACKGROUND_GC +BOOL gc_heap::gradual_decommit_in_progress_p = FALSE; +size_t gc_heap::max_decommit_step_size = 0; #else //MULTIPLE_HEAPS size_t gc_heap::g_promoted; @@ -2135,20 +2159,33 @@ gc_mechanisms gc_heap::settings; gc_history_global gc_heap::gc_data_global; -size_t gc_heap::gc_last_ephemeral_decommit_time = 0; - -size_t gc_heap::gc_gen0_desired_high; +uint64_t gc_heap::gc_last_ephemeral_decommit_time = 0; CLRCriticalSection gc_heap::check_commit_cs; size_t gc_heap::current_total_committed = 0; +size_t gc_heap::committed_by_oh[total_oh_count] = {0, 0, 0, 0}; + size_t gc_heap::current_total_committed_bookkeeping = 0; #ifdef SHORT_PLUGS double gc_heap::short_plugs_pad_ratio = 0; #endif //SHORT_PLUGS +uint64_t gc_heap::suspended_start_time = 0; +uint64_t gc_heap::end_gc_time = 0; +uint64_t gc_heap::total_suspended_time = 0; +uint64_t gc_heap::process_start_time = 0; +last_recorded_gc_info gc_heap::last_ephemeral_gc_info; +last_recorded_gc_info gc_heap::last_full_blocking_gc_info; + +#ifdef BACKGROUND_GC +last_recorded_gc_info gc_heap::last_bgc_info[2]; +VOLATILE(bool) gc_heap::is_last_recorded_bgc = false; +VOLATILE(int) gc_heap::last_bgc_info_index = 0; +#endif //BACKGROUND_GC + #if defined(HOST_64BIT) #define MAX_ALLOWED_MEM_LOAD 85 @@ -2160,12 +2197,6 @@ double gc_heap::short_plugs_pad_ratio = 0; size_t gc_heap::youngest_gen_desired_th; #endif //HOST_64BIT -uint32_t gc_heap::last_gc_memory_load = 0; - -size_t gc_heap::last_gc_heap_size = 0; - -size_t gc_heap::last_gc_fragmentation = 0; - uint64_t gc_heap::mem_one_percent = 0; uint32_t gc_heap::high_memory_load_th = 0; @@ -2182,6 +2213,8 @@ uint64_t gc_heap::entry_available_physical_mem = 0; size_t gc_heap::heap_hard_limit = 0; +size_t gc_heap::heap_hard_limit_oh[total_oh_count - 1] = {0, 0, 0}; + bool affinity_config_specified_p = false; #ifdef BACKGROUND_GC GCEvent gc_heap::bgc_start_event; @@ -2313,7 +2346,7 @@ BOOL gc_heap::blocking_collection = FALSE; heap_segment* gc_heap::freeable_uoh_segment = 0; -size_t gc_heap::time_bgc_last = 0; +uint64_t gc_heap::time_bgc_last = 0; size_t gc_heap::mark_stack_tos = 0; @@ -2329,9 +2362,7 @@ BOOL gc_heap::verify_pinned_queue_p = FALSE; uint8_t* gc_heap::oldest_pinned_plug = 0; -#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) size_t gc_heap::num_pinned_objects = 0; -#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE #ifdef FEATURE_LOH_COMPACTION size_t gc_heap::loh_pinned_queue_tos = 0; @@ -2502,7 +2533,7 @@ size_t gc_heap::eph_gen_starts_size = 0; heap_segment* 
gc_heap::segment_standby_list; bool gc_heap::use_large_pages_p = 0; #ifdef HEAP_BALANCE_INSTRUMENTATION -size_t gc_heap::last_gc_end_time_ms = 0; +size_t gc_heap::last_gc_end_time_us = 0; #endif //HEAP_BALANCE_INSTRUMENTATION size_t gc_heap::min_segment_size = 0; size_t gc_heap::min_segment_size_shr = 0; @@ -2511,8 +2542,6 @@ size_t gc_heap::min_uoh_segment_size = 0; size_t gc_heap::segment_info_size = 0; #ifdef GC_CONFIG_DRIVEN -size_t gc_heap::time_init = 0; -size_t gc_heap::time_since_init = 0; size_t gc_heap::compact_or_sweep_gcs[2]; #endif //GC_CONFIG_DRIVEN @@ -3896,7 +3925,8 @@ struct initial_memory_details { ALLATONCE = 1, EACH_GENERATION, - EACH_BLOCK + EACH_BLOCK, + ALLATONCE_SEPARATED_POH }; size_t allocation_pattern; @@ -3942,7 +3972,7 @@ struct initial_memory_details initial_memory_details memory_details; -BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p) +BOOL gc_heap::reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p, bool separated_poh_p) { BOOL reserve_success = FALSE; @@ -3986,14 +4016,35 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinne return FALSE; } - size_t requestedMemory = memory_details.block_count * (normal_size + large_size + pinned_size); + size_t temp_pinned_size = (separated_poh_p ? 0 : pinned_size); + size_t separate_pinned_size = memory_details.block_count * pinned_size; + size_t requestedMemory = memory_details.block_count * (normal_size + large_size + temp_pinned_size); uint8_t* allatonce_block = (uint8_t*)virtual_alloc (requestedMemory, use_large_pages_p); + uint8_t* separated_poh_block = nullptr; + if (allatonce_block && separated_poh_p) + { + separated_poh_block = (uint8_t*)virtual_alloc (separate_pinned_size, false); + if (!separated_poh_block) + { + virtual_free (allatonce_block, requestedMemory); + allatonce_block = nullptr; + } + } if (allatonce_block) { - g_gc_lowest_address = allatonce_block; - g_gc_highest_address = allatonce_block + requestedMemory; - memory_details.allocation_pattern = initial_memory_details::ALLATONCE; + if (separated_poh_p) + { + g_gc_lowest_address = min (allatonce_block, separated_poh_block); + g_gc_highest_address = max ((allatonce_block + requestedMemory), (separated_poh_block + separate_pinned_size)); + memory_details.allocation_pattern = initial_memory_details::ALLATONCE_SEPARATED_POH; + } + else + { + g_gc_lowest_address = allatonce_block; + g_gc_highest_address = allatonce_block + requestedMemory; + memory_details.allocation_pattern = initial_memory_details::ALLATONCE; + } for (int i = 0; i < memory_details.block_count; i++) { @@ -4001,8 +4052,16 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinne (i * normal_size); memory_details.initial_large_heap[i].memory_base = allatonce_block + (memory_details.block_count * normal_size) + (i * large_size); - memory_details.initial_pinned_heap[i].memory_base = allatonce_block + - (memory_details.block_count * (normal_size + large_size)) + (i * pinned_size); + if (separated_poh_p) + { + memory_details.initial_pinned_heap[i].memory_base = separated_poh_block + + (i * pinned_size); + } + else + { + memory_details.initial_pinned_heap[i].memory_base = allatonce_block + + (memory_details.block_count * (normal_size + large_size)) + (i * pinned_size); + } reserve_success = TRUE; } @@ -4012,7 +4071,7 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t 
pinne // try to allocate 3 blocks uint8_t* b1 = (uint8_t*)virtual_alloc (memory_details.block_count * normal_size, use_large_pages_p); uint8_t* b2 = (uint8_t*)virtual_alloc (memory_details.block_count * large_size, use_large_pages_p); - uint8_t* b3 = (uint8_t*)virtual_alloc (memory_details.block_count * pinned_size, use_large_pages_p); + uint8_t* b3 = (uint8_t*)virtual_alloc (memory_details.block_count * pinned_size, use_large_pages_p && !separated_poh_p); if (b1 && b2 && b3) { @@ -4081,15 +4140,23 @@ BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinne return reserve_success; } -void destroy_initial_memory() +void gc_heap::destroy_initial_memory() { if (memory_details.initial_memory != NULL) { if (memory_details.allocation_pattern == initial_memory_details::ALLATONCE) + { + virtual_free(memory_details.initial_memory[0].memory_base, + memory_details.block_count*(memory_details.block_size_normal + + memory_details.block_size_large + memory_details.block_size_pinned)); + } + else if (memory_details.allocation_pattern == initial_memory_details::ALLATONCE_SEPARATED_POH) { virtual_free(memory_details.initial_memory[0].memory_base, memory_details.block_count*(memory_details.block_size_normal + memory_details.block_size_large)); + virtual_free(memory_details.initial_pinned_heap[0].memory_base, + memory_details.block_count*(memory_details.block_size_pinned)); } else if (memory_details.allocation_pattern == initial_memory_details::EACH_GENERATION) { @@ -4101,7 +4168,7 @@ void destroy_initial_memory() virtual_free (memory_details.initial_pinned_heap[0].memory_base, memory_details.block_count*memory_details.block_size_pinned); - } + } else { assert (memory_details.allocation_pattern == initial_memory_details::EACH_BLOCK); @@ -4128,7 +4195,8 @@ heap_segment* make_initial_segment (int gen, int h_number) { void* mem = memory_details.get_initial_memory (gen, h_number); size_t size = memory_details.get_initial_size (gen); - heap_segment* res = gc_heap::make_heap_segment ((uint8_t*)mem, size , h_number); + gc_oh_num oh = gen_to_oh (gen); + heap_segment* res = gc_heap::make_heap_segment ((uint8_t*)mem, size, oh, h_number); return res; } @@ -4195,14 +4263,6 @@ void* virtual_alloc (size_t size, bool use_large_pages_p) return aligned_mem; } -void virtual_free (void* add, size_t size) -{ - GCToOSInterface::VirtualRelease (add, size); - gc_heap::reserved_memory -= size; - dprintf (2, ("Virtual Free size %Id: [%Ix, %Ix[", - size, (size_t)add, (size_t)((uint8_t*)add+size))); -} - static size_t get_valid_segment_size (BOOL large_seg=FALSE) { size_t seg_size, initial_seg_size; @@ -4376,7 +4436,7 @@ gc_heap::soh_get_segment_to_expand() } } - heap_segment* result = get_segment (size, FALSE); + heap_segment* result = get_segment (size, gc_oh_num::soh); if(result) { @@ -4417,8 +4477,10 @@ gc_heap::soh_get_segment_to_expand() //returns 0 in case of allocation failure heap_segment* -gc_heap::get_segment (size_t size, BOOL loh_p) +gc_heap::get_segment (size_t size, gc_oh_num oh) { + assert(oh != gc_oh_num::none); + BOOL uoh_p = (oh == gc_oh_num::loh) || (oh == gc_oh_num::poh); if (heap_hard_limit) return NULL; @@ -4487,11 +4549,11 @@ gc_heap::get_segment (size_t size, BOOL loh_p) void* mem = virtual_alloc (size); if (!mem) { - fgm_result.set_fgm (fgm_reserve_segment, size, loh_p); + fgm_result.set_fgm (fgm_reserve_segment, size, uoh_p); return 0; } - result = gc_heap::make_heap_segment ((uint8_t*)mem, size, heap_number); + result = gc_heap::make_heap_segment ((uint8_t*)mem, size, oh, 
heap_number); if (result) { @@ -4515,7 +4577,7 @@ gc_heap::get_segment (size_t size, BOOL loh_p) end = (uint8_t*)g_gc_highest_address; } - if (gc_heap::grow_brick_card_tables (start, end, size, result, __this, loh_p) != 0) + if (gc_heap::grow_brick_card_tables (start, end, size, result, __this, uoh_p) != 0) { virtual_free (mem, size); return 0; @@ -4523,7 +4585,7 @@ gc_heap::get_segment (size_t size, BOOL loh_p) } else { - fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, loh_p); + fgm_result.set_fgm (fgm_commit_segment_beg, SEGMENT_INITIAL_COMMIT, uoh_p); virtual_free (mem, size); } @@ -4547,11 +4609,11 @@ gc_heap::get_segment (size_t size, BOOL loh_p) return result; } -void release_segment (heap_segment* sg) +void gc_heap::release_segment (heap_segment* sg) { ptrdiff_t delta = 0; FIRE_EVENT(GCFreeSegment_V1, heap_segment_mem(sg)); - virtual_free (sg, (uint8_t*)heap_segment_reserved (sg)-(uint8_t*)sg); + virtual_free (sg, (uint8_t*)heap_segment_reserved (sg)-(uint8_t*)sg, sg); } heap_segment* gc_heap::get_segment_for_uoh (int gen_number, size_t size @@ -4563,7 +4625,8 @@ heap_segment* gc_heap::get_segment_for_uoh (int gen_number, size_t size #ifndef MULTIPLE_HEAPS gc_heap* hp = 0; #endif //MULTIPLE_HEAPS - heap_segment* res = hp->get_segment (size, TRUE); + gc_oh_num oh = gen_to_oh (gen_number); + heap_segment* res = hp->get_segment (size, oh); if (res != 0) { #ifdef MULTIPLE_HEAPS @@ -5110,7 +5173,7 @@ void add_to_hb_numa ( dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMP[p%3d->%3d(i:%3d), N%d] #%4d: %I64d, tid %d, ah: %d, m: %d, p: %d, i: %d", saved_proc_no, proc_no, ideal_proc_no, numa_no, index, - (timestamp - start_raw_ts), tid, alloc_heap, (int)multiple_procs_p, (int)(!alloc_count_p), (int)set_ideal_p)); + (timestamp - start_raw_ts) / 1000, tid, alloc_heap, (int)multiple_procs_p, (int)(!alloc_count_p), (int)set_ideal_p)); if (multiple_procs_p) { @@ -5156,8 +5219,8 @@ void gc_heap::hb_log_balance_activities() #ifdef HEAP_BALANCE_INSTRUMENTATION char* log_buffer = hb_log_buffer; - size_t now = GetHighPrecisionTimeStamp (); - size_t time_since_last_gc_ms = now - last_gc_end_time_ms; + uint64_t now = GetHighPrecisionTimeStamp(); + size_t time_since_last_gc_ms = (size_t)((now - last_gc_end_time_us) / 1000); dprintf (HEAP_BALANCE_TEMP_LOG, ("TEMP%Id - %Id = %Id", now, last_gc_end_time_ms, time_since_last_gc_ms)); // We want to get the min and the max timestamp for all procs because it helps with our post processing @@ -5202,8 +5265,10 @@ void gc_heap::hb_log_balance_activities() int total_entries_on_proc = hb_info_proc->index; if (total_entries_on_proc > 0) { - int total_exec_time_ms = (int)((hb_info_proc->hb_info[total_entries_on_proc - 1].timestamp - hb_info_proc->hb_info[0].timestamp) / (qpf / 1000)); - dprintf (HEAP_BALANCE_LOG, ("[p%d]-%d-%dms", (proc_index + numa_node_index * procs_per_numa_node), total_entries_on_proc, total_exec_time_ms)); + int total_exec_time_ms = + (int)((double)(hb_info_proc->hb_info[total_entries_on_proc - 1].timestamp - hb_info_proc->hb_info[0].timestamp) * qpf_ms); + dprintf (HEAP_BALANCE_LOG, ("[p%d]-%d-%dms", + (proc_index + numa_node_index * procs_per_numa_node), total_entries_on_proc, total_exec_time_ms)); } for (int i = 0; i < hb_info_proc->index; i++) @@ -5396,8 +5461,14 @@ void gc_heap::gc_thread_function () if (heap_number == 0) { - gc_heap::ee_suspend_event.Wait(INFINITE, FALSE); + uint32_t wait_result = gc_heap::ee_suspend_event.Wait(gradual_decommit_in_progress_p ? 
DECOMMIT_TIME_STEP_MILLISECONDS : INFINITE, FALSE); + if (wait_result == WAIT_TIMEOUT) + { + gradual_decommit_in_progress_p = decommit_step (); + continue; + } + suspended_start_time = GetHighPrecisionTimeStamp(); BEGIN_TIMING(suspend_ee_during_log); GCToEEInterface::SuspendEE(SUSPEND_FOR_GC); END_TIMING(suspend_ee_during_log); @@ -5456,6 +5527,9 @@ void gc_heap::gc_thread_function () gc_heap::gc_started = FALSE; +#ifdef BACKGROUND_GC + gc_heap::add_bgc_pause_duration_0(); +#endif //BACKGROUND_GC BEGIN_TIMING(restart_ee_during_log); GCToEEInterface::RestartEE(TRUE); END_TIMING(restart_ee_during_log); @@ -5478,6 +5552,12 @@ void gc_heap::gc_thread_function () hp->set_gc_done(); } } + + // check if we should do some decommitting + if (gradual_decommit_in_progress_p) + { + gradual_decommit_in_progress_p = decommit_step (); + } } else { @@ -5522,7 +5602,7 @@ bool gc_heap::virtual_alloc_commit_for_heap (void* addr, size_t size, int h_numb return GCToOSInterface::VirtualCommit(addr, size); } -bool gc_heap::virtual_commit (void* address, size_t size, int h_number, bool* hard_limit_exceeded_p) +bool gc_heap::virtual_commit (void* address, size_t size, gc_oh_num oh, int h_number, bool* hard_limit_exceeded_p) { #ifndef HOST_64BIT assert (heap_hard_limit == 0); @@ -5530,11 +5610,17 @@ bool gc_heap::virtual_commit (void* address, size_t size, int h_number, bool* ha if (heap_hard_limit) { - bool exceeded_p = false; - check_commit_cs.Enter(); + bool exceeded_p = false; - if ((current_total_committed + size) > heap_hard_limit) + if (heap_hard_limit_oh[0] != 0) + { + if ((oh != gc_oh_num::none) && (committed_by_oh[oh] + size) > heap_hard_limit_oh[oh]) + { + exceeded_p = true; + } + } + else if ((current_total_committed + size) > heap_hard_limit) { dprintf (1, ("%Id + %Id = %Id > limit %Id ", current_total_committed, size, @@ -5543,8 +5629,10 @@ bool gc_heap::virtual_commit (void* address, size_t size, int h_number, bool* ha exceeded_p = true; } - else + + if (!exceeded_p) { + committed_by_oh[oh] += size; current_total_committed += size; if (h_number < 0) current_total_committed_bookkeeping += size; @@ -5571,6 +5659,8 @@ bool gc_heap::virtual_commit (void* address, size_t size, int h_number, bool* ha if (!commit_succeeded_p && heap_hard_limit) { check_commit_cs.Enter(); + committed_by_oh[oh] -= size; + dprintf (1, ("commit failed, updating %Id to %Id", current_total_committed, (current_total_committed - size))); current_total_committed -= size; @@ -5579,11 +5669,10 @@ bool gc_heap::virtual_commit (void* address, size_t size, int h_number, bool* ha check_commit_cs.Leave(); } - return commit_succeeded_p; } -bool gc_heap::virtual_decommit (void* address, size_t size, int h_number) +bool gc_heap::virtual_decommit (void* address, size_t size, gc_oh_num oh, int h_number) { #ifndef HOST_64BIT assert (heap_hard_limit == 0); @@ -5594,6 +5683,7 @@ bool gc_heap::virtual_decommit (void* address, size_t size, int h_number) if (decommit_succeeded_p && heap_hard_limit) { check_commit_cs.Enter(); + committed_by_oh[oh] -= size; current_total_committed -= size; if (h_number < 0) current_total_committed_bookkeeping -= size; @@ -5603,6 +5693,18 @@ bool gc_heap::virtual_decommit (void* address, size_t size, int h_number) return decommit_succeeded_p; } +void gc_heap::virtual_free (void* add, size_t allocated_size, heap_segment* sg) +{ + assert(!heap_hard_limit); + bool release_succeeded_p = GCToOSInterface::VirtualRelease (add, allocated_size); + if (release_succeeded_p) + { + reserved_memory -= allocated_size; + dprintf (2, 
("Virtual Free size %Id: [%Ix, %Ix[", + allocated_size, (size_t)add, (size_t)((uint8_t*)add + allocated_size))); + } +} + class mark { public: @@ -7206,7 +7308,7 @@ uint32_t* gc_heap::make_card_table (uint8_t* start, uint8_t* end) // mark array will be committed separately (per segment). size_t commit_size = alloc_size - ms; - if (!virtual_commit (mem, commit_size)) + if (!virtual_commit (mem, commit_size, gc_oh_num::none)) { dprintf (1, ("Card table commit failed")); GCToOSInterface::VirtualRelease (mem, alloc_size); @@ -7276,7 +7378,7 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, size_t size, heap_segment* new_seg, gc_heap* hp, - BOOL loh_p) + BOOL uoh_p) { uint8_t* la = g_gc_lowest_address; uint8_t* ha = g_gc_highest_address; @@ -7401,7 +7503,7 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, if (!mem) { - set_fgm_result (fgm_grow_table, alloc_size, loh_p); + set_fgm_result (fgm_grow_table, alloc_size, uoh_p); goto fail; } @@ -7412,10 +7514,10 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, // mark array will be committed separately (per segment). size_t commit_size = alloc_size - ms; - if (!virtual_commit (mem, commit_size)) + if (!virtual_commit (mem, commit_size, gc_oh_num::none)) { dprintf (GC_TABLE_LOG, ("Table commit failed")); - set_fgm_result (fgm_commit_table, commit_size, loh_p); + set_fgm_result (fgm_commit_table, commit_size, uoh_p); goto fail; } } @@ -7482,14 +7584,14 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, if (!commit_new_mark_array_global (new_mark_array)) { dprintf (GC_TABLE_LOG, ("failed to commit portions in the mark array for existing segments")); - set_fgm_result (fgm_commit_table, logging_ma_commit_size, loh_p); + set_fgm_result (fgm_commit_table, logging_ma_commit_size, uoh_p); goto fail; } if (!commit_mark_array_new_seg (hp, new_seg, translated_ct, saved_g_lowest_address)) { dprintf (GC_TABLE_LOG, ("failed to commit mark array for the new seg")); - set_fgm_result (fgm_commit_table, logging_ma_commit_size, loh_p); + set_fgm_result (fgm_commit_table, logging_ma_commit_size, uoh_p); goto fail; } } @@ -7606,7 +7708,7 @@ int gc_heap::grow_brick_card_tables (uint8_t* start, if (!commit_mark_array_new_seg (hp, new_seg)) { dprintf (GC_TABLE_LOG, ("failed to commit mark array for the new seg in range")); - set_fgm_result (fgm_commit_table, logging_ma_commit_size, loh_p); + set_fgm_result (fgm_commit_table, logging_ma_commit_size, uoh_p); return -1; } } @@ -9139,11 +9241,12 @@ int gc_heap::object_gennum_plan (uint8_t* o) #pragma optimize("", on) // Go back to command line default optimizations #endif //_MSC_VER && TARGET_X86 -heap_segment* gc_heap::make_heap_segment (uint8_t* new_pages, size_t size, int h_number) +heap_segment* gc_heap::make_heap_segment (uint8_t* new_pages, size_t size, gc_oh_num oh, int h_number) { + assert(oh != gc_oh_num::none); size_t initial_commit = SEGMENT_INITIAL_COMMIT; - if (!virtual_commit (new_pages, initial_commit, h_number)) + if (!virtual_commit (new_pages, initial_commit, oh, h_number)) { return 0; } @@ -9243,22 +9346,40 @@ void gc_heap::decommit_heap_segment_pages (heap_segment* seg, uint8_t* page_start = align_on_page (heap_segment_allocated(seg)); size_t size = heap_segment_committed (seg) - page_start; extra_space = align_on_page (extra_space); - if (size >= max ((extra_space + 2*OS_PAGE_SIZE), 100*OS_PAGE_SIZE)) + if (size >= max ((extra_space + 2*OS_PAGE_SIZE), MIN_DECOMMIT_SIZE)) { page_start += max(extra_space, 32*OS_PAGE_SIZE); - size -= max (extra_space, 32*OS_PAGE_SIZE); + 
decommit_heap_segment_pages_worker (seg, page_start); + } +} - virtual_decommit (page_start, size, heap_number); - dprintf (3, ("Decommitting heap segment [%Ix, %Ix[(%d)", - (size_t)page_start, - (size_t)(page_start + size), - size)); - heap_segment_committed (seg) = page_start; - if (heap_segment_used (seg) > heap_segment_committed (seg)) +size_t gc_heap::decommit_heap_segment_pages_worker (heap_segment* seg, + uint8_t* new_committed) +{ + assert (!use_large_pages_p); + uint8_t* page_start = align_on_page (new_committed); + size_t size = heap_segment_committed (seg) - page_start; + if (size > 0) + { + bool decommit_succeeded_p = virtual_decommit (page_start, size, heap_segment_oh (seg), heap_number); + if (decommit_succeeded_p) { - heap_segment_used (seg) = heap_segment_committed (seg); + dprintf (3, ("Decommitting heap segment [%Ix, %Ix[(%d)", + (size_t)page_start, + (size_t)(page_start + size), + size)); + heap_segment_committed (seg) = page_start; + if (heap_segment_used (seg) > heap_segment_committed (seg)) + { + heap_segment_used (seg) = heap_segment_committed (seg); + } + } + else + { + dprintf (3, ("Decommitting heap segment failed")); } } + return size; } //decommit all pages except one or 2 @@ -9273,13 +9394,16 @@ void gc_heap::decommit_heap_segment (heap_segment* seg) #endif //BACKGROUND_GC size_t size = heap_segment_committed (seg) - page_start; - virtual_decommit (page_start, size, heap_number); + bool decommit_succeeded_p = virtual_decommit (page_start, size, heap_segment_oh (seg), heap_number); - //re-init the segment object - heap_segment_committed (seg) = page_start; - if (heap_segment_used (seg) > heap_segment_committed (seg)) + if (decommit_succeeded_p) { - heap_segment_used (seg) = heap_segment_committed (seg); + //re-init the segment object + heap_segment_committed (seg) = page_start; + if (heap_segment_used (seg) > heap_segment_committed (seg)) + { + heap_segment_used (seg) = heap_segment_committed (seg); + } } } @@ -9887,7 +10011,8 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, check_commit_cs.Initialize(); } - if (!reserve_initial_memory (soh_segment_size, loh_segment_size, poh_segment_size, number_of_heaps, use_large_pages_p)) + bool separated_poh_p = use_large_pages_p && heap_hard_limit_oh[0] && (GCConfig::GetGCHeapHardLimitPOH() == 0) && (GCConfig::GetGCHeapHardLimitPOHPercent() == 0); + if (!reserve_initial_memory (soh_segment_size, loh_segment_size, poh_segment_size, number_of_heaps, use_large_pages_p, separated_poh_p)) return E_OUTOFMEMORY; #ifdef CARD_BUNDLE @@ -10027,6 +10152,14 @@ gc_heap::init_semi_shared() } #endif //MARK_LIST +#ifdef MULTIPLE_HEAPS + // gradual decommit: set size to some reasonable value per time interval + max_decommit_step_size = ((DECOMMIT_SIZE_PER_MILLISECOND * DECOMMIT_TIME_STEP_MILLISECONDS) / n_heaps); + + // but do at least MIN_DECOMMIT_SIZE per step to make the OS call worthwhile + max_decommit_step_size = max (max_decommit_step_size, MIN_DECOMMIT_SIZE); +#endif //MULTIPLE_HEAPS + #ifdef FEATURE_BASICFREEZE seg_table = sorted_table::make_sorted_table(); @@ -10050,6 +10183,12 @@ gc_heap::init_semi_shared() memset (full_gc_counts, 0, sizeof (full_gc_counts)); + memset (&last_ephemeral_gc_info, 0, sizeof (last_ephemeral_gc_info)); + memset (&last_full_blocking_gc_info, 0, sizeof (last_full_blocking_gc_info)); +#ifdef BACKGROUND_GC + memset (&last_bgc_info, 0, sizeof (last_bgc_info)); +#endif //BACKGROUND_GC + should_expand_in_full_gc = FALSE; #ifdef FEATURE_LOH_COMPACTION @@ -10897,7 +11036,7 @@ BOOL 
gc_heap::grow_heap_segment (heap_segment* seg, uint8_t* high_address, bool* "Growing heap_segment: %Ix high address: %Ix\n", (size_t)seg, (size_t)high_address); - bool ret = virtual_commit (heap_segment_committed (seg), c_size, heap_number, hard_limit_exceeded_p); + bool ret = virtual_commit (heap_segment_committed (seg), c_size, heap_segment_oh (seg), heap_number, hard_limit_exceeded_p); if (ret) { heap_segment_committed (seg) += c_size; @@ -12053,7 +12192,7 @@ BOOL gc_heap::a_fit_free_list_uoh_p (size_t size, #endif //FEATURE_LOH_COMPACTION // must fit exactly or leave formattable space - if ((diff == 0) || (diff > (ptrdiff_t)Align (min_obj_size, align_const))) + if ((diff == 0) || (diff >= (ptrdiff_t)Align (min_obj_size, align_const))) { #ifdef BACKGROUND_GC cookie = bgc_alloc_lock->uoh_alloc_set (free_list); @@ -12242,7 +12381,14 @@ BOOL gc_heap::a_fit_segment_end_p (int gen_number, assert(gen_number == 0); assert(allocated > acontext->alloc_ptr); - limit -= (allocated - acontext->alloc_ptr); + size_t extra = allocated - acontext->alloc_ptr; + limit -= extra; + + // Since we are not consuming all the memory we already deducted from the budget, + // we should put the extra back. + dynamic_data* dd = dynamic_data_of (0); + dd_new_allocation (dd) += extra; + // add space for an AC continuity divider limit += Align(min_obj_size, align_const); } @@ -12258,7 +12404,7 @@ BOOL gc_heap::a_fit_segment_end_p (int gen_number, return FALSE; } -BOOL gc_heap::loh_a_fit_segment_end_p (int gen_number, +BOOL gc_heap::uoh_a_fit_segment_end_p (int gen_number, size_t size, alloc_context* acontext, uint32_t flags, @@ -12506,7 +12652,7 @@ allocation_state gc_heap::allocate_soh (int gen_number, } else { - assert (commit_failed_p); + assert (commit_failed_p || heap_hard_limit); soh_alloc_state = a_state_cant_allocate; oom_r = oom_cant_commit; } @@ -12825,7 +12971,7 @@ BOOL gc_heap::uoh_try_fit (int gen_number, if (!a_fit_free_list_uoh_p (size, acontext, flags, align_const, gen_number)) { - can_allocate = loh_a_fit_segment_end_p (gen_number, size, + can_allocate = uoh_a_fit_segment_end_p (gen_number, size, acontext, flags, align_const, commit_failed_p, oom_r); @@ -12934,14 +13080,12 @@ bool gc_heap::should_retry_other_heap (int gen_number, size_t size) #ifdef MULTIPLE_HEAPS if (heap_hard_limit) { - size_t total_heap_committed_recorded = - current_total_committed - current_total_committed_bookkeeping; size_t min_size = dd_min_size (g_heaps[0]->dynamic_data_of (gen_number)); size_t slack_space = max (commit_min_th, min_size); - bool retry_p = ((total_heap_committed_recorded + size) < (heap_hard_limit - slack_space)); + bool retry_p = ((current_total_committed + size) < (heap_hard_limit - slack_space)); dprintf (1, ("%Id - %Id - total committed %Id - size %Id = %Id, %s", - heap_hard_limit, slack_space, total_heap_committed_recorded, size, - (heap_hard_limit - slack_space - total_heap_committed_recorded - size), + heap_hard_limit, slack_space, current_total_committed, size, + (heap_hard_limit - slack_space - current_total_committed - size), (retry_p ? "retry" : "no retry"))); return retry_p; } @@ -15046,6 +15190,27 @@ size_t gc_heap::get_total_allocated() return total_current_allocated; } +size_t gc_heap::get_total_promoted() +{ + size_t total_promoted_size = 0; + int highest_gen = ((settings.condemned_generation == max_generation) ? 
+ (total_generation_count - 1) : settings.condemned_generation); +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap* hp = gc_heap::g_heaps[i]; +#else //MULTIPLE_HEAPS + { + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + for (int gen_number = 0; gen_number <= highest_gen; gen_number++) + { + total_promoted_size += dd_promoted_size (hp->dynamic_data_of (gen_number)); + } + } + return total_promoted_size; +} + #ifdef BGC_SERVO_TUNING size_t gc_heap::get_total_generation_size (int gen_number) { @@ -15338,7 +15503,7 @@ int gc_heap::generation_to_condemn (int n_initial, (local_settings->pause_mode == pause_sustained_low_latency)) { dynamic_data* dd0 = dynamic_data_of (0); - size_t now = GetHighPrecisionTimeStamp(); + uint64_t now = GetHighPrecisionTimeStamp(); temp_gen = n; for (i = (temp_gen+1); i <= n_time_max; i++) { @@ -15831,11 +15996,11 @@ void fire_overflow_event (uint8_t* overflow_min, void gc_heap::concurrent_print_time_delta (const char* msg) { #ifdef TRACE_GC - size_t current_time = GetHighPrecisionTimeStamp(); - size_t elapsed_time = current_time - time_bgc_last; + uint64_t current_time = GetHighPrecisionTimeStamp(); + size_t elapsed_time_ms = (size_t)((current_time - time_bgc_last) / 1000); time_bgc_last = current_time; - dprintf (2, ("h%d: %s T %Id ms", heap_number, msg, elapsed_time)); + dprintf (2, ("h%d: %s T %Id ms", heap_number, msg, elapsed_time_ms)); #else UNREFERENCED_PARAMETER(msg); #endif //TRACE_GC @@ -15901,6 +16066,26 @@ BOOL gc_heap::should_proceed_with_gc() return TRUE; } +void gc_heap::update_end_gc_time_per_heap() +{ + for (int gen_number = 0; gen_number <= settings.condemned_generation; gen_number++) + { + dynamic_data* dd = dynamic_data_of (gen_number); + dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd)); + } +} + +void gc_heap::update_end_ngc_time() +{ + end_gc_time = GetHighPrecisionTimeStamp(); +#ifdef HEAP_BALANCE_INSTRUMENTATION + last_gc_end_time_us = end_gc_time; + dprintf (HEAP_BALANCE_LOG, ("[GC#%Id-%Id-%Id]", settings.gc_index, + (last_gc_end_time_us - dd_time_clock (dynamic_data_of (0))), + dd_time_clock (dynamic_data_of (0)))); +#endif //HEAP_BALANCE_INSTRUMENTATION +} + //internal part of gc used by the serial and concurrent version void gc_heap::gc1() { @@ -15979,9 +16164,6 @@ void gc_heap::gc1() } } - size_t end_gc_time = GetHighPrecisionTimeStamp(); -// printf ("generation: %d, elapsed time: %Id\n", n, end_gc_time - dd_time_clock (dynamic_data_of (0))); - //adjust the allocation size from the pinned quantities. 
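The timing changes in the preceding hunks move `GetHighPrecisionTimeStamp` to 64-bit microsecond values derived from the performance-counter frequency (the precomputed `qpf_us`/`qpf_ms` multipliers) and convert to milliseconds only at the logging sites via a divide by 1000. The sketch below shows that conversion pattern in isolation; it uses std::chrono as a stand-in tick source, and the helper names mirror the diff but are otherwise placeholders.

```cpp
#include <cstdint>
#include <cstdio>
#include <chrono>

// Raw-tick -> microsecond conversion, with std::chrono standing in for the OS
// performance counter. qpf is the counter frequency; qpf_us/qpf_ms are
// precomputed so the hot path is a single multiply instead of a divide.
static uint64_t qpf;     // ticks per second
static double   qpf_us;  // microseconds per tick
static double   qpf_ms;  // milliseconds per tick

static int64_t QueryTicks()
{
    using namespace std::chrono;
    return duration_cast<nanoseconds>(steady_clock::now().time_since_epoch()).count();
}

static void InitTimers()
{
    qpf    = 1000000000ull;              // steady_clock ticks are nanoseconds here
    qpf_us = 1000000.0 / (double)qpf;
    qpf_ms = 1000.0    / (double)qpf;
}

static uint64_t GetHighPrecisionTimeStamp() // microseconds
{
    return (uint64_t)((double)QueryTicks() * qpf_us);
}

int main()
{
    InitTimers();
    uint64_t start = GetHighPrecisionTimeStamp();
    // ... work being timed ...
    uint64_t elapsed_us = GetHighPrecisionTimeStamp() - start;
    printf("elapsed: %llu us (%llu ms)\n",
           (unsigned long long)elapsed_us,
           (unsigned long long)(elapsed_us / 1000)); // log sites divide by 1000, as in the diff
    return 0;
}
```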
for (int gen_number = 0; gen_number <= min (max_generation,n+1); gen_number++) { @@ -16004,12 +16186,13 @@ void gc_heap::gc1() if (settings.concurrent) { dynamic_data* dd = dynamic_data_of (n); - dd_gc_elapsed_time (dd) = end_gc_time - dd_time_clock (dd); + end_gc_time = GetHighPrecisionTimeStamp(); + dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd)); #ifdef HEAP_BALANCE_INSTRUMENTATION if (heap_number == 0) { - last_gc_end_time_ms = end_gc_time; + last_gc_end_time_us = end_gc_time; dprintf (HEAP_BALANCE_LOG, ("[GC#%Id-%Id-BGC]", settings.gc_index, dd_gc_elapsed_time (dd))); } #endif //HEAP_BALANCE_INSTRUMENTATION @@ -16033,17 +16216,12 @@ void gc_heap::gc1() free_list_info (max_generation, "end"); for (int gen_number = 0; gen_number <= n; gen_number++) { - dynamic_data* dd = dynamic_data_of (gen_number); - dd_gc_elapsed_time (dd) = end_gc_time - dd_time_clock (dd); compute_new_dynamic_data (gen_number); } if (n != max_generation) { - // for gen < max_generation - 1, update data for gen + 1 - // for gen == max_generation - 1, update data for max_generation, loh, etc - int highest_update_gen = (n < max_generation - 1) ? n + 1 : total_generation_count - 1; - for (int gen_number = (n + 1); gen_number <= highest_update_gen; gen_number++) + for (int gen_number = (n + 1); gen_number < total_generation_count; gen_number++) { get_gc_data_per_heap()->gen_data[gen_number].size_after = generation_size (gen_number); get_gc_data_per_heap()->gen_data[gen_number].free_list_space_after = generation_free_list_space (generation_of (gen_number)); @@ -16054,26 +16232,6 @@ void gc_heap::gc1() get_gc_data_per_heap()->maxgen_size_info.running_free_list_efficiency = (uint32_t)(generation_allocator_efficiency (generation_of (max_generation)) * 100); free_list_info (max_generation, "after computing new dynamic data"); - - if (heap_number == 0) - { - size_t gc_elapsed_time = dd_gc_elapsed_time (dynamic_data_of (0)); -#ifdef HEAP_BALANCE_INSTRUMENTATION - last_gc_end_time_ms = end_gc_time; - dprintf (HEAP_BALANCE_LOG, ("[GC#%Id-%Id-%Id]", settings.gc_index, gc_elapsed_time, dd_time_clock (dynamic_data_of (0)))); -#endif //HEAP_BALANCE_INSTRUMENTATION - - dprintf (GTC_LOG, ("GC#%d(gen%d) took %Idms", - dd_collection_count (dynamic_data_of (0)), - settings.condemned_generation, - gc_elapsed_time)); - } - - for (int gen_number = 0; gen_number < total_generation_count; gen_number++) - { - dprintf (2, ("end of FGC/NGC: gen%d new_alloc: %Id", - gen_number, dd_desired_allocation (dynamic_data_of (gen_number)))); - } } if (n < max_generation) @@ -16160,14 +16318,6 @@ void gc_heap::gc1() verify_soh_segment_list(); -#ifdef BACKGROUND_GC - add_to_history_per_heap(); - if (heap_number == 0) - { - add_to_history(); - } -#endif // BACKGROUND_GC - #ifdef BACKGROUND_GC assert (settings.concurrent == (uint32_t)(bgc_thread_id.IsCurrentThread())); #endif //BACKGROUND_GC @@ -16408,14 +16558,17 @@ void gc_heap::gc1() } fire_pevents(); + update_end_ngc_time(); pm_full_gc_init_or_clear(); gc_t_join.restart(); } + + update_end_gc_time_per_heap(); + add_to_history_per_heap(); alloc_context_count = 0; heap_select::mark_heap (heap_number); } - #else //MULTIPLE_HEAPS gc_data_global.final_youngest_desired = dd_desired_allocation (dynamic_data_of (0)); @@ -16428,6 +16581,9 @@ void gc_heap::gc1() if (!(settings.concurrent)) { rearrange_uoh_segments(); + update_end_ngc_time(); + update_end_gc_time_per_heap(); + add_to_history_per_heap(); do_post_gc(); } @@ -16890,7 +17046,7 @@ void gc_heap::update_collection_counts () 
dynamic_data* dd0 = dynamic_data_of (0); dd_gc_clock (dd0) += 1; - size_t now = GetHighPrecisionTimeStamp(); + uint64_t now = GetHighPrecisionTimeStamp(); for (int i = 0; i <= settings.condemned_generation;i++) { @@ -16972,7 +17128,9 @@ BOOL gc_heap::expand_soh_with_minimal_gc() return TRUE; } else + { return FALSE; + } } // Only to be done on the thread that calls restart in a join for server GC @@ -17246,9 +17404,7 @@ void gc_heap::garbage_collect (int n) init_records(); settings.reason = gc_trigger_reason; -#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) num_pinned_objects = 0; -#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE #ifdef STRESS_HEAP if (settings.reason == reason_gcstress) @@ -17596,7 +17752,6 @@ void gc_heap::garbage_collect (int n) done: if (settings.pause_mode == pause_no_gc) allocate_for_no_gc_after_gc(); - } #define mark_stack_empty_p() (mark_stack_base == mark_stack_tos) @@ -17678,12 +17833,12 @@ uint8_t* gc_heap::find_object (uint8_t* interior) { // this is a pointer to a UOH object heap_segment* seg = find_segment (interior, FALSE); - if (seg + if (seg) + { #ifdef FEATURE_CONSERVATIVE_GC - && (GCConfig::GetConservativeGC() || interior <= heap_segment_allocated(seg)) + if ( interior >= heap_segment_allocated(seg)) + return 0; #endif - ) - { // If interior falls within the first free object at the beginning of a generation, // we don't have brick entry for it, and we may incorrectly treat it as on large object heap. int align_const = get_alignment_constant (heap_segment_read_only_p (seg) @@ -19714,10 +19869,10 @@ void gc_heap::process_mark_overflow_internal (int condemned_gen_number, int align_const = get_alignment_constant (i < uoh_start_generation); PREFIX_ASSUME(seg != NULL); - uint8_t* o = max (heap_segment_mem (seg), min_add); while (seg) { + uint8_t* o = max (heap_segment_mem (seg), min_add); uint8_t* end = heap_segment_allocated (seg); while ((o < end) && (o <= max_add)) @@ -20453,12 +20608,9 @@ void gc_heap::pin_object (uint8_t* o, uint8_t** ppObject) } #endif // FEATURE_EVENT_TRACE -#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) num_pinned_objects++; -#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE } -#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) size_t gc_heap::get_total_pinned_objects() { #ifdef MULTIPLE_HEAPS @@ -20473,7 +20625,6 @@ size_t gc_heap::get_total_pinned_objects() return num_pinned_objects; #endif //MULTIPLE_HEAPS } -#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE void gc_heap::reset_mark_stack () { @@ -23972,17 +24123,24 @@ void gc_heap::relocate_address (uint8_t** pold_address THREAD_NUMBER_DCL) } #ifdef FEATURE_LOH_COMPACTION - if (loh_compacted_p) + if (settings.loh_compaction) { heap_segment* pSegment = seg_mapping_table_segment_of ((uint8_t*)old_address); - size_t flags = pSegment->flags; - if ((flags & heap_segment_flags_loh) -#ifdef FEATURE_BASICFREEZE - && !(flags & heap_segment_flags_readonly) +#ifdef MULTIPLE_HEAPS + if (heap_segment_heap (pSegment)->loh_compacted_p) +#else + if (loh_compacted_p) #endif - ) { - *pold_address = old_address + loh_node_relocation_distance (old_address); + size_t flags = pSegment->flags; + if ((flags & heap_segment_flags_loh) +#ifdef FEATURE_BASICFREEZE + && !(flags & heap_segment_flags_readonly) +#endif + ) + { + *pold_address = old_address + loh_node_relocation_distance (old_address); + } } } #endif //FEATURE_LOH_COMPACTION @@ -25788,7 +25946,7 @@ BOOL gc_heap::commit_mark_array_by_range (uint8_t* begin, uint8_t* end, uint32_t size)); 
#endif //SIMPLE_DPRINTF - if (virtual_commit (commit_start, size)) + if (virtual_commit (commit_start, size, gc_oh_num::none)) { // We can only verify the mark array is cleared from begin to end, the first and the last // page aren't necessarily all cleared 'cause they could be used by other segments or @@ -26012,7 +26170,7 @@ void gc_heap::decommit_mark_array_by_seg (heap_segment* seg) if (decommit_start < decommit_end) { - if (!virtual_decommit (decommit_start, size)) + if (!virtual_decommit (decommit_start, size, gc_oh_num::none)) { dprintf (GC_TABLE_LOG, ("decommit on %Ix for %Id bytes failed", decommit_start, size)); @@ -26321,6 +26479,7 @@ void gc_heap::background_mark_phase () { enter_spin_lock (&gc_lock); + suspended_start_time = GetHighPrecisionTimeStamp(); bgc_suspend_EE (); //suspend_EE (); bgc_threads_sync_event.Set(); @@ -27694,7 +27853,7 @@ void gc_heap::bgc_tuning::record_bgc_start() if (!bgc_tuning::enable_fl_tuning) return; - size_t elapsed_time_so_far = GetHighPrecisionTimeStamp() - start_time; + uint64_t elapsed_time_so_far = GetHighPrecisionTimeStamp() - process_start_time; // Note that younger gen's collection count is always updated with older gen's collections. // So to calcuate the actual # of gen1 occurred we really should take the # of gen2s into @@ -27703,7 +27862,7 @@ void gc_heap::bgc_tuning::record_bgc_start() dprintf (BGC_TUNING_LOG, ("BTL: g2t[st][g1 %Id]: %0.3f minutes", current_gen1_index, - (double)elapsed_time_so_far / (double)1000 / (double)60)); + (double)elapsed_time_so_far / (double)1000000 / (double)60)); actual_num_gen1s_to_trigger = current_gen1_index - gen1_index_last_bgc_end; gen1_index_last_bgc_start = current_gen1_index; @@ -27805,10 +27964,10 @@ void gc_heap::bgc_tuning::record_bgc_sweep_start() size_t num_gen1s_since_start = current_gen1_index - gen1_index_last_bgc_start; gen1_index_last_bgc_sweep = current_gen1_index; - size_t elapsed_time_so_far = GetHighPrecisionTimeStamp() - start_time; + uint64_t elapsed_time_so_far = GetHighPrecisionTimeStamp() - process_start_time; dprintf (BGC_TUNING_LOG, ("BTL: g2t[sw][g1 %Id]: %0.3f minutes", current_gen1_index, - (double)elapsed_time_so_far / (double)1000 / (double)60)); + (double)elapsed_time_so_far / (double)1000000 / (double)60)); update_bgc_sweep_start (max_generation, num_gen1s_since_start); update_bgc_sweep_start (loh_generation, num_gen1s_since_start); @@ -28473,11 +28632,11 @@ void gc_heap::bgc_tuning::record_and_adjust_bgc_end() if (!bgc_tuning::enable_fl_tuning) return; - size_t elapsed_time_so_far = GetHighPrecisionTimeStamp() - start_time; + uint64_t elapsed_time_so_far = GetHighPrecisionTimeStamp() - process_start_time; size_t current_gen1_index = get_current_gc_index (max_generation - 1); dprintf (BGC_TUNING_LOG, ("BTL: g2t[en][g1 %Id]: %0.3f minutes", current_gen1_index, - (double)elapsed_time_so_far / (double)1000 / (double)60)); + (double)elapsed_time_so_far / (double)1000000 / (double)60)); if (fl_tuning_triggered) { @@ -31161,15 +31320,16 @@ bool gc_heap::init_dynamic_data() #ifdef HEAP_BALANCE_INSTRUMENTATION start_raw_ts = now_raw_ts; #endif //HEAP_BALANCE_INSTRUMENTATION - uint32_t now = (uint32_t)(now_raw_ts / (qpf / 1000)); + uint64_t now = (uint64_t)((double)now_raw_ts * qpf_us); set_static_data(); if (heap_number == 0) { + process_start_time = now; smoothed_desired_per_heap = dynamic_data_of (0)->min_size; #ifdef HEAP_BALANCE_INSTRUMENTATION - last_gc_end_time_ms = now; + last_gc_end_time_us = now; dprintf (HEAP_BALANCE_LOG, ("qpf=%I64d, start: %I64d(%d)", qpf, 
start_raw_ts, now)); #endif //HEAP_BALANCE_INSTRUMENTATION } @@ -31188,11 +31348,6 @@ bool gc_heap::init_dynamic_data() dd->fragmentation = 0; } -#ifdef GC_CONFIG_DRIVEN - if (heap_number == 0) - time_init = now; -#endif //GC_CONFIG_DRIVEN - return true; } @@ -31706,7 +31861,6 @@ void gc_heap::compute_new_dynamic_data (int gen_number) if (i == poh_generation) end_poh_size = total_gen_size; - #endif //BACKGROUND_GC dd_promoted_size (dd) = out; } @@ -31728,56 +31882,133 @@ void gc_heap::trim_youngest_desired_low_memory() void gc_heap::decommit_ephemeral_segment_pages() { - if (settings.concurrent) + if (settings.concurrent || use_large_pages_p) { return; } - size_t slack_space = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment); - - dynamic_data* dd = dynamic_data_of (0); - -#ifndef MULTIPLE_HEAPS - size_t extra_space = (g_low_memory_status ? 0 : (512 * 1024)); - size_t decommit_timeout = (g_low_memory_status ? 0 : GC_EPHEMERAL_DECOMMIT_TIMEOUT); - size_t ephemeral_elapsed = dd_time_clock(dd) - gc_last_ephemeral_decommit_time; + dynamic_data* dd0 = dynamic_data_of (0); - if (dd_desired_allocation (dd) > gc_gen0_desired_high) - { - gc_gen0_desired_high = dd_desired_allocation (dd) + extra_space; - } + // this is how much we are going to allocate in gen 0 + ptrdiff_t desired_allocation = dd_desired_allocation (dd0) + loh_size_threshold; - if (ephemeral_elapsed >= decommit_timeout) + // estimate how we are going to need in gen 1 - estimate half the free list space gets used + dynamic_data* dd1 = dynamic_data_of (1); + ptrdiff_t desired_allocation_1 = dd_new_allocation (dd1) - (generation_free_list_space (generation_of (1)) / 2); + if (desired_allocation_1 > 0) { - slack_space = min (slack_space, gc_gen0_desired_high); - - gc_last_ephemeral_decommit_time = dd_time_clock(dd); - gc_gen0_desired_high = 0; + desired_allocation += desired_allocation_1; } -#endif //!MULTIPLE_HEAPS - if (settings.condemned_generation >= (max_generation-1)) - { - size_t new_slack_space = + size_t slack_space = #ifdef HOST_64BIT - max(min(min(soh_segment_size/32, dd_max_size(dd)), (generation_size (max_generation) / 10)), dd_desired_allocation(dd)); + max(min(min(soh_segment_size/32, dd_max_size (dd0)), (generation_size (max_generation) / 10)), (size_t)desired_allocation); #else #ifdef FEATURE_CORECLR - dd_desired_allocation (dd); + desired_allocation; #else - dd_max_size (dd); + dd_max_size (dd0); #endif //FEATURE_CORECLR #endif // HOST_64BIT - slack_space = min (slack_space, new_slack_space); + uint8_t *decommit_target = heap_segment_allocated (ephemeral_heap_segment) + slack_space; + if (decommit_target < heap_segment_decommit_target (ephemeral_heap_segment)) + { + // we used to have a higher target - do exponential smoothing by computing + // essentially decommit_target = 1/3*decommit_target + 2/3*previous_decommit_target + // computation below is slightly different to avoid overflow + ptrdiff_t target_decrease = heap_segment_decommit_target (ephemeral_heap_segment) - decommit_target; + decommit_target += target_decrease * 2 / 3; } + heap_segment_decommit_target(ephemeral_heap_segment) = decommit_target; + +#ifdef MULTIPLE_HEAPS + if (decommit_target < heap_segment_committed (ephemeral_heap_segment)) + { + gradual_decommit_in_progress_p = TRUE; + } +#ifdef _DEBUG + // these are only for checking against logic errors + ephemeral_heap_segment->saved_committed = heap_segment_committed (ephemeral_heap_segment); + ephemeral_heap_segment->saved_desired_allocation = 
dd_desired_allocation (dd0); +#endif // _DEBUG +#endif // MULTIPLE_HEAPS + +#ifndef MULTIPLE_HEAPS + // we want to limit the amount of decommit we do per time to indirectly + // limit the amount of time spent in recommit and page faults + size_t ephemeral_elapsed = (size_t)((dd_time_clock (dd0) - gc_last_ephemeral_decommit_time) / 1000); + gc_last_ephemeral_decommit_time = dd_time_clock (dd0); + + // this is the amount we were planning to decommit + ptrdiff_t decommit_size = heap_segment_committed (ephemeral_heap_segment) - decommit_target; + + // we do a max of DECOMMIT_SIZE_PER_MILLISECOND per millisecond of elapsed time since the last GC + // we limit the elapsed time to 10 seconds to avoid spending too much time decommitting + ptrdiff_t max_decommit_size = min (ephemeral_elapsed, (10*1000)) * DECOMMIT_SIZE_PER_MILLISECOND; + decommit_size = min (decommit_size, max_decommit_size); + + slack_space = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment) - decommit_size; decommit_heap_segment_pages (ephemeral_heap_segment, slack_space); +#endif // !MULTIPLE_HEAPS gc_history_per_heap* current_gc_data_per_heap = get_gc_data_per_heap(); current_gc_data_per_heap->extra_gen0_committed = heap_segment_committed (ephemeral_heap_segment) - heap_segment_allocated (ephemeral_heap_segment); } +#ifdef MULTIPLE_HEAPS +// return true if we actually decommitted anything +bool gc_heap::decommit_step () +{ + // should never get here for large pages because decommit_ephemeral_segment_pages + // will not do anything if use_large_pages_p is true + assert (!use_large_pages_p); + + size_t decommit_size = 0; + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = gc_heap::g_heaps[i]; + decommit_size += hp->decommit_ephemeral_segment_pages_step (); + } + return (decommit_size != 0); +} + +// return the decommitted size +size_t gc_heap::decommit_ephemeral_segment_pages_step () +{ + // we rely on desired allocation not being changed outside of GC + assert (ephemeral_heap_segment->saved_desired_allocation == dd_desired_allocation (dynamic_data_of (0))); + + uint8_t* decommit_target = heap_segment_decommit_target (ephemeral_heap_segment); + size_t EXTRA_SPACE = 2 * OS_PAGE_SIZE; + decommit_target += EXTRA_SPACE; + uint8_t* committed = heap_segment_committed (ephemeral_heap_segment); + if (decommit_target < committed) + { + // we rely on other threads not messing with committed if we are about to trim it down + assert (ephemeral_heap_segment->saved_committed == heap_segment_committed (ephemeral_heap_segment)); + + // how much would we need to decommit to get to decommit_target in one step? + size_t full_decommit_size = (committed - decommit_target); + + // don't do more than max_decommit_step_size per step + size_t decommit_size = min (max_decommit_step_size, full_decommit_size); + + // figure out where the new committed should be + uint8_t* new_committed = (committed - decommit_size); + size_t size = decommit_heap_segment_pages_worker (ephemeral_heap_segment, new_committed); + +#ifdef _DEBUG + ephemeral_heap_segment->saved_committed = committed - size; +#endif // _DEBUG + + return size; + } + return 0; +} +#endif //MULTIPLE_HEAPS + //This is meant to be called by decide_on_compacting. size_t gc_heap::generation_fragmentation (generation* gen, @@ -32363,7 +32594,7 @@ void reset_memory (uint8_t* o, size_t sizeo) size_t size = align_lower_page ((size_t)o + sizeo - size_to_skip - plug_skew) - page_start; // Note we need to compensate for an OS bug here. 
This bug would cause the MEM_RESET to fail // on write watched memory. - if (reset_mm_p) + if (reset_mm_p && gc_heap::g_low_memory_status) { #ifdef MULTIPLE_HEAPS bool unlock_p = true; @@ -32932,7 +33163,6 @@ void gc_heap::background_sweep() } } - #ifdef MULTIPLE_HEAPS bgc_t_join.join(this, gc_join_restart_ee); if (bgc_t_join.joined()) @@ -32947,6 +33177,9 @@ void gc_heap::background_sweep() #ifdef BGC_SERVO_TUNING get_and_reset_loh_alloc_info(); #endif //BGC_SERVO_TUNING + uint64_t suspended_end_ts = GetHighPrecisionTimeStamp(); + last_bgc_info[last_bgc_info_index].pause_durations[1] = (size_t)(suspended_end_ts - suspended_start_time); + total_suspended_time += last_bgc_info[last_bgc_info_index].pause_durations[1]; restart_EE (); } @@ -34919,8 +35152,9 @@ HRESULT GCHeap::Initialize() { HRESULT hr = S_OK; - qpf = GCToOSInterface::QueryPerformanceFrequency(); - start_time = GetHighPrecisionTimeStamp(); + qpf = (uint64_t)GCToOSInterface::QueryPerformanceFrequency(); + qpf_ms = 1000.0 / (double)qpf; + qpf_us = 1000.0 * 1000.0 / (double)qpf; g_gc_pFreeObjectMethodTable = GCToEEInterface::GetFreeObjectMethodTable(); g_num_processors = GCToOSInterface::GetTotalProcessorCount(); @@ -34938,6 +35172,62 @@ HRESULT GCHeap::Initialize() #ifdef HOST_64BIT gc_heap::heap_hard_limit = (size_t)GCConfig::GetGCHeapHardLimit(); + gc_heap::heap_hard_limit_oh[0] = (size_t)GCConfig::GetGCHeapHardLimitSOH(); + gc_heap::heap_hard_limit_oh[1] = (size_t)GCConfig::GetGCHeapHardLimitLOH(); + gc_heap::heap_hard_limit_oh[2] = (size_t)GCConfig::GetGCHeapHardLimitPOH(); + + if (gc_heap::heap_hard_limit_oh[0] || gc_heap::heap_hard_limit_oh[1] || gc_heap::heap_hard_limit_oh[2]) + { + if (!gc_heap::heap_hard_limit_oh[0]) + { + return E_INVALIDARG; + } + if (!gc_heap::heap_hard_limit_oh[1]) + { + return E_INVALIDARG; + } + if (gc_heap::heap_hard_limit_oh[2] < min_segment_size_hard_limit) + { + gc_heap::heap_hard_limit_oh[2] = min_segment_size_hard_limit; + } + gc_heap::heap_hard_limit = gc_heap::heap_hard_limit_oh[0] + gc_heap::heap_hard_limit_oh[1] + gc_heap::heap_hard_limit_oh[2]; + } + else + { + uint32_t percent_of_mem_soh = (uint32_t)GCConfig::GetGCHeapHardLimitSOHPercent(); + uint32_t percent_of_mem_loh = (uint32_t)GCConfig::GetGCHeapHardLimitLOHPercent(); + uint32_t percent_of_mem_poh = (uint32_t)GCConfig::GetGCHeapHardLimitPOHPercent(); + if (percent_of_mem_soh || percent_of_mem_loh || percent_of_mem_poh) + { + if ((percent_of_mem_soh <= 0) || (percent_of_mem_soh >= 100)) + { + return E_INVALIDARG; + } + if ((percent_of_mem_loh <= 0) || (percent_of_mem_loh >= 100)) + { + return E_INVALIDARG; + } + else if (percent_of_mem_poh >= 100) + { + return E_INVALIDARG; + } + if ((percent_of_mem_soh + percent_of_mem_loh + percent_of_mem_poh) >= 100) + { + return E_INVALIDARG; + } + gc_heap::heap_hard_limit_oh[0] = (size_t)(gc_heap::total_physical_mem * (uint64_t)percent_of_mem_soh / (uint64_t)100); + gc_heap::heap_hard_limit_oh[1] = (size_t)(gc_heap::total_physical_mem * (uint64_t)percent_of_mem_loh / (uint64_t)100); + if (percent_of_mem_poh == 0) + { + gc_heap::heap_hard_limit_oh[2] = min_segment_size_hard_limit; + } + else + { + gc_heap::heap_hard_limit_oh[2] = (size_t)(gc_heap::total_physical_mem * (uint64_t)percent_of_mem_poh / (uint64_t)100); + } + gc_heap::heap_hard_limit = gc_heap::heap_hard_limit_oh[0] + gc_heap::heap_hard_limit_oh[1] + gc_heap::heap_hard_limit_oh[2]; + } + } if (!(gc_heap::heap_hard_limit)) { @@ -35024,14 +35314,63 @@ HRESULT GCHeap::Initialize() size_t seg_size = 0; size_t large_seg_size = 0; + size_t 
pin_seg_size = 0; if (gc_heap::heap_hard_limit) { gc_heap::use_large_pages_p = GCConfig::GetGCLargePages(); - seg_size = gc_heap::get_segment_size_hard_limit (&nhp, (nhp_from_config == 0)); - gc_heap::soh_segment_size = seg_size; - large_seg_size = gc_heap::use_large_pages_p ? seg_size : seg_size * 2; + if (gc_heap::heap_hard_limit_oh[0]) + { +#ifdef MULTIPLE_HEAPS + if (nhp_from_config == 0) + { + for (int i = 0; i < (total_oh_count - 1); i++) + { + uint32_t nhp_oh = (uint32_t)(gc_heap::heap_hard_limit_oh[i] / min_segment_size_hard_limit); + nhp = min (nhp, nhp_oh); + } + if (nhp == 0) + { + nhp = 1; + } + } +#endif + seg_size = gc_heap::heap_hard_limit_oh[0] / nhp; + large_seg_size = gc_heap::heap_hard_limit_oh[1] / nhp; + pin_seg_size = gc_heap::heap_hard_limit_oh[2] / nhp; + + size_t aligned_seg_size = align_on_segment_hard_limit (seg_size); + size_t aligned_large_seg_size = align_on_segment_hard_limit (large_seg_size); + size_t aligned_pin_seg_size = align_on_segment_hard_limit (pin_seg_size); + if (!gc_heap::use_large_pages_p) + { + aligned_seg_size = round_up_power2 (aligned_seg_size); + aligned_large_seg_size = round_up_power2 (aligned_large_seg_size); + aligned_pin_seg_size = round_up_power2 (aligned_pin_seg_size); + } + + size_t seg_size_from_config = (size_t)GCConfig::GetSegmentSize(); + if (seg_size_from_config) + { + size_t aligned_seg_size_config = (gc_heap::use_large_pages_p ? align_on_segment_hard_limit (seg_size) : round_up_power2 (seg_size_from_config)); + aligned_seg_size = max (aligned_seg_size, aligned_seg_size_config); + aligned_large_seg_size = max (aligned_large_seg_size, aligned_seg_size_config); + aligned_pin_seg_size = max (aligned_pin_seg_size, aligned_seg_size_config); + } + + seg_size = aligned_seg_size; + gc_heap::soh_segment_size = seg_size; + large_seg_size = aligned_large_seg_size; + pin_seg_size = aligned_pin_seg_size; + } + else + { + seg_size = gc_heap::get_segment_size_hard_limit (&nhp, (nhp_from_config == 0)); + gc_heap::soh_segment_size = seg_size; + large_seg_size = gc_heap::use_large_pages_p ? 
seg_size : seg_size * 2; + pin_seg_size = large_seg_size; + } if (gc_heap::use_large_pages_p) gc_heap::min_segment_size = min_segment_size_hard_limit; } @@ -35040,26 +35379,30 @@ HRESULT GCHeap::Initialize() seg_size = get_valid_segment_size(); gc_heap::soh_segment_size = seg_size; large_seg_size = get_valid_segment_size (TRUE); + pin_seg_size = large_seg_size; } + assert (g_theGCHeap->IsValidSegmentSize (seg_size)); + assert (g_theGCHeap->IsValidSegmentSize (large_seg_size)); + assert (g_theGCHeap->IsValidSegmentSize (pin_seg_size)); dprintf (1, ("%d heaps, soh seg size: %Id mb, loh: %Id mb\n", nhp, (seg_size / (size_t)1024 / 1024), (large_seg_size / 1024 / 1024))); - gc_heap::min_uoh_segment_size = large_seg_size; + gc_heap::min_uoh_segment_size = min (large_seg_size, pin_seg_size); if (gc_heap::min_segment_size == 0) { - gc_heap::min_segment_size = min (seg_size, large_seg_size); + gc_heap::min_segment_size = min (seg_size, gc_heap::min_uoh_segment_size); } gc_heap::min_segment_size_shr = index_of_highest_set_bit (gc_heap::min_segment_size); #ifdef MULTIPLE_HEAPS gc_heap::n_heaps = nhp; - hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, large_seg_size /*poh_segment_size*/, nhp); + hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, pin_seg_size /*poh_segment_size*/, nhp); #else - hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, large_seg_size /*poh_segment_size*/); + hr = gc_heap::initialize_gc (seg_size, large_seg_size /*loh_segment_size*/, pin_seg_size /*poh_segment_size*/); #endif //MULTIPLE_HEAPS if (hr != S_OK) @@ -36072,6 +36415,32 @@ GCHeap::GarbageCollectTry (int generation, BOOL low_memory_p, int mode) return GarbageCollectGeneration (gen, reason); } +#ifdef BACKGROUND_GC +void gc_heap::add_bgc_pause_duration_0() +{ + if (settings.concurrent) + { + uint64_t suspended_end_ts = GetHighPrecisionTimeStamp(); + size_t pause_duration = (size_t)(suspended_end_ts - suspended_start_time); + last_recorded_gc_info* last_gc_info = &(last_bgc_info[last_bgc_info_index]); + last_gc_info->pause_durations[0] = pause_duration; + if (last_gc_info->index < last_ephemeral_gc_info.index) + { + last_gc_info->pause_durations[0] -= last_ephemeral_gc_info.pause_durations[0]; + } + + total_suspended_time += last_gc_info->pause_durations[0]; + } +} + +last_recorded_gc_info* gc_heap::get_completed_bgc_info() +{ + int completed_bgc_index = gc_heap::background_running_p() ? 
+ (int)(!(gc_heap::last_bgc_info_index)) : (int)gc_heap::last_bgc_info_index; + return &gc_heap::last_bgc_info[completed_bgc_index]; +} +#endif //BACKGROUND_GC + void gc_heap::do_pre_gc() { STRESS_LOG_GC_STACK; @@ -36090,6 +36459,11 @@ void gc_heap::do_pre_gc() #ifdef BACKGROUND_GC settings.b_state = hp->current_bgc_state; + if (settings.concurrent) + { + last_bgc_info_index = !last_bgc_info_index; + last_bgc_info[last_bgc_info_index].index = settings.gc_index; + } #endif //BACKGROUND_GC #ifdef TRACE_GC @@ -36429,6 +36803,30 @@ bool gc_heap::is_pm_ratio_exceeded() return maxgen_highfrag_p; } +void gc_heap::update_recorded_gen_data (last_recorded_gc_info* gc_info) +{ +#ifdef MULTIPLE_HEAPS + for (int i = 0; i < gc_heap::n_heaps; i++) + { + gc_heap* hp = gc_heap::g_heaps[i]; +#else //MULTIPLE_HEAPS + { + gc_heap* hp = pGenGCHeap; +#endif //MULTIPLE_HEAPS + + gc_history_per_heap* current_gc_data_per_heap = hp->get_gc_data_per_heap(); + for (int gen_number = 0; gen_number < total_generation_count; gen_number++) + { + recorded_generation_info* recorded_info = &(gc_info->gen_info[gen_number]); + gc_generation_data* data = &(current_gc_data_per_heap->gen_data[gen_number]); + recorded_info->size_before += data->size_before; + recorded_info->fragmentation_before += data->free_list_space_before + data->free_obj_space_before; + recorded_info->size_after += data->size_after; + recorded_info->fragmentation_after += data->free_list_space_after + data->free_obj_space_after; + } + } +} + void gc_heap::do_post_gc() { if (!settings.concurrent) @@ -36449,6 +36847,8 @@ void gc_heap::do_post_gc() (uint32_t)settings.reason, !!settings.concurrent); + add_to_history(); + uint32_t current_memory_load = 0; #ifdef BGC_SERVO_TUNING @@ -36494,7 +36894,7 @@ void gc_heap::do_post_gc() dprintf (1, ("*EGC* %Id(gen0:%Id)(%Id)(%d)(%s)(%s)(%s)(ml: %d->%d)", VolatileLoad(&settings.gc_index), dd_collection_count(hp->dynamic_data_of(0)), - GetHighPrecisionTimeStamp(), + (size_t)(GetHighPrecisionTimeStamp() / 1000), settings.condemned_generation, (settings.concurrent ? "BGC" : (gc_heap::background_running_p() ? "FGC" : "NGC")), (settings.compaction ? "C" : "S"), @@ -36503,23 +36903,70 @@ void gc_heap::do_post_gc() current_memory_load)); #endif //SIMPLE_DPRINTF + // Now record the gc info. + last_recorded_gc_info* last_gc_info = 0; + if (settings.concurrent) + { + last_gc_info = &last_bgc_info[last_bgc_info_index]; + assert (last_gc_info->index == settings.gc_index); + } + else + { + last_gc_info = ((settings.condemned_generation == max_generation) ? + &last_full_blocking_gc_info : &last_ephemeral_gc_info); + last_gc_info->index = settings.gc_index; + } + size_t total_heap_committed = get_total_committed_size(); + last_gc_info->total_committed = total_heap_committed; + last_gc_info->promoted = get_total_promoted(); + last_gc_info->pinned_objects = get_total_pinned_objects(); + last_gc_info->finalize_promoted_objects = GCHeap::GetFinalizablePromotedCount(); + + if (!settings.concurrent) + { + // If it's a normal blocking GC with its own SuspendEE, we simply get the elapsed time recorded + // and add the time between SuspendEE start and GC start. 
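The comment above (and the code that follows it in this hunk) computes the pause for a blocking GC as the GC's own elapsed time plus the gap between SuspendEE and the start of the GC, and later turns the accumulated suspended time into a pause percentage. A minimal standalone sketch of that arithmetic, using hypothetical microsecond timestamps rather than the real dd_time_clock / end_gc_time / suspended_start_time plumbing:

```cpp
// Minimal sketch of the blocking-GC pause accounting described in the comment above.
// Timestamps are hypothetical microsecond values, not the actual gc_heap fields.
#include <cstdint>
#include <cstdio>

uint64_t blocking_gc_pause_us (uint64_t suspend_start, uint64_t gc_start,
                               uint64_t gc_end, bool had_own_suspend)
{
    uint64_t pause = gc_end - gc_start;        // elapsed time of the GC itself
    if (had_own_suspend)
        pause += gc_start - suspend_start;     // time between SuspendEE and GC start
    return pause;
}

double pause_percentage (uint64_t total_suspended_us, uint64_t total_process_us)
{
    return total_process_us ? 100.0 * (double)total_suspended_us / (double)total_process_us : 0.0;
}

int main ()
{
    // EE suspended at t=1000us, GC ran from t=1200us to t=4200us
    uint64_t pause = blocking_gc_pause_us (1000, 1200, 4200, true);
    printf ("pause: %llu us, %.2f%% of a 1s process lifetime\n",
            (unsigned long long)pause, pause_percentage (pause, 1000000));
    return 0;
}
```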
+ dynamic_data* dd = hp->dynamic_data_of (settings.condemned_generation); + uint64_t gc_start_ts = dd_time_clock (dd); + size_t pause_duration = (size_t)(end_gc_time - dd_time_clock (dd)); + + if ((hp->current_bgc_state != bgc_initialized) && (settings.reason != reason_pm_full_gc)) + { + pause_duration += (size_t)(gc_start_ts - suspended_start_time); + } + + last_gc_info->pause_durations[0] = pause_duration; + total_suspended_time += pause_duration; + last_gc_info->pause_durations[1] = 0; + } + + uint64_t total_process_time = end_gc_time - process_start_time; + last_gc_info->pause_percentage = (float)(total_process_time ? + ((double)total_suspended_time / (double)total_process_time * 100.0) : 0); + + update_recorded_gen_data (last_gc_info); + last_gc_info->heap_size = get_total_heap_size(); + last_gc_info->fragmentation = get_total_fragmentation(); if (settings.exit_memory_load != 0) - last_gc_memory_load = settings.exit_memory_load; + last_gc_info->memory_load = settings.exit_memory_load; else if (settings.entry_memory_load != 0) - last_gc_memory_load = settings.entry_memory_load; + last_gc_info->memory_load = settings.entry_memory_load; + last_gc_info->condemned_generation = settings.condemned_generation; + last_gc_info->compaction = settings.compaction; + last_gc_info->concurrent = settings.concurrent; - last_gc_heap_size = get_total_heap_size(); - last_gc_fragmentation = get_total_fragmentation(); +#ifdef BACKGROUND_GC + is_last_recorded_bgc = settings.concurrent; +#endif //BACKGROUND_GC #ifdef TRACE_GC if (heap_hard_limit) { - size_t total_heap_committed = get_total_committed_size(); size_t total_heap_committed_recorded = current_total_committed - current_total_committed_bookkeeping; dprintf (1, ("(%d)GC commit END #%Id: %Id (recorded: %Id), heap %Id, frag: %Id", settings.condemned_generation, (size_t)settings.gc_index, total_heap_committed, total_heap_committed_recorded, - last_gc_heap_size, last_gc_fragmentation)); + last_gc_info->heap_size, last_gc_info->fragmentation)); } #endif //TRACE_GC @@ -36668,6 +37115,7 @@ GCHeap::GarbageCollectGeneration (unsigned int gen, gc_reason reason) cooperative_mode = gc_heap::enable_preemptive (); dprintf (2, ("Suspending EE")); + gc_heap::suspended_start_time = GetHighPrecisionTimeStamp(); BEGIN_TIMING(suspend_ee_during_log); GCToEEInterface::SuspendEE(SUSPEND_FOR_GC); END_TIMING(suspend_ee_during_log); @@ -36728,14 +37176,15 @@ GCHeap::GarbageCollectGeneration (unsigned int gen, gc_reason reason) #ifndef MULTIPLE_HEAPS #ifdef BACKGROUND_GC if (!gc_heap::dont_restart_ee_p) +#endif //BACKGROUND_GC { +#ifdef BACKGROUND_GC + gc_heap::add_bgc_pause_duration_0(); #endif //BACKGROUND_GC BEGIN_TIMING(restart_ee_during_log); GCToEEInterface::RestartEE(TRUE); END_TIMING(restart_ee_during_log); -#ifdef BACKGROUND_GC } -#endif //BACKGROUND_GC #endif //!MULTIPLE_HEAPS #ifndef MULTIPLE_HEAPS @@ -36927,16 +37376,110 @@ unsigned int GCHeap::GetCondemnedGeneration() void GCHeap::GetMemoryInfo(uint64_t* highMemLoadThresholdBytes, uint64_t* totalAvailableMemoryBytes, uint64_t* lastRecordedMemLoadBytes, - uint32_t* lastRecordedMemLoadPct, - size_t* lastRecordedHeapSizeBytes, - size_t* lastRecordedFragmentationBytes) + uint64_t* lastRecordedHeapSizeBytes, + uint64_t* lastRecordedFragmentationBytes, + uint64_t* totalCommittedBytes, + uint64_t* promotedBytes, + uint64_t* pinnedObjectCount, + uint64_t* finalizationPendingCount, + uint64_t* index, + uint32_t* generation, + uint32_t* pauseTimePct, + bool* isCompaction, + bool* isConcurrent, + uint64_t* genInfoRaw, + 
uint64_t* pauseInfoRaw, + int kind) { - *highMemLoadThresholdBytes = (uint64_t) (((double)gc_heap::high_memory_load_th) / 100 * gc_heap::total_physical_mem); + last_recorded_gc_info* last_gc_info = 0; + + if ((gc_kind)kind == gc_kind_ephemeral) + { + last_gc_info = &gc_heap::last_ephemeral_gc_info; + } + else if ((gc_kind)kind == gc_kind_full_blocking) + { + last_gc_info = &gc_heap::last_full_blocking_gc_info; + } +#ifdef BACKGROUND_GC + else if ((gc_kind)kind == gc_kind_background) + { + last_gc_info = gc_heap::get_completed_bgc_info(); + } +#endif //BACKGROUND_GC + else + { + assert ((gc_kind)kind == gc_kind_any); +#ifdef BACKGROUND_GC + if (gc_heap::is_last_recorded_bgc) + { + last_gc_info = gc_heap::get_completed_bgc_info(); + } + else +#endif //BACKGROUND_GC + { + last_gc_info = ((gc_heap::last_ephemeral_gc_info.index > gc_heap::last_full_blocking_gc_info.index) ? + &gc_heap::last_ephemeral_gc_info : &gc_heap::last_full_blocking_gc_info); + } + } + + *highMemLoadThresholdBytes = (uint64_t) (((double)(gc_heap::high_memory_load_th)) / 100 * gc_heap::total_physical_mem); *totalAvailableMemoryBytes = gc_heap::heap_hard_limit != 0 ? gc_heap::heap_hard_limit : gc_heap::total_physical_mem; - *lastRecordedMemLoadBytes = (uint64_t) (((double)gc_heap::last_gc_memory_load) / 100 * gc_heap::total_physical_mem); - *lastRecordedMemLoadPct = gc_heap::last_gc_memory_load; - *lastRecordedHeapSizeBytes = gc_heap::last_gc_heap_size; - *lastRecordedFragmentationBytes = gc_heap::last_gc_fragmentation; + *lastRecordedMemLoadBytes = (uint64_t) (((double)(last_gc_info->memory_load)) / 100 * gc_heap::total_physical_mem); + *lastRecordedHeapSizeBytes = last_gc_info->heap_size; + *lastRecordedFragmentationBytes = last_gc_info->fragmentation; + *totalCommittedBytes = last_gc_info->total_committed; + *promotedBytes = last_gc_info->promoted; + *pinnedObjectCount = last_gc_info->pinned_objects; + *finalizationPendingCount = last_gc_info->finalize_promoted_objects; + *index = last_gc_info->index; + *generation = last_gc_info->condemned_generation; + *pauseTimePct = (int)(last_gc_info->pause_percentage * 100); + *isCompaction = last_gc_info->compaction; + *isConcurrent = last_gc_info->concurrent; + int genInfoIndex = 0; + for (int i = 0; i < total_generation_count; i++) + { + genInfoRaw[genInfoIndex++] = last_gc_info->gen_info[i].size_before; + genInfoRaw[genInfoIndex++] = last_gc_info->gen_info[i].fragmentation_before; + genInfoRaw[genInfoIndex++] = last_gc_info->gen_info[i].size_after; + genInfoRaw[genInfoIndex++] = last_gc_info->gen_info[i].fragmentation_after; + } + for (int i = 0; i < 2; i++) + { + // convert it to 100-ns units that TimeSpan needs. 
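The conversion here multiplies the recorded pause durations (which appear to be tracked in microseconds after the qpf_us change earlier in this patch) by 10 to get the 100 ns ticks that TimeSpan expects. A small worked example with sample values, illustrative only:

```cpp
// Worked example of the microseconds -> 100 ns tick conversion used for pauseInfoRaw.
#include <cstdint>
#include <cstdio>

int main ()
{
    uint64_t pause_us    = 2500;            // a 2.5 ms pause, sample value
    uint64_t ticks_100ns = pause_us * 10;   // 1 us == 10 ticks of 100 ns
    double   pause_ms    = ticks_100ns / 10000.0;
    printf ("%llu us -> %llu ticks -> %.1f ms\n",
            (unsigned long long)pause_us, (unsigned long long)ticks_100ns, pause_ms);
    return 0;
}
```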
+ pauseInfoRaw[i] = (uint64_t)(last_gc_info->pause_durations[i]) * 10; + } + +#ifdef _DEBUG + if ((gc_kind)kind == gc_kind_ephemeral) + { + assert (last_gc_info->condemned_generation < max_generation); + } + else if ((gc_kind)kind == gc_kind_full_blocking) + { + assert (last_gc_info->condemned_generation == max_generation); + assert (last_gc_info->concurrent == false); + } +#ifdef BACKGROUND_GC + else if ((gc_kind)kind == gc_kind_background) + { + assert (last_gc_info->condemned_generation == max_generation); + assert (last_gc_info->concurrent == true); + } +#endif //BACKGROUND_GC +#endif //_DEBUG +} + +uint32_t GCHeap::GetMemoryLoad() +{ + uint32_t memory_load = 0; + if (gc_heap::settings.exit_memory_load != 0) + memory_load = gc_heap::settings.exit_memory_load; + else if (gc_heap::settings.entry_memory_load != 0) + memory_load = gc_heap::settings.entry_memory_load; + + return memory_load; } int GCHeap::GetGcLatencyMode() @@ -37284,26 +37827,6 @@ size_t GCHeap::GetFinalizablePromotedCount() #endif //MULTIPLE_HEAPS } -bool GCHeap::ShouldRestartFinalizerWatchDog() -{ - // This condition was historically used as part of the condition to detect finalizer thread timeouts - return gc_heap::gc_lock.lock != -1; -} - -void GCHeap::SetFinalizeQueueForShutdown(bool fHasLock) -{ -#ifdef MULTIPLE_HEAPS - for (int hn = 0; hn < gc_heap::n_heaps; hn++) - { - gc_heap* hp = gc_heap::g_heaps [hn]; - hp->finalize_queue->SetSegForShutDown(fHasLock); - } - -#else //MULTIPLE_HEAPS - pGenGCHeap->finalize_queue->SetSegForShutDown(fHasLock); -#endif //MULTIPLE_HEAPS -} - //--------------------------------------------------------------------------- // Finalized class tracking //--------------------------------------------------------------------------- @@ -37436,18 +37959,9 @@ CFinalize::RegisterForFinalization (int gen, Object* obj, size_t size) } CONTRACTL_END; EnterFinalizeLock(); - // Adjust gen - unsigned int dest = 0; - - if (g_fFinalizerRunOnShutDown) - { - //put it in the finalizer queue and sort out when - //dequeueing - dest = FinalizerListSeg; - } - else - dest = gen_segment (gen); + // Adjust gen + unsigned int dest = gen_segment (gen); // Adjust boundary for segments so that GC will keep objects alive. 
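With the shutdown special cases removed above, registration always lands in the segment for the object's generation, and GetNextFinalizableObject (in the next hunk) simply drains the finalizer list before the critical-finalizer list. An illustrative toy model of those two behaviors; this is not CFinalize's actual segmented array layout, and the promotion of objects from generation segments to the finalizable lists during a GC is not modeled:

```cpp
// Toy model of the finalization-queue behaviors touched in the hunks around here.
#include <cstdio>
#include <string>
#include <vector>

struct toy_finalize_queue
{
    // one list per generation segment, plus the two "ready to finalize" lists
    std::vector<std::string> gen_seg[3];
    std::vector<std::string> finalizer_list;
    std::vector<std::string> critical_finalizer_list;

    void register_for_finalization (int gen, const std::string& obj)
    {
        gen_seg[gen].push_back (obj);          // dest = gen_segment (gen)
    }

    bool get_next (std::string& obj, bool only_non_critical)
    {
        if (!finalizer_list.empty ())
        {
            obj = finalizer_list.back (); finalizer_list.pop_back ();
            return true;
        }
        if (!only_non_critical && !critical_finalizer_list.empty ())
        {
            obj = critical_finalizer_list.back (); critical_finalizer_list.pop_back ();
            return true;
        }
        return false;
    }
};

int main ()
{
    toy_finalize_queue q;
    q.register_for_finalization (0, "a");      // parked in its generation segment
    q.finalizer_list.push_back ("b");          // already promoted to the ready list
    std::string next;
    while (q.get_next (next, /* only_non_critical */ false))
        printf ("finalizing %s\n", next.c_str ());
    return 0;
}
```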
Object*** s_i = &SegQueue (FreeList); @@ -37503,24 +38017,9 @@ CFinalize::GetNextFinalizableObject (BOOL only_non_critical) Object* obj = 0; EnterFinalizeLock(); -retry: if (!IsSegEmpty(FinalizerListSeg)) { - if (g_fFinalizerRunOnShutDown) - { - obj = *(SegQueueLimit (FinalizerListSeg)-1); - if (method_table(obj)->HasCriticalFinalizer()) - { - MoveItem ((SegQueueLimit (FinalizerListSeg)-1), - FinalizerListSeg, CriticalFinalizerListSeg); - goto retry; - } - else - --SegQueueLimit (FinalizerListSeg); - } - else - obj = *(--SegQueueLimit (FinalizerListSeg)); - + obj = *(--SegQueueLimit (FinalizerListSeg)); } else if (!only_non_critical && !IsSegEmpty(CriticalFinalizerListSeg)) { @@ -37537,52 +38036,10 @@ CFinalize::GetNextFinalizableObject (BOOL only_non_critical) return obj; } -void -CFinalize::SetSegForShutDown(BOOL fHasLock) -{ - int i; - - if (!fHasLock) - EnterFinalizeLock(); - for (i = 0; i <= max_generation; i++) - { - unsigned int seg = gen_segment (i); - Object** startIndex = SegQueueLimit (seg)-1; - Object** stopIndex = SegQueue (seg); - for (Object** po = startIndex; po >= stopIndex; po--) - { - Object* obj = *po; - if (method_table(obj)->HasCriticalFinalizer()) - { - MoveItem (po, seg, CriticalFinalizerListSeg); - } - else - { - MoveItem (po, seg, FinalizerListSeg); - } - } - } - if (!fHasLock) - LeaveFinalizeLock(); -} - -void -CFinalize::DiscardNonCriticalObjects() -{ - //empty the finalization queue - Object** startIndex = SegQueueLimit (FinalizerListSeg)-1; - Object** stopIndex = SegQueue (FinalizerListSeg); - for (Object** po = startIndex; po >= stopIndex; po--) - { - MoveItem (po, FinalizerListSeg, FreeList); - } -} - size_t CFinalize::GetNumberFinalizableObjects() { - return SegQueueLimit (FinalizerListSeg) - - (g_fFinalizerRunOnShutDown ? 
m_Array : SegQueue(FinalizerListSeg)); + return SegQueueLimit(FinalizerListSeg) - SegQueue(FinalizerListSeg); } void @@ -38296,11 +38753,6 @@ bool GCHeap::IsConcurrentGCEnabled() #endif //BACKGROUND_GC } -void GCHeap::SetFinalizeRunOnShutdown(bool value) -{ - g_fFinalizerRunOnShutDown = value; -} - void PopulateDacVars(GcDacVars *gcDacVars) { #ifndef DACCESS_COMPILE @@ -38312,6 +38764,7 @@ void PopulateDacVars(GcDacVars *gcDacVars) gcDacVars->build_variant = &g_build_variant; gcDacVars->gc_structures_invalid_cnt = const_cast(&GCScan::m_GcStructuresInvalidCnt); gcDacVars->generation_size = sizeof(generation); + gcDacVars->total_generation_count = total_generation_count; gcDacVars->max_gen = &g_max_generation; #ifndef MULTIPLE_HEAPS gcDacVars->mark_array = &gc_heap::mark_array; diff --git a/src/Native/gc/gcconfig.h b/src/Native/gc/gcconfig.h index e3fc6d1e9af..3ff7a1dc292 100644 --- a/src/Native/gc/gcconfig.h +++ b/src/Native/gc/gcconfig.h @@ -121,7 +121,13 @@ class GCConfigStringHolder INT_CONFIG (BGCFLEnableSmooth, "BGCFLEnableSmooth", NULL, 0, "Enables smoothing") \ INT_CONFIG (BGCFLEnableTBH, "BGCFLEnableTBH", NULL, 0, "Enables TBH") \ INT_CONFIG (BGCFLEnableFF, "BGCFLEnableFF", NULL, 0, "Enables FF") \ - INT_CONFIG (BGCG2RatioStep, "BGCG2RatioStep", NULL, 5, "Ratio correction factor for ML loop") + INT_CONFIG (BGCG2RatioStep, "BGCG2RatioStep", NULL, 5, "Ratio correction factor for ML loop") \ + INT_CONFIG (GCHeapHardLimitSOH, "GCHeapHardLimitSOH", NULL, 0, "Specifies a hard limit for the GC heap SOH") \ + INT_CONFIG (GCHeapHardLimitLOH, "GCHeapHardLimitLOH", NULL, 0, "Specifies a hard limit for the GC heap LOH") \ + INT_CONFIG (GCHeapHardLimitPOH, "GCHeapHardLimitPOH", NULL, 0, "Specifies a hard limit for the GC heap POH") \ + INT_CONFIG (GCHeapHardLimitSOHPercent, "GCHeapHardLimitSOHPercent", NULL, 0, "Specifies the GC heap SOH usage as a percentage of the total memory") \ + INT_CONFIG (GCHeapHardLimitLOHPercent, "GCHeapHardLimitLOHPercent", NULL, 0, "Specifies the GC heap LOH usage as a percentage of the total memory") \ + INT_CONFIG (GCHeapHardLimitPOHPercent, "GCHeapHardLimitPOHPercent", NULL, 0, "Specifies the GC heap POH usage as a percentage of the total memory") \ // This class is responsible for retreiving configuration information // for how the GC should operate. 
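These new knobs are consumed by the GCHeap::Initialize changes earlier in this patch: either absolute byte limits (SOH and LOH are both required, POH is raised to the minimum hard-limit segment size) or percentages of total memory (each must be in range and the sum must stay under 100). A simplified standalone sketch of how the settings combine, with illustrative names and a stand-in value for min_segment_size_hard_limit:

```cpp
// Simplified sketch of the per-object-heap hard-limit validation described above.
#include <cstdint>
#include <cstdio>

const uint64_t min_poh_limit = 16 * 1024 * 1024;   // stand-in for min_segment_size_hard_limit

// Returns 0 on an invalid configuration, otherwise the combined hard limit in bytes.
uint64_t combine_hard_limits (uint64_t soh, uint64_t loh, uint64_t poh,
                              uint32_t soh_pct, uint32_t loh_pct, uint32_t poh_pct,
                              uint64_t total_physical_mem)
{
    if (soh || loh || poh)
    {
        if (!soh || !loh)
            return 0;                              // SOH and LOH byte limits are both required
        if (poh < min_poh_limit)
            poh = min_poh_limit;                   // POH limit is raised to the minimum
        return soh + loh + poh;
    }
    if (soh_pct || loh_pct || poh_pct)
    {
        if (soh_pct == 0 || soh_pct >= 100) return 0;
        if (loh_pct == 0 || loh_pct >= 100) return 0;
        if (poh_pct >= 100) return 0;              // the POH percentage alone may be 0
        if (soh_pct + loh_pct + poh_pct >= 100) return 0;
        uint64_t soh_bytes = total_physical_mem * soh_pct / 100;
        uint64_t loh_bytes = total_physical_mem * loh_pct / 100;
        uint64_t poh_bytes = poh_pct ? (total_physical_mem * poh_pct / 100) : min_poh_limit;
        return soh_bytes + loh_bytes + poh_bytes;
    }
    return 0;                                      // no per-heap limits configured
}

int main ()
{
    // e.g. 50% SOH and 20% LOH of an 8 GB machine, POH left at its minimum
    uint64_t limit = combine_hard_limits (0, 0, 0, 50, 20, 0, 8ull * 1024 * 1024 * 1024);
    printf ("combined hard limit: %llu bytes\n", (unsigned long long)limit);
    return 0;
}
```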
diff --git a/src/Native/gc/gcee.cpp b/src/Native/gc/gcee.cpp index abe36ccd9da..2964b14190e 100644 --- a/src/Native/gc/gcee.cpp +++ b/src/Native/gc/gcee.cpp @@ -228,7 +228,7 @@ size_t GCHeap::GetLastGCStartTime(int generation) gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS - return dd_time_clock (hp->dynamic_data_of (generation)); + return (size_t)(dd_time_clock (hp->dynamic_data_of (generation)) / 1000); } size_t GCHeap::GetLastGCDuration(int generation) @@ -239,14 +239,14 @@ size_t GCHeap::GetLastGCDuration(int generation) gc_heap* hp = pGenGCHeap; #endif //MULTIPLE_HEAPS - return dd_gc_elapsed_time (hp->dynamic_data_of (generation)); + return (size_t)(dd_gc_elapsed_time (hp->dynamic_data_of (generation)) / 1000); } -size_t GetHighPrecisionTimeStamp(); +uint64_t GetHighPrecisionTimeStamp(); size_t GCHeap::GetNow() { - return GetHighPrecisionTimeStamp(); + return (size_t)(GetHighPrecisionTimeStamp() / 1000); } bool GCHeap::IsGCInProgressHelper (bool bConsiderGCStart) diff --git a/src/Native/gc/gcimpl.h b/src/Native/gc/gcimpl.h index 5f93f0f619d..d1f062efb44 100644 --- a/src/Native/gc/gcimpl.h +++ b/src/Native/gc/gcimpl.h @@ -167,9 +167,22 @@ class GCHeap : public IGCHeapInternal void GetMemoryInfo(uint64_t* highMemLoadThresholdBytes, uint64_t* totalAvailableMemoryBytes, uint64_t* lastRecordedMemLoadBytes, - uint32_t* lastRecordedMemLoadPct, - size_t* lastRecordedHeapSizeBytes, - size_t* lastRecordedFragmentationBytes); + uint64_t* lastRecordedHeapSizeBytes, + uint64_t* lastRecordedFragmentationBytes, + uint64_t* totalCommittedBytes, + uint64_t* promotedBytes, + uint64_t* pinnedObjectCount, + uint64_t* finalizationPendingCount, + uint64_t* index, + uint32_t* generation, + uint32_t* pauseTimePct, + bool* isCompaction, + bool* isConcurrent, + uint64_t* genInfoRaw, + uint64_t* pauseInfoRaw, + int kind);; + + uint32_t GetMemoryLoad(); int GetGcLatencyMode(); int SetGcLatencyMode(int newLatencyMode); @@ -201,12 +214,8 @@ class GCHeap : public IGCHeapInternal PER_HEAP_ISOLATED size_t GetNumberFinalizableObjects(); PER_HEAP_ISOLATED size_t GetFinalizablePromotedCount(); - void SetFinalizeQueueForShutdown(bool fHasLock); - bool ShouldRestartFinalizerWatchDog(); - void DiagWalkObject (Object* obj, walk_fn fn, void* context); void DiagWalkObject2 (Object* obj, walk_fn2 fn, void* context); - void SetFinalizeRunOnShutdown(bool value); public: // FIX diff --git a/src/Native/gc/gcinterface.dac.h b/src/Native/gc/gcinterface.dac.h index a0684a8fa84..348279b5b69 100644 --- a/src/Native/gc/gcinterface.dac.h +++ b/src/Native/gc/gcinterface.dac.h @@ -17,6 +17,12 @@ #define MAX_GC_MECHANISM_BITS_COUNT 2 #define MAX_GLOBAL_GC_MECHANISMS_COUNT 6 +// The number of generations is hardcoded in to the dac APIS (DacpGcHeapDetails hard codes the size of its arrays) +// The number of generations is hardcoded into some older dac APIS (for example DacpGcHeapDetails hard codes the size of its arrays) +// This value cannot change and should not be used in new DAC APIs. 
New APIs can query GcDacVars.total_generation_count +// variable which is dynamically initialized at runtime + + #define NUMBERGENERATIONS 4 #define INITIAL_HANDLE_TABLE_ARRAY_SIZE 10 #define HANDLE_MAX_INTERNAL_TYPES 12 @@ -183,6 +189,7 @@ struct GcDacVars { uint8_t major_version_number; uint8_t minor_version_number; size_t generation_size; + size_t total_generation_count; #ifdef DACCESS_COMPILE #define GC_DAC_VAR(type, name) DPTR(type) name; #define GC_DAC_PTR_VAR(type, name) DPTR(type*) name; diff --git a/src/Native/gc/gcinterface.h b/src/Native/gc/gcinterface.h index 6c72fe0c5b9..331f8e12210 100644 --- a/src/Native/gc/gcinterface.h +++ b/src/Native/gc/gcinterface.h @@ -296,6 +296,14 @@ enum end_no_gc_region_status end_no_gc_alloc_exceeded = 3 }; +enum gc_kind +{ + gc_kind_any = 0, // any of the following kind + gc_kind_ephemeral = 1, // gen0 or gen1 GC + gc_kind_full_blocking = 2, // blocking gen2 GC + gc_kind_background = 3 // background GC (always gen2) +}; + typedef enum { /* @@ -571,23 +579,12 @@ class IGCHeap { =========================================================================== */ - // Finalizes all registered objects for shutdown, even if they are still reachable. - virtual void SetFinalizeQueueForShutdown(bool fHasLock) = 0; - // Gets the number of finalizable objects. virtual size_t GetNumberOfFinalizable() = 0; - // Traditionally used by the finalizer thread on shutdown to determine - // whether or not to time out. Returns true if the GC lock has not been taken. - virtual bool ShouldRestartFinalizerWatchDog() = 0; - // Gets the next finalizable object. virtual Object* GetNextFinalizable() = 0; - // Sets whether or not the GC should report all finalizable objects as - // ready to be finalized, instead of only collectable objects. - virtual void SetFinalizeRunOnShutdown(bool value) = 0; - /* =========================================================================== BCL routines. These are routines that are directly exposed by mscorlib @@ -596,20 +593,48 @@ class IGCHeap { =========================================================================== */ - // Gets memory related information - + // Gets memory related information the last GC observed. Depending on the last arg, this could + // be any last GC that got recorded, or of the kind specified by this arg. All info below is + // what was observed by that last GC. + // // highMemLoadThreshold - physical memory load (in percentage) when GC will start to - // react aggressively to reclaim memory. + // react aggressively to reclaim memory. // totalPhysicalMem - the total amount of phyiscal memory available on the machine and the memory - // limit set on the container if running in a container. - // lastRecordedMemLoad - physical memory load in percentage recorded in the last GC - // lastRecordedHeapSize - total managed heap size recorded in the last GC - // lastRecordedFragmentation - total fragmentation in the managed heap recorded in the last GC + // limit set on the container if running in a container. + // lastRecordedMemLoad - physical memory load in percentage. + // lastRecordedHeapSizeBytes - total managed heap size. + // lastRecordedFragmentation - total fragmentation in the managed heap. + // totalCommittedBytes - total committed bytes by the managed heap. + // promotedBytes - promoted bytes. + // pinnedObjectCount - # of pinned objects observed. + // finalizationPendingCount - # of objects ready for finalization. + // index - the index of the GC. + // generation - the generation the GC collected. 
+ // pauseTimePct - the % pause time in GC so far since process started. + // isCompaction - compacted or not. + // isConcurrent - concurrent or not. + // genInfoRaw - info about each generation. + // pauseInfoRaw - pause info. virtual void GetMemoryInfo(uint64_t* highMemLoadThresholdBytes, - uint64_t* totalPhysicalMemoryBytes, + uint64_t* totalAvailableMemoryBytes, uint64_t* lastRecordedMemLoadBytes, - uint32_t* lastRecordedMemLoadPct, - size_t* lastRecordedHeapSizeBytes, - size_t* lastRecordedFragmentationBytes) = 0; + uint64_t* lastRecordedHeapSizeBytes, + uint64_t* lastRecordedFragmentationBytes, + uint64_t* totalCommittedBytes, + uint64_t* promotedBytes, + uint64_t* pinnedObjectCount, + uint64_t* finalizationPendingCount, + uint64_t* index, + uint32_t* generation, + uint32_t* pauseTimePct, + bool* isCompaction, + bool* isConcurrent, + uint64_t* genInfoRaw, + uint64_t* pauseInfoRaw, + int kind) = 0; + + // Get the last memory load in percentage observed by the last GC. + virtual uint32_t GetMemoryLoad() = 0; // Gets the current GC latency mode. virtual int GetGcLatencyMode() = 0; diff --git a/src/Native/gc/gcpriv.h b/src/Native/gc/gcpriv.h index e3a6c1f5b60..62844b74eb5 100644 --- a/src/Native/gc/gcpriv.h +++ b/src/Native/gc/gcpriv.h @@ -223,6 +223,7 @@ const int policy_expand = 2; #define JOIN_LOG (MIN_CUSTOM_LOG_LEVEL + 6) #define SPINLOCK_LOG (MIN_CUSTOM_LOG_LEVEL + 7) #define SNOOP_LOG (MIN_CUSTOM_LOG_LEVEL + 8) +#define COMMIT_ACCOUNTING_LOG (MIN_CUSTOM_LOG_LEVEL + 9) // NOTE! This is for HEAP_BALANCE_INSTRUMENTATION // This particular one is special and needs to be well formatted because we @@ -398,6 +399,17 @@ enum gc_tuning_point tuning_deciding_short_on_seg = 5 }; +enum gc_oh_num +{ + soh = 0, + loh = 1, + poh = 2, + none = 3, + total_oh_count = 4 +}; + +gc_oh_num gen_to_oh (int gen); + #if defined(TRACE_GC) && defined(BACKGROUND_GC) static const char * const str_bgc_state[] = { @@ -750,7 +762,7 @@ struct static_data float fragmentation_burden_limit; float limit; float max_limit; - size_t time_clock; // time after which to collect generation, in performance counts (see QueryPerformanceCounter) + uint64_t time_clock; // time after which to collect generation, in performance counts (see QueryPerformanceCounter) size_t gc_clock; // nubmer of gcs after which to collect generation }; @@ -791,7 +803,7 @@ class dynamic_data size_t freach_previous_promotion; size_t fragmentation; //fragmentation when we don't compact size_t gc_clock; //gc# when last GC happened - size_t time_clock; //time when last gc started + uint64_t time_clock; //time when last gc started size_t gc_elapsed_time; // Time it took for the gc to complete float gc_speed; // speed in bytes/msec for the gc to complete @@ -800,6 +812,32 @@ class dynamic_data static_data* sdata; }; +struct recorded_generation_info +{ + size_t size_before; + size_t fragmentation_before; + size_t size_after; + size_t fragmentation_after; +}; + +struct last_recorded_gc_info +{ + VOLATILE(size_t) index; + size_t total_committed; + size_t promoted; + size_t pinned_objects; + size_t finalize_promoted_objects; + size_t pause_durations[2]; + float pause_percentage; + recorded_generation_info gen_info[total_generation_count]; + size_t heap_size; + size_t fragmentation; + uint32_t memory_load; + uint8_t condemned_generation; + bool compaction; + bool concurrent; +}; + #define ro_in_entry 0x1 // Note that I am storing both h0 and seg0, even though in Server GC you can get to @@ -1138,6 +1176,7 @@ class gc_heap static heap_segment* make_heap_segment 
(uint8_t* new_pages, size_t size, + gc_oh_num oh, int h_number); static @@ -1215,6 +1254,21 @@ class gc_heap PER_HEAP_ISOLATED void do_post_gc(); + PER_HEAP_ISOLATED + void update_recorded_gen_data (last_recorded_gc_info* gc_info); + + PER_HEAP + void update_end_gc_time_per_heap(); + + PER_HEAP_ISOLATED + void update_end_ngc_time(); + + PER_HEAP + void add_to_history_per_heap(); + + PER_HEAP_ISOLATED + void add_to_history(); + #ifdef BGC_SERVO_TUNING PER_HEAP_ISOLATED void check_and_adjust_bgc_tuning (int gen_number, size_t physical_size, ptrdiff_t virtual_fl_size); @@ -1269,6 +1323,11 @@ class gc_heap #endif // FEATURE_BASICFREEZE protected: + PER_HEAP_ISOLATED + BOOL reserve_initial_memory (size_t normal_size, size_t large_size, size_t pinned_size, int num_heaps, bool use_large_pages_p, bool separated_poh_p); + + PER_HEAP_ISOLATED + void destroy_initial_memory(); PER_HEAP_ISOLATED void walk_heap (walk_fn fn, void* context, int gen_number, BOOL walk_large_object_heap_p); @@ -1508,7 +1567,7 @@ class gc_heap int align_const, BOOL* commit_failed_p); PER_HEAP - BOOL loh_a_fit_segment_end_p (int gen_number, + BOOL uoh_a_fit_segment_end_p (int gen_number, size_t size, alloc_context* acontext, uint32_t flags, @@ -1631,7 +1690,9 @@ class gc_heap PER_HEAP heap_segment* soh_get_segment_to_expand(); PER_HEAP - heap_segment* get_segment (size_t size, BOOL loh_p); + heap_segment* get_segment (size_t size, gc_oh_num oh); + PER_HEAP_ISOLATED + void release_segment (heap_segment* sg); PER_HEAP_ISOLATED void seg_mapping_table_add_segment (heap_segment* seg, gc_heap* hp); PER_HEAP_ISOLATED @@ -1651,13 +1712,21 @@ class gc_heap PER_HEAP void decommit_heap_segment_pages (heap_segment* seg, size_t extra_space); PER_HEAP + size_t decommit_ephemeral_segment_pages_step (); + PER_HEAP + size_t decommit_heap_segment_pages_worker (heap_segment* seg, uint8_t *new_committed); + PER_HEAP_ISOLATED + bool decommit_step (); + PER_HEAP void decommit_heap_segment (heap_segment* seg); PER_HEAP_ISOLATED bool virtual_alloc_commit_for_heap (void* addr, size_t size, int h_number); PER_HEAP_ISOLATED - bool virtual_commit (void* address, size_t size, int h_number=-1, bool* hard_limit_exceeded_p=NULL); + bool virtual_commit (void* address, size_t size, gc_oh_num oh, int h_number=-1, bool* hard_limit_exceeded_p=NULL); + PER_HEAP_ISOLATED + bool virtual_decommit (void* address, size_t size, gc_oh_num oh, int h_number=-1); PER_HEAP_ISOLATED - bool virtual_decommit (void* address, size_t size, int h_number=-1); + void virtual_free (void* add, size_t size, heap_segment* sg=NULL); PER_HEAP void clear_gen0_bricks(); #ifdef BACKGROUND_GC @@ -2327,10 +2396,8 @@ class gc_heap PER_HEAP void pin_object (uint8_t* o, uint8_t** ppObject); -#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) PER_HEAP_ISOLATED size_t get_total_pinned_objects(); -#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE PER_HEAP void reset_mark_stack (); @@ -2446,8 +2513,6 @@ class gc_heap void check_loh_compact_mode (BOOL all_heaps_compacted_p); #endif //FEATURE_LOH_COMPACTION - PER_HEAP - void decommit_ephemeral_segment_pages (int condemned_gen_number); PER_HEAP void fix_generation_bounds (int condemned_gen_number, generation* consing_gen); @@ -2773,6 +2838,8 @@ class gc_heap size_t get_current_allocated(); PER_HEAP_ISOLATED size_t get_total_allocated(); + PER_HEAP_ISOLATED + size_t get_total_promoted(); #ifdef BGC_SERVO_TUNING PER_HEAP_ISOLATED size_t get_total_generation_size (int gen_number); @@ -3253,10 +3320,7 @@ class gc_heap gc_history_global 
gc_data_global; PER_HEAP_ISOLATED - size_t gc_last_ephemeral_decommit_time; - - PER_HEAP_ISOLATED - size_t gc_gen0_desired_high; + uint64_t gc_last_ephemeral_decommit_time; PER_HEAP size_t gen0_big_free_spaces; @@ -3266,19 +3330,70 @@ class gc_heap double short_plugs_pad_ratio; #endif //SHORT_PLUGS -#ifdef HOST_64BIT + // We record the time GC work is done while EE is suspended. + // suspended_start_ts is what we get right before we call + // SuspendEE. We omit the time between GC end and RestartEE + // because it's very short and by the time we are calling it + // the settings may have changed and we'd have to do more work + // to figure out the right GC to record info of. + // + // The complications are the GCs triggered without their own + // SuspendEE, in which case we will record that GC's duration + // as its pause duration and the rest toward the GC that + // the SuspendEE was for. The ephemeral GC we might trigger + // at the beginning of a BGC and the PM triggered full GCs + // fall into this case. PER_HEAP_ISOLATED - size_t youngest_gen_desired_th; -#endif //HOST_64BIT + uint64_t suspended_start_time; PER_HEAP_ISOLATED - uint32_t last_gc_memory_load; + uint64_t end_gc_time; PER_HEAP_ISOLATED - size_t last_gc_heap_size; + uint64_t total_suspended_time; PER_HEAP_ISOLATED - size_t last_gc_fragmentation; + uint64_t process_start_time; + + PER_HEAP_ISOLATED + last_recorded_gc_info last_ephemeral_gc_info; + + PER_HEAP_ISOLATED + last_recorded_gc_info last_full_blocking_gc_info; + +#ifdef BACKGROUND_GC + // If the user didn't specify which kind of GC info to return, we need + // to return the last recorded one. There's a complication with BGC as BGC + // end runs concurrently. If 2 BGCs run back to back, we can't have one + // update the info while the user thread is reading it (and we'd still like + // to return the last BGC info otherwise if we only did BGCs we could frequently + // return nothing). So we maintain 2 of these for BGC and the older one is + // guaranteed to be consistent. + PER_HEAP_ISOLATED + last_recorded_gc_info last_bgc_info[2]; + // This is either 0 or 1. + PER_HEAP_ISOLATED + VOLATILE(int) last_bgc_info_index; + // Since a BGC can finish later than blocking GCs with larger indices, + // we can't just compare the index recorded in the GC info. We use this + // to know whether we should be looking for a bgc info or a blocking GC, + // if the user asks for the latest GC info of any kind. + // This can only go from false to true concurrently so if it is true, + // it means the bgc info is ready. + PER_HEAP_ISOLATED + VOLATILE(bool) is_last_recorded_bgc; + + PER_HEAP_ISOLATED + void add_bgc_pause_duration_0(); + + PER_HEAP_ISOLATED + last_recorded_gc_info* get_completed_bgc_info(); +#endif //BACKGROUND_GC + +#ifdef HOST_64BIT + PER_HEAP_ISOLATED + size_t youngest_gen_desired_th; +#endif //HOST_64BIT PER_HEAP_ISOLATED uint32_t high_memory_load_th; @@ -3360,12 +3475,18 @@ class gc_heap PER_HEAP_ISOLATED size_t heap_hard_limit; + PER_HEAP_ISOLATED + size_t heap_hard_limit_oh[total_oh_count - 1]; + PER_HEAP_ISOLATED CLRCriticalSection check_commit_cs; PER_HEAP_ISOLATED size_t current_total_committed; + PER_HEAP_ISOLATED + size_t committed_by_oh[total_oh_count]; + // This is what GC uses for its own bookkeeping. 
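The committed_by_oh array above suggests per-object-heap commit accounting keyed by gc_oh_num, which matches the virtual_commit / virtual_decommit signature changes in this patch that now take a gc_oh_num (with gc_oh_num::none used for bookkeeping memory such as the mark array). A rough sketch of that accounting, with hypothetical helper names rather than the real gc_heap methods:

```cpp
// Rough sketch of per-object-heap commit accounting keyed by gc_oh_num.
#include <cstddef>
#include <cstdio>

enum gc_oh_num { soh = 0, loh = 1, poh = 2, none = 3, total_oh_count = 4 };

static size_t committed_by_oh[total_oh_count];

void record_commit (gc_oh_num oh, size_t size)   { committed_by_oh[oh] += size; }
void record_decommit (gc_oh_num oh, size_t size) { committed_by_oh[oh] -= size; }

int main ()
{
    record_commit (soh, 32 * 1024 * 1024);       // a small-object segment
    record_commit (none, 1 * 1024 * 1024);       // bookkeeping pages, e.g. the mark array
    record_decommit (soh, 8 * 1024 * 1024);
    for (int i = 0; i < total_oh_count; i++)
        printf ("bucket %d: %zu bytes committed\n", i, committed_by_oh[i]);
    return 0;
}
```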
PER_HEAP_ISOLATED size_t current_total_committed_bookkeeping; @@ -3380,7 +3501,7 @@ class gc_heap #ifdef HEAP_BALANCE_INSTRUMENTATION PER_HEAP_ISOLATED - size_t last_gc_end_time_ms; + size_t last_gc_end_time_us; #endif //HEAP_BALANCE_INSTRUMENTATION PER_HEAP_ISOLATED @@ -3427,7 +3548,7 @@ class gc_heap #endif //MULTIPLE_HEAPS PER_HEAP - size_t time_bgc_last; + uint64_t time_bgc_last; PER_HEAP uint8_t* gc_low; // lowest address being condemned @@ -3455,10 +3576,8 @@ class gc_heap PER_HEAP uint8_t* oldest_pinned_plug; -#if defined(ENABLE_PERF_COUNTERS) || defined(FEATURE_EVENT_TRACE) PER_HEAP size_t num_pinned_objects; -#endif //ENABLE_PERF_COUNTERS || FEATURE_EVENT_TRACE #ifdef FEATURE_LOH_COMPACTION PER_HEAP @@ -3595,12 +3714,6 @@ class gc_heap PER_HEAP_ISOLATED gc_mechanisms_store gchist[max_history_count]; - PER_HEAP - void add_to_history_per_heap(); - - PER_HEAP_ISOLATED - void add_to_history(); - PER_HEAP size_t total_promoted_bytes; @@ -3785,6 +3898,14 @@ class gc_heap PER_HEAP_ISOLATED BOOL proceed_with_gc_p; +#ifdef MULTIPLE_HEAPS + PER_HEAP_ISOLATED + BOOL gradual_decommit_in_progress_p; + + PER_HEAP_ISOLATED + size_t max_decommit_step_size; +#endif //MULTIPLE_HEAPS + #define youngest_generation (generation_of (0)) #define large_object_generation (generation_of (loh_generation)) #define pinned_object_generation (generation_of (poh_generation)) @@ -4020,12 +4141,6 @@ class gc_heap size_t eph_gen_starts_size; #ifdef GC_CONFIG_DRIVEN - PER_HEAP_ISOLATED - size_t time_init; - - PER_HEAP_ISOLATED - size_t time_since_init; - // 0 stores compacting GCs; // 1 stores sweeping GCs; PER_HEAP_ISOLATED @@ -4285,9 +4400,7 @@ class CFinalize size_t GetPromotedCount(); //Methods used by the shutdown code to call every finalizer - void SetSegForShutDown(BOOL fHasLock); size_t GetNumberFinalizableObjects(); - void DiscardNonCriticalObjects(); void CheckFinalizerObjects(); }; @@ -4423,7 +4536,7 @@ size_t& dd_gc_clock (dynamic_data* inst) return inst->gc_clock; } inline -size_t& dd_time_clock (dynamic_data* inst) +uint64_t& dd_time_clock (dynamic_data* inst) { return inst->time_clock; } @@ -4434,7 +4547,7 @@ size_t& dd_gc_clock_interval (dynamic_data* inst) return inst->sdata->gc_clock; } inline -size_t& dd_time_clock_interval (dynamic_data* inst) +uint64_t& dd_time_clock_interval (dynamic_data* inst) { return inst->sdata->time_clock; } @@ -4698,7 +4811,12 @@ class heap_segment uint8_t* background_allocated; #ifdef MULTIPLE_HEAPS gc_heap* heap; +#ifdef _DEBUG + uint8_t* saved_committed; + size_t saved_desired_allocation; +#endif // _DEBUG #endif //MULTIPLE_HEAPS + uint8_t* decommit_target; uint8_t* plan_allocated; uint8_t* saved_bg_allocated; @@ -4735,6 +4853,11 @@ uint8_t*& heap_segment_committed (heap_segment* inst) return inst->committed; } inline +uint8_t*& heap_segment_decommit_target (heap_segment* inst) +{ + return inst->decommit_target; +} +inline uint8_t*& heap_segment_used (heap_segment* inst) { return inst->used; @@ -4771,6 +4894,22 @@ BOOL heap_segment_uoh_p (heap_segment * inst) return !!(inst->flags & (heap_segment_flags_loh | heap_segment_flags_poh)); } +inline gc_oh_num heap_segment_oh (heap_segment * inst) +{ + if ((inst->flags & heap_segment_flags_loh) != 0) + { + return gc_oh_num::loh; + } + else if ((inst->flags & heap_segment_flags_poh) != 0) + { + return gc_oh_num::poh; + } + else + { + return gc_oh_num::soh; + } +} + #ifdef BACKGROUND_GC inline BOOL heap_segment_decommitted_p (heap_segment * inst) diff --git a/src/Native/libunwind/src/AddressSpace.hpp 
b/src/Native/libunwind/src/AddressSpace.hpp index fb07c807db9..389be0a5066 100644 --- a/src/Native/libunwind/src/AddressSpace.hpp +++ b/src/Native/libunwind/src/AddressSpace.hpp @@ -117,12 +117,23 @@ namespace libunwind { // __eh_frame_hdr_start = SIZEOF(.eh_frame_hdr) > 0 ? ADDR(.eh_frame_hdr) : 0; // __eh_frame_hdr_end = SIZEOF(.eh_frame_hdr) > 0 ? . : 0; +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX extern char __eh_frame_start; extern char __eh_frame_end; +#endif #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) -extern char __eh_frame_hdr_start; -extern char __eh_frame_hdr_end; + +#ifndef _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START +#define _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START __eh_frame_hdr_start +#endif + +#ifndef _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END +#define _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END __eh_frame_hdr_end +#endif + +extern char _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START; +extern char _LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END; #endif #elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) @@ -161,9 +172,11 @@ struct UnwindInfoSections { uintptr_t dso_base; #endif #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX uintptr_t dwarf_section; uintptr_t dwarf_section_length; #endif +#endif #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) uintptr_t dwarf_index_section; uintptr_t dwarf_index_section_length; @@ -401,18 +414,31 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, } #elif defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) && defined(_LIBUNWIND_IS_BAREMETAL) // Bare metal is statically linked, so no need to ask the dynamic loader +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX info.dwarf_section_length = (uintptr_t)(&__eh_frame_end - &__eh_frame_start); - info.dwarf_section = (uintptr_t)(&__eh_frame_start); + info.dwarf_section = (uintptr_t)(&__eh_frame_start); _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: section %p length %p", - (void *)info.dwarf_section, (void *)info.dwarf_section_length); + (void *)info.dwarf_section, + (void *)info.dwarf_section_length); +#endif #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) - info.dwarf_index_section = (uintptr_t)(&__eh_frame_hdr_start); - info.dwarf_index_section_length = (uintptr_t)(&__eh_frame_hdr_end - &__eh_frame_hdr_start); + info.dwarf_index_section = + (uintptr_t)(&(_LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START)); + info.dwarf_index_section_length = + (uintptr_t)(&(_LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_END) - + &(_LIBUNWIND_BAREMETAL_DWARF_INDEX_SEC_START)); _LIBUNWIND_TRACE_UNWINDING("findUnwindSections: index section %p length %p", - (void *)info.dwarf_index_section, (void *)info.dwarf_index_section_length); + (void *)info.dwarf_index_section, + (void *)info.dwarf_index_section_length); #endif + +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX if (info.dwarf_section_length) return true; +#else + if (info.dwarf_index_section_length > 0) + return true; +#endif #elif defined(_LIBUNWIND_ARM_EHABI) && defined(_LIBUNWIND_IS_BAREMETAL) // Bare metal is statically linked, so no need to ask the dynamic loader info.arm_section = (uintptr_t)(&__exidx_start); diff --git a/src/Native/libunwind/src/DwarfInstructions.hpp b/src/Native/libunwind/src/DwarfInstructions.hpp index c5cc6c9d510..f341772824c 100644 --- a/src/Native/libunwind/src/DwarfInstructions.hpp +++ b/src/Native/libunwind/src/DwarfInstructions.hpp @@ -169,6 +169,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // restore registers that DWARF says were saved R newRegisters = registers; pint_t returnAddress = 0; + 
pint_t returnAddressLocation = 0; const int lastReg = R::lastDwarfRegNum(); assert(static_cast(CFI_Parser::kMaxRegisterNumber) >= lastReg && "register range too large"); @@ -177,7 +178,14 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, for (int i = 0; i <= lastReg; ++i) { if (prolog.savedRegisters[i].location != CFI_Parser::kRegisterUnused) { - if (registers.validFloatRegister(i)) + if (i == (int)cieInfo.returnAddressRegister) { + returnAddress = getSavedRegister(addressSpace, registers, cfa, + prolog.savedRegisters[i], + returnAddressLocation); + + newRegisters.setRegister(i, returnAddress, returnAddressLocation); + } + else if (registers.validFloatRegister(i)) newRegisters.setFloatRegister( i, getSavedFloatRegister(addressSpace, registers, cfa, prolog.savedRegisters[i])); @@ -185,12 +193,6 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, newRegisters.setVectorRegister( i, getSavedVectorRegister(addressSpace, registers, cfa, prolog.savedRegisters[i])); - else if (i == (int)cieInfo.returnAddressRegister) { - pint_t dummyLocation; - returnAddress = getSavedRegister(addressSpace, registers, cfa, - prolog.savedRegisters[i], - dummyLocation); - } else if (registers.validRegister(i)) { pint_t value; pint_t location; @@ -272,7 +274,7 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, // Return address is address after call site instruction, so setting IP to // that does simualates a return. - newRegisters.setIP(returnAddress, 0); + newRegisters.setIP(returnAddress, returnAddressLocation); // Simulate the step by replacing the register set with the new ones. registers = newRegisters; diff --git a/src/Native/libunwind/src/UnwindCursor.hpp b/src/Native/libunwind/src/UnwindCursor.hpp index ae5cbe7479e..29c3dc7733a 100644 --- a/src/Native/libunwind/src/UnwindCursor.hpp +++ b/src/Native/libunwind/src/UnwindCursor.hpp @@ -1474,6 +1474,8 @@ bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, typename CFI_Parser::CIE_Info cieInfo; bool foundFDE = false; bool foundInCache = false; + +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX // If compact encoding table gave offset into dwarf section, go directly there if (fdeSectionOffsetHint != 0) { foundFDE = CFI_Parser::findFDE(_addressSpace, pc, sects.dwarf_section, @@ -1481,6 +1483,8 @@ bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, sects.dwarf_section + fdeSectionOffsetHint, &fdeInfo, &cieInfo); } +#endif + #if defined(_LIBUNWIND_SUPPORT_DWARF_INDEX) if (!foundFDE && (sects.dwarf_index_section != 0)) { foundFDE = EHHeaderParser::findFDE( @@ -1488,6 +1492,8 @@ bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, (uint32_t)sects.dwarf_index_section_length, &fdeInfo, &cieInfo); } #endif + +#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX if (!foundFDE) { // otherwise, search cache of previously found FDEs. pint_t cachedFDE = DwarfFDECache::findFDE(sects.dso_base, pc); @@ -1505,6 +1511,7 @@ bool UnwindCursor::getInfoFromDwarfSection(pint_t pc, (uint32_t)sects.dwarf_section_length, 0, &fdeInfo, &cieInfo); } +#endif if (foundFDE) { typename CFI_Parser::PrologInfo prolog; if (CFI_Parser::parseFDEInstructions(_addressSpace, fdeInfo, cieInfo, pc, @@ -1896,7 +1903,11 @@ void UnwindCursor::setInfoBasedOnIPRegister(bool isReturnAddress) { #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) // If there is dwarf unwind info, look there next. 
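The _LIBUNWIND_USE_ONLY_DWARF_INDEX guards above leave only the .eh_frame_hdr index search active and compile out the lookup paths that need the full .eh_frame section (the compact-encoding offset hint, the FDE cache, and the linear scan). A standalone sketch of the resulting lookup order, with stub search functions standing in for the CFI_Parser / EHHeaderParser / DwarfFDECache calls rather than the real libunwind APIs:

```cpp
// Sketch of the FDE lookup order with and without _LIBUNWIND_USE_ONLY_DWARF_INDEX.
#include <cstdio>

bool search_by_hint ()        { return false; }   // compact-encoding offset into .eh_frame
bool search_index_section ()  { return true;  }   // binary search of .eh_frame_hdr, when present
bool search_fde_cache ()      { return false; }   // previously found FDEs
bool search_linear ()         { return false; }   // linear scan of .eh_frame

bool find_fde ()
{
    bool found = false;
#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX
    found = search_by_hint ();                    // needs the full .eh_frame section
#endif
    if (!found)
        found = search_index_section ();          // the only path in index-only builds
#ifndef _LIBUNWIND_USE_ONLY_DWARF_INDEX
    if (!found)
        found = search_fde_cache ();
    if (!found)
        found = search_linear ();
#endif
    return found;
}

int main ()
{
    printf ("FDE %s\n", find_fde () ? "found" : "not found");
    return 0;
}
```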
+#if defined(_LIBUNWIND_USE_ONLY_DWARF_INDEX) + if (sects.dwarf_index_section != 0) { +#else if (sects.dwarf_section != 0) { +#endif if (this->getInfoFromDwarfSection(pc, sects)) { // found info in dwarf, done return; diff --git a/src/Runtime.Base/src/Runtime.Base.csproj b/src/Runtime.Base/src/Runtime.Base.csproj index 50213144a2c..d11304a115b 100644 --- a/src/Runtime.Base/src/Runtime.Base.csproj +++ b/src/Runtime.Base/src/Runtime.Base.csproj @@ -21,6 +21,9 @@ FEATURE_64BIT_ALIGNMENT;$(DefineConstants) + + FEATURE_64BIT_ALIGNMENT;$(DefineConstants) + diff --git a/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.HillClimbing.cs b/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.HillClimbing.cs index e29e9e198d1..a84b75228cb 100644 --- a/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.HillClimbing.cs +++ b/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.HillClimbing.cs @@ -88,8 +88,8 @@ private struct LogEntry private readonly Random _randomIntervalGenerator = new Random(); private readonly LogEntry[] _log = new LogEntry[LogCapacity]; - private int _logStart = 0; - private int _logSize = 0; + private int _logStart; + private int _logSize; public HillClimbing(int wavePeriod, int maxWaveMagnitude, double waveMagnitudeMultiplier, int waveHistorySize, double targetThroughputRatio, double targetSignalToNoiseRatio, double maxChangePerSecond, double maxChangePerSample, int sampleIntervalMsLow, int sampleIntervalMsHigh, @@ -184,8 +184,11 @@ public HillClimbing(int wavePeriod, int maxWaveMagnitude, double waveMagnitudeMu // Add the current thread count and throughput sample to our history // double throughput = numCompletions / sampleDurationSeconds; - - PortableThreadPoolEventSource.Log.WorkerThreadAdjustmentSample(throughput); + PortableThreadPoolEventSource log = PortableThreadPoolEventSource.Log; + if (log.IsEnabled()) + { + log.WorkerThreadAdjustmentSample(throughput); + } int sampleIndex = (int)(_totalSamples % _samplesToMeasure); _samples[sampleIndex] = throughput; @@ -355,8 +358,11 @@ public HillClimbing(int wavePeriod, int maxWaveMagnitude, double waveMagnitudeMu // Record these numbers for posterity // - PortableThreadPoolEventSource.Log.WorkerThreadAdjustmentStats(sampleDurationSeconds, throughput, threadWaveComponent.Real, throughputWaveComponent.Real, + if (log.IsEnabled()) + { + log.WorkerThreadAdjustmentStats(sampleDurationSeconds, throughput, threadWaveComponent.Real, throughputWaveComponent.Real, throughputErrorEstimate, _averageThroughputNoise, ratio.Real, confidence, _currentControlSetting, (ushort)newThreadWaveMagnitude); + } // @@ -413,7 +419,11 @@ private void LogTransition(int newThreadCount, double throughput, StateOrTransit _logSize++; - PortableThreadPoolEventSource.Log.WorkerThreadAdjustmentAdjustment(throughput, newThreadCount, (int)stateOrTransition); + PortableThreadPoolEventSource log = PortableThreadPoolEventSource.Log; + if (log.IsEnabled()) + { + log.WorkerThreadAdjustmentAdjustment(throughput, newThreadCount, (int)stateOrTransition); + } } public void ForceChange(int newThreadCount, StateOrTransition state) diff --git a/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.WaitThread.cs b/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.WaitThread.cs index b7e78ac3b61..78a3b2db829 100644 --- a/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.WaitThread.cs +++ 
b/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.WaitThread.cs @@ -150,7 +150,7 @@ public CompletedWaitHandle(RegisteredWaitHandle completedHandle, bool timedOut) /// /// The number of user-registered waits on this wait thread. /// - private int _numUserWaits = 0; + private int _numUserWaits; /// /// A list of removals of wait handles that are waiting for the wait thread to process. @@ -159,7 +159,7 @@ public CompletedWaitHandle(RegisteredWaitHandle completedHandle, bool timedOut) /// /// The number of pending removals. /// - private int _numPendingRemoves = 0; + private int _numPendingRemoves; /// /// An event to notify the wait thread that there are pending adds or removals of wait handles so it needs to wake up. diff --git a/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.WorkerThread.cs b/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.WorkerThread.cs index dfab32c3e6c..2cedaa5e3bc 100644 --- a/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.WorkerThread.cs +++ b/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.WorkerThread.cs @@ -25,7 +25,11 @@ private static int SemaphoreSpinCount private static void WorkerThreadStart() { - PortableThreadPoolEventSource.Log.WorkerThreadStart(ThreadCounts.VolatileReadCounts(ref ThreadPoolInstance._separated.counts).numExistingThreads); + PortableThreadPoolEventSource log = PortableThreadPoolEventSource.Log; + if (log.IsEnabled()) + { + log.WorkerThreadStart(ThreadCounts.VolatileReadCounts(ref ThreadPoolInstance._separated.counts).numExistingThreads); + } while (true) { @@ -70,7 +74,11 @@ private static void WorkerThreadStart() if (oldCounts == counts) { HillClimbing.ThreadPoolHillClimber.ForceChange(newCounts.numThreadsGoal, HillClimbing.StateOrTransition.ThreadTimedOut); - PortableThreadPoolEventSource.Log.WorkerThreadStop(newCounts.numExistingThreads); + + if (log.IsEnabled()) + { + log.WorkerThreadStop(newCounts.numExistingThreads); + } return; } } @@ -88,7 +96,11 @@ private static void WorkerThreadStart() /// If this thread was woken up before it timed out. 
private static bool WaitForRequest() { - PortableThreadPoolEventSource.Log.WorkerThreadWait(ThreadCounts.VolatileReadCounts(ref ThreadPoolInstance._separated.counts).numExistingThreads); + PortableThreadPoolEventSource log = PortableThreadPoolEventSource.Log; + if (log.IsEnabled()) + { + log.WorkerThreadWait(ThreadCounts.VolatileReadCounts(ref ThreadPoolInstance._separated.counts).numExistingThreads); + } return s_semaphore.Wait(ThreadPoolThreadTimeoutMs); } diff --git a/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.cs b/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.cs index 2794e5cf29a..de23cda5e60 100644 --- a/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.cs +++ b/src/System.Private.CoreLib/shared/System/Threading/PortableThreadPool.cs @@ -27,7 +27,7 @@ internal sealed partial class PortableThreadPool private const int CpuUtilizationHigh = 95; private const int CpuUtilizationLow = 80; - private int _cpuUtilization = 0; + private int _cpuUtilization; private static readonly short s_forcedMinWorkerThreads = AppContextConfigHelper.GetInt16Config("System.Threading.ThreadPool.MinThreads", 0, false); private static readonly short s_forcedMaxWorkerThreads = AppContextConfigHelper.GetInt16Config("System.Threading.ThreadPool.MaxThreads", 0, false); @@ -63,7 +63,7 @@ private struct CacheLineSeparated private readonly LowLevelLock _hillClimbingThreadAdjustmentLock = new LowLevelLock(); - private volatile int _numRequestedWorkers = 0; + private volatile int _numRequestedWorkers; private PortableThreadPool() { diff --git a/src/System.Private.CoreLib/src/System.Private.CoreLib.csproj b/src/System.Private.CoreLib/src/System.Private.CoreLib.csproj index 13f869da10d..14d5c0bbefd 100644 --- a/src/System.Private.CoreLib/src/System.Private.CoreLib.csproj +++ b/src/System.Private.CoreLib/src/System.Private.CoreLib.csproj @@ -495,6 +495,7 @@ INPLACE_RUNTIME;$(DefineConstants) FEATURE_64BIT_ALIGNMENT;$(DefineConstants) FEATURE_64BIT_ALIGNMENT;$(DefineConstants) + FEATURE_64BIT_ALIGNMENT;$(DefineConstants) diff --git a/src/System.Private.CoreLib/src/System/Environment.CoreRT.cs b/src/System.Private.CoreLib/src/System/Environment.CoreRT.cs index 42b326a5365..52ae5d3ca84 100644 --- a/src/System.Private.CoreLib/src/System/Environment.CoreRT.cs +++ b/src/System.Private.CoreLib/src/System/Environment.CoreRT.cs @@ -55,7 +55,7 @@ internal static void ShutdownCore() { // TODO: shut down threading etc. 
-#if !TARGET_WASM // WASMTODO +#if !TARGET_WASM // WASMTODO Be careful what happens here as if the code has called emscripten_set_main_loop then the main loop method will normally be called repeatedly after this method AppContext.OnProcessExit(); #endif } diff --git a/src/System.Private.CoreLib/src/System/GC.cs b/src/System.Private.CoreLib/src/System/GC.cs index 95b49568237..ed3b17beb09 100644 --- a/src/System.Private.CoreLib/src/System/GC.cs +++ b/src/System.Private.CoreLib/src/System/GC.cs @@ -66,6 +66,55 @@ internal enum EndNoGCRegionStatus AllocationExceeded = 3 } + internal struct GCGenerationInfo + { + public long SizeBeforeBytes { get; } + public long FragmentationBeforeBytes { get; } + public long SizeAfterBytes { get; } + public long FragmentationAfterBytes { get; } + } + + [StructLayout(LayoutKind.Sequential)] + internal struct GCMemoryInfoData + { + internal long _highMemoryLoadThresholdBytes; + internal long _totalAvailableMemoryBytes; + internal long _memoryLoadBytes; + internal long _heapSizeBytes; + internal long _fragmentedBytes; + internal long _totalCommittedBytes; + internal long _promotedBytes; + internal long _pinnedObjectsCount; + internal long _finalizationPendingCount; + internal long _index; + internal int _generation; + internal int _pauseTimePercentage; + internal bool _compacted; + internal bool _concurrent; + + private GCGenerationInfo _generationInfo0; + private GCGenerationInfo _generationInfo1; + private GCGenerationInfo _generationInfo2; + private GCGenerationInfo _generationInfo3; + private GCGenerationInfo _generationInfo4; + + internal ReadOnlySpan GenerationInfoAsSpan => MemoryMarshal.CreateReadOnlySpan(ref _generationInfo0, 5); + + private TimeSpan _pauseDuration0; + private TimeSpan _pauseDuration1; + + internal ReadOnlySpan PauseDurationsAsSpan => MemoryMarshal.CreateReadOnlySpan(ref _pauseDuration0, 2); + } + + // TODO: deduplicate with shared CoreLib + public enum GCKind + { + Any = 0, // any of the following kind + Ephemeral = 1, // gen0 or gen1 GC + FullBlocking = 2, // blocking gen2 GC + Background = 3 // background GC (always gen2) + }; + public static class GC { public static int GetGeneration(object obj) @@ -656,18 +705,13 @@ public static long GetTotalAllocatedBytes(bool precise = false) public static GCMemoryInfo GetGCMemoryInfo() { - RuntimeImports.RhGetMemoryInfo(out ulong highMemLoadThresholdBytes, - out ulong totalAvailableMemoryBytes, - out ulong lastRecordedMemLoadBytes, - out uint _, - out UIntPtr lastRecordedHeapSizeBytes, - out UIntPtr lastRecordedFragmentationBytes); - - return new GCMemoryInfo(highMemoryLoadThresholdBytes: (long)highMemLoadThresholdBytes, - memoryLoadBytes: (long)lastRecordedMemLoadBytes, - totalAvailableMemoryBytes: (long)totalAvailableMemoryBytes, - heapSizeBytes: (long)(ulong)lastRecordedHeapSizeBytes, - fragmentedBytes: (long)(ulong)lastRecordedFragmentationBytes); + RuntimeImports.RhGetMemoryInfo(out GCMemoryInfoData data, GCKind.Any); + + return new GCMemoryInfo(highMemoryLoadThresholdBytes: data._highMemoryLoadThresholdBytes, + memoryLoadBytes: data._memoryLoadBytes, + totalAvailableMemoryBytes: data._totalAvailableMemoryBytes, + heapSizeBytes: data._heapSizeBytes, + fragmentedBytes: data._fragmentedBytes); } internal static ulong GetSegmentSize() diff --git a/src/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs b/src/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs index 4b941c175c9..8f640172c77 100644 --- a/src/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs +++ 
b/src/System.Private.CoreLib/src/System/Runtime/RuntimeImports.cs @@ -208,13 +208,7 @@ internal static void RhWaitForPendingFinalizers(bool allowReentrantWait) [MethodImpl(MethodImplOptions.InternalCall)] [RuntimeImport(RuntimeLibrary, "RhGetMemoryInfo")] - internal static extern void RhGetMemoryInfo(out ulong highMemLoadThresholdBytes, - out ulong totalAvailableMemoryBytes, - out ulong lastRecordedMemLoadBytes, - out uint lastRecordedMemLoadPct, - // The next two are size_t - out UIntPtr lastRecordedHeapSizeBytes, - out UIntPtr lastRecordedFragmentationBytes); + internal static extern void RhGetMemoryInfo(out GCMemoryInfoData info, GCKind kind); [DllImport(RuntimeLibrary, ExactSpelling = true)] internal static unsafe extern void RhAllocateNewArray(IntPtr pArrayEEType, uint numElements, uint flags, void* pResult); diff --git a/tests/src/Simple/HelloWasm/Program.cs b/tests/src/Simple/HelloWasm/Program.cs index 5c428ce1e60..86803907590 100644 --- a/tests/src/Simple/HelloWasm/Program.cs +++ b/tests/src/Simple/HelloWasm/Program.cs @@ -21,6 +21,11 @@ internal static class Program internal static bool Success; private static unsafe int Main(string[] args) { + var x = new StructWithObjRefs + { + C1 = null, + C2 = null, + }; Success = true; PrintLine("Starting " + 1); @@ -393,15 +398,235 @@ private static void TestGC() PrintString("GC Collection Count " + i.ToString() + " "); PrintLine(GC.CollectionCount(i).ToString()); } - if(!TestObjectRefInUncoveredShadowStackSlot()) + + if (!TestObjectRefInUncoveredShadowStackSlot()) { FailTest("struct Child1 alive unexpectedly"); - } - EndTest(true); + + if (!TestRhpAssignRefWithClassInStructGC()) + { + FailTest(); + return; + } + + EndTest(TestGeneration2Rooting()); } + private static Parent aParent; + private static ParentOfStructWithObjRefs aParentOfStructWithObjRefs; private static WeakReference childRef; + + private static unsafe bool TestRhpAssignRefWithClassInStructGC() + { + bool result = true; + + var parentRef = CreateParentWithStruct(); + result &= BumpToGen(parentRef, 1); + result &= BumpToGen(parentRef, 2); + + StoreChildInC1(); + GC.Collect(1); + PrintLine("GC finished"); + + if (!childRef.IsAlive) + { + PrintLine("Child died unexpectedly"); + result = false; + } + + KillParentWithStruct(); + GC.Collect(); + if (childRef.IsAlive) + { + PrintLine("Child alive unexpectedly"); + result = false; + } + if (parentRef.IsAlive) + { + PrintLine("Parent of struct Child1 alive unexpectedly"); + result = false; + } + return result; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool BumpToGen(WeakReference reference, int expectedGeneration) + { + GC.Collect(); + var target = reference.Target; + if (target == null) + { + PrintLine("WeakReference died unexpectedly"); + return false; + } + if (GC.GetGeneration(target) is { } actualGeneration && actualGeneration != expectedGeneration) + { + PrintLine("WeakReference is in gen " + actualGeneration + " instead of " + expectedGeneration); + return false; + } + return true; + } + + private static bool TestGeneration2Rooting() + { + var parent = CreateParent(); + GC.Collect(); // parent moves to gen1 + GC.Collect(); // parent moves to gen2 + if (!CheckParentGeneration()) return false; + + // store our children in the gen2 object + var child1 = StoreProperty(); + var child2 = StoreField(); + + KillParent(); // even though we kill the parent, it should survive as we do not collect gen2 + GC.Collect(1); + + // the parent should have kept the children alive + bool parentAlive = parent.IsAlive; + bool 
child1Alive = child1.IsAlive; + bool child2Alive = child2.IsAlive; + if (!parentAlive) + { + PrintLine("Parent died unexpectedly"); + return false; + } + + if (!child1Alive) + { + PrintLine("Child1 died unexpectedly"); + return false; + } + + if (!child2Alive) + { + PrintLine("Child2 died unexpectedly"); + return false; + } + + // Test struct assignment keeps fields alive + var parentRef = CreateParentWithStruct(); + GC.Collect(); // move parent to gen1 + GC.Collect(); // move parent to gen2 + StoreChildInC1(); // store ephemeral object in gen 2 object via struct assignment + KillParentWithStruct(); + GC.Collect(1); + + if (childRef.IsAlive) + { + PrintLine("Child1 gen:" + GC.GetGeneration(childRef.Target)); + } + + if (!childRef.IsAlive) + { + PrintLine("struct Child1 died unexpectedly"); + return false; + } + if (!parentRef.IsAlive) + { + PrintLine("parent of struct Child1 died unexpectedly"); + return false; + } + + return true; + } + + class ParentOfStructWithObjRefs + { + internal StructWithObjRefs StructWithObjRefs; + } + + struct StructWithObjRefs + { + internal Child C1; + internal Child C2; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static WeakReference CreateParent() + { + var parent = new Parent(); + aParent = parent; + return new WeakReference(parent); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static WeakReference CreateStruct() + { + var parent = new Parent(); + aParent = parent; + return new WeakReference(parent); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void KillParent() + { + aParent = null; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static bool CheckParentGeneration() + { + int actualGen = GC.GetGeneration(aParent); + if (actualGen != 2) + { + PrintLine("Parent Object is not in expected generation 2 but in " + actualGen); + return false; + } + return true; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static WeakReference StoreProperty() + { + var child = new Child(); + aParent.Child1 = child; + return new WeakReference(child); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static WeakReference StoreField() + { + var child = new Child(); + aParent.Child2 = child; + return new WeakReference(child); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + unsafe static WeakReference CreateParentWithStruct() + { + var parent = new ParentOfStructWithObjRefs(); + aParentOfStructWithObjRefs = parent; + return new WeakReference(parent); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void KillParentWithStruct() + { + aParentOfStructWithObjRefs = null; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static unsafe void StoreChildInC1() + { + var child = new Child(); + aParentOfStructWithObjRefs.StructWithObjRefs = new StructWithObjRefs + { + C1 = child, + }; + childRef = new WeakReference(child); + } + + public class Parent + { + public Child Child1 { get; set; } + public Child Child2; + } + + public class Child + { + } + // This test is to catch where slots are allocated on the shadow stack uncovering object references that were there previously. // If this happens in the call to GC.Collect, which at the time of writing allocate 12 bytes in the call, 3 slots, then any objects that were in those // 3 slots will not be collected as they will now be (back) in the range of bottom of stack -> top of stack. 
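
The GC tests added above all follow the same pattern: promote a parent object to an older generation with repeated collections, store a freshly allocated child into it, run a collection that only covers the younger generations, and use a `WeakReference` to observe whether the child survived (which it should, if the write barrier recorded the cross-generation store). The following is a minimal standalone sketch of that pattern, not part of the patch; it uses only the public `GC`/`WeakReference` API, assumes the usual promote-one-generation-per-collection behaviour, and all names in it are illustrative.

```c#
using System;
using System.Runtime.CompilerServices;

// Illustrative sketch only -- not part of the HelloWasm test; names are made up.
static class GcRootingSketch
{
    private static object[] s_gen2Parent;   // strong root that we promote towards gen2

    public static bool ChildSurvivesEphemeralCollection()
    {
        s_gen2Parent = new object[1];
        GC.Collect();                       // surviving parent is promoted once...
        GC.Collect();                       // ...and again, typically landing in gen2

        WeakReference childRef = StoreChild();

        GC.Collect(1);                      // collect gen0/gen1 only; the gen2 parent is only
                                            // rescanned if the write barrier marked the store
        return childRef.IsAlive;            // expected: true when the barrier works correctly
    }

    [MethodImpl(MethodImplOptions.NoInlining)] // keep the child out of the caller's locals
    private static WeakReference StoreChild()
    {
        var child = new object();
        s_gen2Parent[0] = child;            // cross-generation reference store
        return new WeakReference(child);
    }
}
```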
@@ -421,10 +646,6 @@ static unsafe void CreateObjectRefsInShadowStack() childRef = new WeakReference(child); } - public class Child - { - } - private static unsafe void TestBoxUnboxDifferentSizes() { StartTest("Box/Unbox different sizes"); @@ -2051,6 +2272,8 @@ private static unsafe bool CkFinite64(ulong value) static void TestIntOverflows() { + TestCharInOvf(); + TestSignedIntAddOvf(); TestSignedLongAddOvf(); @@ -2108,6 +2331,21 @@ private static void TestSignedLongAddOvf() EndTest(true); } + private static void TestCharInOvf() + { + // Just checks the compiler can handle the char type + // This was failing for https://github.com/dotnet/corert/blob/f542d97f26e87f633310e67497fb01dad29987a5/src/System.Private.CoreLib/shared/System/Environment.Unix.cs#L111 + StartTest("Test char add overflows"); + char opChar = '1'; + int op32r = 2; + if (checked(opChar + op32r) != 51) + { + FailTest("No overflow for char failed"); // check not always throwing an exception + return; + } + PassTest(); + } + private static void TestSignedIntAddOvf() { StartTest("Test int add overflows");
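
The `TestCharInOvf` test above exercises checked addition where one operand is a `char`. For reference, a minimal standalone sketch of the same arithmetic (the class and variable names here are illustrative, not part of the test suite): `'1'` has the numeric value 49, the `char` operand is promoted to `int` before the add, so the checked addition yields 51 and cannot overflow; an `OverflowException` is only possible when the promoted `int` arithmetic itself overflows.

```c#
using System;

class CheckedCharDemo   // standalone illustration, not part of HelloWasm
{
    static void Main()
    {
        char c = '1';               // U+0031, numeric value 49
        int n = 2;

        // char is promoted to int before the add, so this is a checked 32-bit
        // addition of 49 + 2 and simply yields 51 -- no overflow is possible here.
        int sum = checked(c + n);
        Console.WriteLine(sum);     // 51

        // checked only throws when the promoted int arithmetic actually overflows:
        int big = int.MaxValue;
        try { _ = checked(big + n); }
        catch (OverflowException) { Console.WriteLine("overflowed as expected"); }
    }
}
```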