From 25bf797e42db885f3e05dea2c04ee6dadfd7b45b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Wed, 3 Dec 2025 00:02:09 +0100 Subject: [PATCH 1/2] Switch NameMangler to UTF-8 --- .../tools/Common/Compiler/NameMangler.cs | 4 +- .../Common/Compiler/NativeAotNameMangler.cs | 301 ++++++++++++------ .../Compiler/ObjectWriter/SectionWriter.cs | 11 + .../tools/Common/Internal/Text/Utf8String.cs | 18 ++ .../Common/Internal/Text/Utf8StringBuilder.cs | 5 + .../TypesDebugInfoWriter.cs | 7 +- .../CodeView/CodeViewSymbolsBuilder.cs | 10 +- .../CodeView/CodeViewTypesBuilder.cs | 14 +- .../ObjectWriter/Dwarf/DwarfBuilder.cs | 3 +- .../ObjectWriter/Dwarf/DwarfInfoWriter.cs | 10 + .../Compiler/UnixNodeMangler.cs | 2 +- .../Compiler/WindowsNodeMangler.cs | 2 +- 12 files changed, 275 insertions(+), 112 deletions(-) diff --git a/src/coreclr/tools/Common/Compiler/NameMangler.cs b/src/coreclr/tools/Common/Compiler/NameMangler.cs index 9d182290fde13d..8e327e85ad88fd 100644 --- a/src/coreclr/tools/Common/Compiler/NameMangler.cs +++ b/src/coreclr/tools/Common/Compiler/NameMangler.cs @@ -25,9 +25,9 @@ public NameMangler(NodeMangler nodeMangler) public abstract string CompilationUnitPrefix { get; set; } - public abstract string SanitizeName(string s, bool typeName = false); + public abstract Utf8String SanitizeName(Utf8String s); - public abstract string GetMangledTypeName(TypeDesc type); + public abstract Utf8String GetMangledTypeName(TypeDesc type); public abstract Utf8String GetMangledMethodName(MethodDesc method); diff --git a/src/coreclr/tools/Common/Compiler/NativeAotNameMangler.cs b/src/coreclr/tools/Common/Compiler/NativeAotNameMangler.cs index 2a8807f90fb3bf..2f00a4fb19b360 100644 --- a/src/coreclr/tools/Common/Compiler/NativeAotNameMangler.cs +++ b/src/coreclr/tools/Common/Compiler/NativeAotNameMangler.cs @@ -3,13 +3,14 @@ using System; using System.Collections.Generic; +using System.Diagnostics; +using System.Runtime.CompilerServices; using System.Security.Cryptography; using System.Text; using Internal.Text; using Internal.TypeSystem; using Internal.TypeSystem.Ecma; -using System.Diagnostics; namespace ILCompiler { @@ -36,7 +37,7 @@ public override string CompilationUnitPrefix // // Turn a name into a valid C/C++ identifier // - public override string SanitizeName(string s, bool typeName = false) + private static string SanitizeName(string s) { StringBuilder sb = null; for (int i = 0; i < s.Length; i++) @@ -76,6 +77,61 @@ public override string SanitizeName(string s, bool typeName = false) return sanitizedName; } + public override Utf8String SanitizeName(Utf8String s) + => SanitizeName(s.AsSpan()); + + private static Utf8String SanitizeName(ReadOnlySpan s) + { + Utf8StringBuilder sb = null; + for (int i = 0; i < s.Length; i++) + { + byte c = s[i]; + + if (char.IsAsciiLetter((char)c) || c == '_') + { + sb?.Append((char)c); + continue; + } + + if (char.IsAsciiDigit((char)c)) + { + // C identifiers cannot start with a digit. Prepend underscores. + if (i == 0) + { + sb ??= new Utf8StringBuilder(s.Length + 2); + sb.Append('_'); + } + sb?.Append((char)c); + continue; + } + + if (sb == null) + { + sb = new Utf8StringBuilder(s.Length); + if (i > 0) + sb.Append(s.Slice(0, i)); + } + + // Everything else is replaced by underscore. + // TODO: We assume that there won't be collisions with our own or C++ built-in identifiers. + sb.Append('_'); + + // If this is a multibyte codepoint, seek to the next character + if ((sbyte)c < 0) + { + while ((i + 1 < s.Length) && ((s[i + 1] & 0b1100_0000) == 0b1000_0000)) + i++; + } + } + + Utf8String sanitizedName = (sb != null) ? sb.ToUtf8String() : new Utf8String(s.ToArray()); + + // The character sequences denoting generic instantiations, arrays, byrefs, or pointers must be + // restricted to that use only. Replace them if they happened to be used in any identifiers in + // the compilation input. + return sanitizedName; + } + private static byte[] GetBytesFromString(string literal) { byte[] bytes = new byte[checked(literal.Length * 2)]; @@ -117,7 +173,7 @@ private string SanitizeNameWithHash(string literal) /// /// Dictionary given a mangled name for a given /// - private Dictionary _mangledTypeNames = new Dictionary(); + private Dictionary _mangledTypeNames = new Dictionary(); /// /// Given a set of names check if @@ -126,36 +182,62 @@ private string SanitizeNameWithHash(string literal) /// Name to check for uniqueness. /// Set of names already used. /// A name based on that is not part of . - private static string DisambiguateName(string origName, HashSet set) + private static Utf8String DisambiguateName(Utf8String origName, HashSet set) { - int iter = 0; - string result = origName; - while (set.Contains(result)) + Utf8String result = origName; + byte[] buffer = null; + for (uint iter = 0; set.Contains(result); iter++) { - result = string.Concat(origName, "_", (iter++).ToStringInvariant()); + int neededLength = origName.Length + 1 + CountDigits(iter); + + if (buffer == null || buffer.Length != neededLength) + { + buffer = new byte[neededLength]; + origName.AsSpan().CopyTo(buffer); + buffer[origName.Length] = (byte)'_'; + result = new Utf8String(buffer); + } + + bool b = iter.TryFormat(new Span(buffer).Slice(origName.Length + 1), out _); + Debug.Assert(b); } return result; } - public override string GetMangledTypeName(TypeDesc type) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int CountDigits(uint value) + { + // Algorithm based on https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster. + ReadOnlySpan table = + [ + 4294967296, 8589934582, 8589934582, 8589934582, 12884901788, 12884901788, 12884901788, 17179868184, + 17179868184, 17179868184, 21474826480, 21474826480, 21474826480, 21474826480, 25769703776, 25769703776, + 25769703776, 30063771072, 30063771072, 30063771072, 34349738368, 34349738368, 34349738368, 34349738368, + 38554705664, 38554705664, 38554705664, 41949672960, 41949672960, 41949672960, 42949672960, 42949672960, + ]; + long tableValue = table[(int)uint.Log2(value)]; + return (int)((value + tableValue) >> 32); + } + + + public override Utf8String GetMangledTypeName(TypeDesc type) { lock (this) { - string mangledName; - if (_mangledTypeNames.TryGetValue(type, out mangledName)) + if (_mangledTypeNames.TryGetValue(type, out Utf8String mangledName)) return mangledName; return ComputeMangledTypeName(type); } } - private const string EnterNameScopeSequence = "<"; - private const string ExitNameScopeSequence = ">"; - private const string DelimitNameScopeSequence = ","; + private static Utf8String EnterNameScopeSequence = new Utf8String([ (byte)'<' ]); + private static Utf8String ExitNameScopeSequence = new Utf8String([(byte)'>']); + private static Utf8String DelimitNameScopeSequence = new Utf8String([(byte)',']); - protected string NestMangledName(string name) + protected Utf8String NestMangledName(Utf8String name) { - return EnterNameScopeSequence + name + ExitNameScopeSequence; + return Utf8String.Concat(EnterNameScopeSequence, name, ExitNameScopeSequence); } /// @@ -166,44 +248,54 @@ protected string NestMangledName(string name) /// /// Type to mangled /// Mangled name for . - private string ComputeMangledTypeName(TypeDesc type) + private Utf8String ComputeMangledTypeName(TypeDesc type) { if (type is EcmaType ecmaType) { - string assemblyName = ((EcmaAssembly)ecmaType.Module).GetName().Name; - bool isSystemPrivate = assemblyName.StartsWith("System.Private."); - - // Abbreviate System.Private to S.P. This might conflict with user defined assembly names, - // but we already have a problem due to running SanitizeName without disambiguating the result - // This problem needs a better fix. - if (isSystemPrivate) - assemblyName = string.Concat("S.P.", assemblyName.AsSpan(15)); - string prependAssemblyName = SanitizeName(assemblyName); - - var deduplicator = new HashSet(); - // Add consistent names for all types in the module, independent on the order in which // they are compiled lock (this) { - bool isSystemModule = ecmaType.Module == ecmaType.Context.SystemModule; - - if (!_mangledTypeNames.ContainsKey(type)) + if (!_mangledTypeNames.TryGetValue(type, out Utf8String name)) { + bool isSystemModule = ecmaType.Module == ecmaType.Context.SystemModule; + + string assemblyName = ((EcmaAssembly)ecmaType.Module).GetName().Name; + bool isSystemPrivate = assemblyName.StartsWith("System.Private."); + + // Abbreviate System.Private to S.P. This might conflict with user defined assembly names, + // but we already have a problem due to running SanitizeName without disambiguating the result + // This problem needs a better fix. + if (isSystemPrivate) + assemblyName = string.Concat("S.P.", assemblyName.AsSpan(15)); + Utf8String prependAssemblyName = SanitizeName(assemblyName); + + var deduplicator = new HashSet(); + + var sb = new Utf8StringBuilder(); foreach (MetadataType t in ecmaType.Module.GetAllTypes()) { - string name = t.GetFullName(); + sb.Clear().Append(prependAssemblyName).Append('_'); + + AppendTypeName(sb, t); - // Include encapsulating type - DefType containingType = t.ContainingType; - while (containingType != null) + static void AppendTypeName(Utf8StringBuilder sb, MetadataType t) { - name = containingType.GetFullName() + "_" + name; - containingType = containingType.ContainingType; + MetadataType containingType = t.ContainingType; + if (containingType != null) + { + AppendTypeName(sb, containingType); + sb.Append('_'); + } + else + { + ReadOnlySpan ns = t.Namespace; + if (ns.Length > 0) + sb.Append(SanitizeName(ns)).Append('_'); + } + sb.Append(SanitizeName(t.Name)); } - name = prependAssemblyName + "_" + SanitizeName(name, true); - // If this is one of the well known types, use a shorter name // We know this won't conflict because all the other types are // prefixed by the assembly name. @@ -211,76 +303,88 @@ private string ComputeMangledTypeName(TypeDesc type) { switch (t.Category) { - case TypeFlags.Boolean: name = "Bool"; break; - case TypeFlags.Byte: name = "UInt8"; break; - case TypeFlags.SByte: name = "Int8"; break; - case TypeFlags.UInt16: name = "UInt16"; break; - case TypeFlags.Int16: name = "Int16"; break; - case TypeFlags.UInt32: name = "UInt32"; break; - case TypeFlags.Int32: name = "Int32"; break; - case TypeFlags.UInt64: name = "UInt64"; break; - case TypeFlags.Int64: name = "Int64"; break; - case TypeFlags.Char: name = "Char"; break; - case TypeFlags.Double: name = "Double"; break; - case TypeFlags.Single: name = "Single"; break; - case TypeFlags.IntPtr: name = "IntPtr"; break; - case TypeFlags.UIntPtr: name = "UIntPtr"; break; + case TypeFlags.Boolean: sb.Clear().Append("Bool"u8); break; + case TypeFlags.Byte: sb.Clear().Append("UInt8"u8); break; + case TypeFlags.SByte: sb.Clear().Append("Int8"u8); break; + case TypeFlags.UInt16: sb.Clear().Append("UInt16"u8); break; + case TypeFlags.Int16: sb.Clear().Append("Int16"u8); break; + case TypeFlags.UInt32: sb.Clear().Append("UInt32"u8); break; + case TypeFlags.Int32: sb.Clear().Append("Int32"u8); break; + case TypeFlags.UInt64: sb.Clear().Append("UInt64"u8); break; + case TypeFlags.Int64: sb.Clear().Append("Int64"u8); break; + case TypeFlags.Char: sb.Clear().Append("Char"u8); break; + case TypeFlags.Double: sb.Clear().Append("Double"u8); break; + case TypeFlags.Single: sb.Clear().Append("Single"u8); break; + case TypeFlags.IntPtr: sb.Clear().Append("IntPtr"u8); break; + case TypeFlags.UIntPtr: sb.Clear().Append("UIntPtr"u8); break; default: if (t.IsObject) - name = "Object"; + sb.Clear().Append("Object"u8); else if (t.IsString) - name = "String"; + sb.Clear().Append("String"u8); break; } } + name = sb.ToUtf8String(); + // Ensure that name is unique and update our tables accordingly. name = DisambiguateName(name, deduplicator); deduplicator.Add(name); _mangledTypeNames.Add(t, name); } + name = _mangledTypeNames[type]; } - return _mangledTypeNames[type]; + return name; } } - string mangledName; + Utf8String mangledName; switch (type.Category) { case TypeFlags.Array: - mangledName = "__MDArray" + - EnterNameScopeSequence + - GetMangledTypeName(((ArrayType)type).ElementType) + - DelimitNameScopeSequence + - ((ArrayType)type).Rank.ToStringInvariant() + - ExitNameScopeSequence; + mangledName = new Utf8StringBuilder().Append("__MDArray"u8) + .Append(EnterNameScopeSequence) + .Append(GetMangledTypeName(((ArrayType)type).ElementType)) + .Append(DelimitNameScopeSequence) + .Append(((ArrayType)type).Rank.ToStringInvariant()) + .Append(ExitNameScopeSequence).ToUtf8String(); break; case TypeFlags.SzArray: - mangledName = "__Array" + NestMangledName(GetMangledTypeName(((ArrayType)type).ElementType)); + mangledName = new Utf8StringBuilder().Append("__Array"u8) + .Append(NestMangledName(GetMangledTypeName(((ArrayType)type).ElementType))).ToUtf8String(); break; case TypeFlags.ByRef: - mangledName = GetMangledTypeName(((ByRefType)type).ParameterType) + NestMangledName("ByRef"); + mangledName = new Utf8StringBuilder() + .Append(GetMangledTypeName(((ByRefType)type).ParameterType)) + .Append(NestMangledName("ByRef")).ToUtf8String(); break; case TypeFlags.Pointer: - mangledName = GetMangledTypeName(((PointerType)type).ParameterType) + NestMangledName("Pointer"); + mangledName = new Utf8StringBuilder() + .Append(GetMangledTypeName(((PointerType)type).ParameterType)) + .Append(NestMangledName("Pointer")).ToUtf8String(); break; case TypeFlags.FunctionPointer: + { var fnPtrType = (FunctionPointerType)type; - mangledName = "__FnPtr_" + ((int)fnPtrType.Signature.Flags).ToString("X2") + EnterNameScopeSequence; - mangledName += GetMangledTypeName(fnPtrType.Signature.ReturnType); + var sb = new Utf8StringBuilder(); + sb.Append("__FnPtr_"u8).Append(((int)fnPtrType.Signature.Flags).ToString("X2")).Append(EnterNameScopeSequence); + sb.Append(GetMangledTypeName(fnPtrType.Signature.ReturnType)); - mangledName += EnterNameScopeSequence; + sb.Append(EnterNameScopeSequence); for (int i = 0; i < fnPtrType.Signature.Length; i++) { if (i != 0) - mangledName += DelimitNameScopeSequence; - mangledName += GetMangledTypeName(fnPtrType.Signature[i]); + sb.Append(DelimitNameScopeSequence); + sb.Append(GetMangledTypeName(fnPtrType.Signature[i])); } - mangledName += ExitNameScopeSequence; + sb.Append(ExitNameScopeSequence); - mangledName += ExitNameScopeSequence; + sb.Append(ExitNameScopeSequence); + mangledName = sb.ToUtf8String(); break; + } default: // Case of a generic type. If `type' is a type definition we use the type name // for mangling, otherwise we use the mangling of the type and its generic type @@ -288,19 +392,19 @@ private string ComputeMangledTypeName(TypeDesc type) var typeDefinition = type.GetTypeDefinition(); if (typeDefinition != type) { - mangledName = GetMangledTypeName(typeDefinition); + var sb = new Utf8StringBuilder(); + sb.Append(GetMangledTypeName(typeDefinition)); var inst = type.Instantiation; - string mangledInstantiation = ""; + sb.Append(EnterNameScopeSequence); for (int i = 0; i < inst.Length; i++) { - string instArgName = GetMangledTypeName(inst[i]); if (i > 0) - mangledInstantiation += "__"; - - mangledInstantiation += instArgName; + sb.Append("__"u8); + sb.Append(GetMangledTypeName(inst[i])); } - mangledName += NestMangledName(mangledInstantiation); + sb.Append(ExitNameScopeSequence); + mangledName = sb.ToUtf8String(); } else if (type is IPrefixMangledMethod) { @@ -314,7 +418,7 @@ private string ComputeMangledTypeName(TypeDesc type) { // This is a type definition. Since we didn't fall in the `is EcmaType` case above, // it's likely a compiler-generated type. - mangledName = SanitizeName(((DefType)type).GetFullName(), true); + mangledName = SanitizeName(((DefType)type).GetFullName()); } break; } @@ -384,13 +488,13 @@ private Utf8String GetPrefixMangledSignatureName(IPrefixMangledSignature prefixM sb.Append(EnterNameScopeSequence); - string sigRetTypeName = GetMangledTypeName(signature.ReturnType); + Utf8String sigRetTypeName = GetMangledTypeName(signature.ReturnType); sb.Append(sigRetTypeName); for (int i = 0; i < signature.Length; i++) { sb.Append("__"u8); - string sigArgName = GetMangledTypeName(signature[i]); + Utf8String sigArgName = GetMangledTypeName(signature[i]); sb.Append(sigArgName); } @@ -411,7 +515,7 @@ private Utf8String ComputeUnqualifiedMangledMethodName(MethodDesc method) { if (method is EcmaMethod) { - var deduplicator = new HashSet(); + var deduplicator = new HashSet(); // Add consistent names for all methods of the type, independent on the order in which // they are compiled @@ -421,7 +525,7 @@ private Utf8String ComputeUnqualifiedMangledMethodName(MethodDesc method) { foreach (var m in method.OwningType.GetMethods()) { - string name = SanitizeName(m.GetName()); + Utf8String name = SanitizeName(m.Name); name = DisambiguateName(name, deduplicator); deduplicator.Add(name); @@ -447,7 +551,7 @@ private Utf8String ComputeUnqualifiedMangledMethodName(MethodDesc method) var inst = method.Instantiation; for (int i = 0; i < inst.Length; i++) { - string instArgName = GetMangledTypeName(inst[i]); + Utf8String instArgName = GetMangledTypeName(inst[i]); if (i > 0) sb.Append("__"u8); sb.Append(instArgName); @@ -480,7 +584,7 @@ private Utf8String ComputeUnqualifiedMangledMethodName(MethodDesc method) else { // Assume that Name is unique for all other methods - utf8MangledName = new Utf8String(SanitizeName(method.GetName())); + utf8MangledName = SanitizeName(method.Name); } } @@ -503,11 +607,11 @@ public override Utf8String GetMangledFieldName(FieldDesc field) private Utf8String ComputeMangledFieldName(FieldDesc field) { - string prependTypeName = GetMangledTypeName(field.OwningType); + Utf8String prependTypeName = GetMangledTypeName(field.OwningType); if (field is EcmaField) { - var deduplicator = new HashSet(); + var deduplicator = new HashSet(); // Add consistent names for all fields of the type, independent on the order in which // they are compiled @@ -515,17 +619,18 @@ private Utf8String ComputeMangledFieldName(FieldDesc field) { if (!_mangledFieldNames.ContainsKey(field)) { + Utf8StringBuilder sb = new Utf8StringBuilder(); foreach (var f in field.OwningType.GetFields()) { - string name = SanitizeName(f.GetName()); + sb.Clear().Append(prependTypeName).Append("__"u8); + Utf8String name = SanitizeName(f.Name); name = DisambiguateName(name, deduplicator); deduplicator.Add(name); - if (prependTypeName != null) - name = prependTypeName + "__" + name; + sb.Append(name); - _mangledFieldNames.Add(f, name); + _mangledFieldNames.Add(f, sb.ToUtf8String()); } } return _mangledFieldNames[field]; @@ -533,12 +638,8 @@ private Utf8String ComputeMangledFieldName(FieldDesc field) } - string mangledName = SanitizeName(field.GetName()); - - if (prependTypeName != null) - mangledName = prependTypeName + "__" + mangledName; - - Utf8String utf8MangledName = new Utf8String(mangledName); + Utf8String mangledName = SanitizeName(field.Name); + Utf8String utf8MangledName = new Utf8StringBuilder().Append(prependTypeName).Append("__"u8).Append(mangledName).ToUtf8String(); lock (this) { diff --git a/src/coreclr/tools/Common/Compiler/ObjectWriter/SectionWriter.cs b/src/coreclr/tools/Common/Compiler/ObjectWriter/SectionWriter.cs index 7ccfa9651a2fc7..2378830f5e094a 100644 --- a/src/coreclr/tools/Common/Compiler/ObjectWriter/SectionWriter.cs +++ b/src/coreclr/tools/Common/Compiler/ObjectWriter/SectionWriter.cs @@ -6,6 +6,7 @@ using System.Numerics; using System.Text; using ILCompiler.DependencyAnalysis; +using Internal.Text; namespace ILCompiler.ObjectWriter { @@ -115,6 +116,16 @@ public readonly void WriteLittleEndian(T value) bufferWriter.Advance(value.WriteLittleEndian(buffer)); } + public readonly void WriteUtf8String(Utf8String value) + { + IBufferWriter bufferWriter = _sectionData.BufferWriter; + int size = value.Length + 1; + Span buffer = bufferWriter.GetSpan(size); + value.AsSpan().CopyTo(buffer); + buffer[size - 1] = 0; + bufferWriter.Advance(size); + } + public readonly void WriteUtf8String(string value) { IBufferWriter bufferWriter = _sectionData.BufferWriter; diff --git a/src/coreclr/tools/Common/Internal/Text/Utf8String.cs b/src/coreclr/tools/Common/Internal/Text/Utf8String.cs index 3614639fc99c6e..5ade766c61d35f 100644 --- a/src/coreclr/tools/Common/Internal/Text/Utf8String.cs +++ b/src/coreclr/tools/Common/Internal/Text/Utf8String.cs @@ -84,5 +84,23 @@ public int CompareTo(Utf8String other) { return Compare(this, other); } + + public static Utf8String Concat(params ReadOnlySpan strings) + { + int length = 0; + foreach (Utf8String s in strings) + length += s.Length; + + var result = new byte[length]; + Span resultSpan = result; + + foreach (Utf8String s in strings) + { + s.AsSpan().CopyTo(resultSpan); + resultSpan = resultSpan.Slice(s.Length); + } + + return new Utf8String(result); + } } } diff --git a/src/coreclr/tools/Common/Internal/Text/Utf8StringBuilder.cs b/src/coreclr/tools/Common/Internal/Text/Utf8StringBuilder.cs index ee438c6e2209b6..0be41bbf645a15 100644 --- a/src/coreclr/tools/Common/Internal/Text/Utf8StringBuilder.cs +++ b/src/coreclr/tools/Common/Internal/Text/Utf8StringBuilder.cs @@ -16,6 +16,11 @@ public Utf8StringBuilder() { } + public Utf8StringBuilder(int capacity) + { + _buffer = new byte[capacity]; + } + public int Length => _length; public ReadOnlySpan AsSpan() => _buffer.AsSpan(0, _length); diff --git a/src/coreclr/tools/Common/TypeSystem/TypesDebugInfoWriter/TypesDebugInfoWriter.cs b/src/coreclr/tools/Common/TypeSystem/TypesDebugInfoWriter/TypesDebugInfoWriter.cs index ed387a866bc552..27ad49899a31a1 100644 --- a/src/coreclr/tools/Common/TypeSystem/TypesDebugInfoWriter/TypesDebugInfoWriter.cs +++ b/src/coreclr/tools/Common/TypeSystem/TypesDebugInfoWriter/TypesDebugInfoWriter.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Runtime.InteropServices; +using Internal.Text; namespace Internal.TypeSystem.TypesDebugInfo { @@ -24,7 +25,7 @@ uint GetCompleteClassTypeIndex(ClassTypeDescriptor classTypeDescriptor, ClassFie uint GetPrimitiveTypeIndex(TypeDesc type); - string GetMangledName(TypeDesc type); + Utf8String GetMangledName(TypeDesc type); } [StructLayout(LayoutKind.Sequential)] @@ -39,14 +40,14 @@ public struct EnumTypeDescriptor { public uint ElementType; public ulong ElementCount; - public string Name; + public Utf8String Name; } [StructLayout(LayoutKind.Sequential)] public struct ClassTypeDescriptor { public int IsStruct; - public string Name; + public Utf8String Name; public uint BaseClassId; public ulong InstanceSize; } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs index e8faabae519391..132b97a0e99aa3 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs @@ -11,6 +11,7 @@ using ILCompiler.DependencyAnalysis; using Internal.JitInterface; +using Internal.Text; using Internal.TypeSystem; using Internal.TypeSystem.TypesDebugInfo; @@ -275,7 +276,7 @@ public void EmitLineInfo( } } - public void WriteUserDefinedTypes(IList<(string, uint)> userDefinedTypes) + public void WriteUserDefinedTypes(IList<(Utf8String, uint)> userDefinedTypes) { using var symbolSubsection = GetSubsection(DebugSymbolsSubsectionType.Symbols); foreach (var (name, typeIndex) in userDefinedTypes) @@ -399,6 +400,13 @@ public void Write(ulong value) _bufferWriter.Advance(sizeof(ulong)); } + public void Write(Utf8String value) + { + int byteCount = value.Length + 1; + value.AsSpan().CopyTo(_bufferWriter.GetSpan(byteCount)); + _bufferWriter.Advance(byteCount); + } + public void Write(string value) { int byteCount = Encoding.UTF8.GetByteCount(value) + 1; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewTypesBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewTypesBuilder.cs index c5aa7f8b4ee2bc..767fc8997829d1 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewTypesBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewTypesBuilder.cs @@ -11,6 +11,7 @@ using ILCompiler.DependencyAnalysis; using Internal.JitInterface; +using Internal.Text; using Internal.TypeSystem; using Internal.TypeSystem.TypesDebugInfo; @@ -44,11 +45,11 @@ internal sealed class CodeViewTypesBuilder : ITypesDebugInfoWriter private readonly uint _classVTableTypeIndex; private readonly uint _vfuncTabTypeIndex; - private readonly List<(string, uint)> _userDefinedTypes = new(); + private readonly List<(Utf8String, uint)> _userDefinedTypes = new(); private uint _nextTypeIndex = 0x1000; - public IList<(string, uint)> UserDefinedTypes => _userDefinedTypes; + public IList<(Utf8String, uint)> UserDefinedTypes => _userDefinedTypes; public CodeViewTypesBuilder(NameMangler nameMangler, int targetPointerSize, SectionWriter sectionWriter) { @@ -382,7 +383,7 @@ public uint GetMemberFunctionId(MemberFunctionIdTypeDescriptor memberIdDescripto return _nextTypeIndex++; } - public string GetMangledName(TypeDesc type) + public Utf8String GetMangledName(TypeDesc type) { return _nameMangler.GetMangledTypeName(type); } @@ -444,6 +445,13 @@ public void Write(ulong value) _bufferWriter.Advance(sizeof(ulong)); } + public void Write(Utf8String value) + { + int byteCount = value.Length + 1; + value.AsSpan().CopyTo(_bufferWriter.GetSpan(byteCount)); + _bufferWriter.Advance(byteCount); + } + public void Write(string value) { int byteCount = Encoding.UTF8.GetByteCount(value) + 1; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs index 99a1d8b53ac948..4dcccbdd32fc41 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs @@ -8,6 +8,7 @@ using System.IO; using System.Linq; using ILCompiler.DependencyAnalysis; +using Internal.Text; using Internal.TypeSystem; using Internal.TypeSystem.TypesDebugInfo; using static ILCompiler.ObjectWriter.DwarfNative; @@ -422,7 +423,7 @@ public uint GetMemberFunctionId(MemberFunctionIdTypeDescriptor memberIdDescripto return (uint)_memberFunctions.Count; } - public string GetMangledName(TypeDesc type) + public Utf8String GetMangledName(TypeDesc type) { return _nameMangler.GetMangledTypeName(type); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfoWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfoWriter.cs index 18b2c34a484685..89d42674ab4457 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfoWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfInfoWriter.cs @@ -7,6 +7,7 @@ using System.Collections.Generic; using System.Diagnostics; using ILCompiler.DependencyAnalysis; +using Internal.Text; using Internal.TypeSystem; namespace ILCompiler.ObjectWriter @@ -95,6 +96,15 @@ public void WriteAddressSize(ulong value) } } + public void WriteStringReference(Utf8String value) + { + long stringsOffset = _stringTableWriter.Position; + _stringTableWriter.WriteUtf8String(value); + + Debug.Assert(stringsOffset < uint.MaxValue); + _infoSectionWriter.EmitSymbolReference(RelocType.IMAGE_REL_BASED_HIGHLOW, ".debug_str", stringsOffset); + } + public void WriteStringReference(string value) { long stringsOffset = _stringTableWriter.Position; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs index 5ef33807b0f448..089cc16657be35 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs @@ -23,7 +23,7 @@ public sealed override string MethodTable(TypeDesc type) if (type.IsValueType) mangledJustTypeName = MangledBoxedTypeName(type); else - mangledJustTypeName = NameMangler.GetMangledTypeName(type); + mangledJustTypeName = NameMangler.GetMangledTypeName(type).ToString(); return "_ZTV" + mangledJustTypeName.Length.ToString(CultureInfo.InvariantCulture) + mangledJustTypeName; } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs index 95d6d53c55cea2..ff5fccecbad8ba 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs @@ -37,7 +37,7 @@ public sealed override string MethodTable(TypeDesc type) if (type.IsValueType) mangledJustTypeName = MangledBoxedTypeName(type); else - mangledJustTypeName = NameMangler.GetMangledTypeName(type); + mangledJustTypeName = NameMangler.GetMangledTypeName(type).ToString(); // "??_7TypeName@@6B@" is the C++ mangling for "const TypeName::`vftable'" // This, along with LF_VTSHAPE debug records added by the object writer From 6e89a2ced3f4805070abfc4d8bdc55ff985d18f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Strehovsk=C3=BD?= Date: Wed, 3 Dec 2025 09:30:05 +0100 Subject: [PATCH 2/2] FB --- .../aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs | 9 +++------ .../ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs index 089cc16657be35..b15a47fa0f634f 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/UnixNodeMangler.cs @@ -18,12 +18,9 @@ public sealed override string MangledBoxedTypeName(TypeDesc type) public sealed override string MethodTable(TypeDesc type) { - string mangledJustTypeName; - - if (type.IsValueType) - mangledJustTypeName = MangledBoxedTypeName(type); - else - mangledJustTypeName = NameMangler.GetMangledTypeName(type).ToString(); + string mangledJustTypeName = type.IsValueType + ? MangledBoxedTypeName(type) + : NameMangler.GetMangledTypeName(type).ToString(); return "_ZTV" + mangledJustTypeName.Length.ToString(CultureInfo.InvariantCulture) + mangledJustTypeName; } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs index ff5fccecbad8ba..be2930565d322e 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/WindowsNodeMangler.cs @@ -32,12 +32,9 @@ public sealed override string MangledBoxedTypeName(TypeDesc type) public sealed override string MethodTable(TypeDesc type) { - string mangledJustTypeName; - - if (type.IsValueType) - mangledJustTypeName = MangledBoxedTypeName(type); - else - mangledJustTypeName = NameMangler.GetMangledTypeName(type).ToString(); + string mangledJustTypeName = type.IsValueType + ? MangledBoxedTypeName(type) + : NameMangler.GetMangledTypeName(type).ToString(); // "??_7TypeName@@6B@" is the C++ mangling for "const TypeName::`vftable'" // This, along with LF_VTSHAPE debug records added by the object writer