Skip to content

Commit d3f7d59

Browse files
authored
[mono] Use unsigned char when computing UTF8 string hashes (#83273)
* [mono] Use `unsigned char` when computing UTF8 string hashes The C standard does not specify whether `char` is signed or unsigned, it is implementation defined. Apparently Android aarch64 makes a different choice than other platforms (at least macOS arm64 and Windows x64 give different results). Mono uses `mono_metadata_str_hash` in the AOT compiler and AOT runtime to optimize class name lookup. As a result, classes whose names include UTF-8 continuation bytes (with the high bit = 1) will hash differently in the AOT compiler and on the device. Fixes #82187 Fixes #78638 * [aot] add DEBUG_AOT_NAME_TABLE code for debugging the class names AOT compiler: Emits a second "class_name_table_debug" symbol that has all the class names and hashes as strings. AOT runtime: warns if a class is not found in the name cache * Add regression test
1 parent bbc7e06 commit d3f7d59

File tree

7 files changed

+114
-6
lines changed

7 files changed

+114
-6
lines changed

src/mono/mono/eglib/ghashtable.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -673,7 +673,7 @@ guint
673673
g_str_hash (gconstpointer v1)
674674
{
675675
guint hash = 0;
676-
char *p = (char *) v1;
676+
unsigned char *p = (unsigned char *) v1;
677677

678678
while (*p++)
679679
hash = (hash << 5) - (hash + *p);

src/mono/mono/metadata/metadata.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5524,7 +5524,8 @@ guint
55245524
mono_metadata_str_hash (gconstpointer v1)
55255525
{
55265526
/* Same as g_str_hash () in glib */
5527-
char *p = (char *) v1;
5527+
/* note: signed/unsigned char matters - we feed UTF-8 to this function, so the high bit will give different results if we don't match. */
5528+
unsigned char *p = (unsigned char *) v1;
55285529
guint hash = *p;
55295530

55305531
while (*p++) {

src/mono/mono/mini/aot-compiler.c

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11417,18 +11417,40 @@ emit_class_info (MonoAotCompile *acfg)
1141711417
/*
 * Node type used by emit_class_name_table () while building the class name
 * table. Nodes are linked through 'next'.
 */
typedef struct ClassNameTableEntry {
1141811418
guint32 token, index;
1141911419
struct ClassNameTableEntry *next;
11420+
#ifdef DEBUG_AOT_NAME_TABLE
11421+
/* Debug only: heap-allocated full class name; emit_class_name_table () frees it with g_free (). */
char *full_name;
11422+
/* Debug only: the hash stored for this entry; emit_class_name_table () stores the value already reduced modulo the table size. */
uint32_t hash;
11423+
#endif
1142011424
} ClassNameTableEntry;
1142111425

11426+
/*
 * get_class_full_name_for_hash:
 *
 *   Return the name of KLASS as produced by mono_type_get_name_full () for the
 * class's by-value type with MONO_TYPE_NAME_FORMAT_FULL_NAME. This is the
 * string that gets hashed for the class name table.
 * The callers in this file release the returned string with g_free ().
 */
static char*
11427+
get_class_full_name_for_hash (MonoClass *klass)
11428+
{
11429+
return mono_type_get_name_full (m_class_get_byval_arg (klass), MONO_TYPE_NAME_FORMAT_FULL_NAME);
11430+
}
11431+
11432+
/*
 * hash_for_class:
 *
 *   Return mono_metadata_str_hash () of the full name of KLASS. The temporary
 * name string is freed before returning. The result is the raw hash; it is
 * not reduced modulo any table size here.
 */
static uint32_t
11433+
hash_for_class (MonoClass *klass)
11434+
{
11435+
char *full_name = get_class_full_name_for_hash (klass);
11436+
uint32_t hash = mono_metadata_str_hash (full_name);
11437+
/* The name is only needed to compute the hash. */
g_free (full_name);
11438+
return hash;
11439+
}
11440+
1142211441
static void
1142311442
emit_class_name_table (MonoAotCompile *acfg)
1142411443
{
1142511444
int buf_size;
1142611445
guint32 token, hash;
1142711446
MonoClass *klass;
1142811447
GPtrArray *table;
11429-
char *full_name;
1143011448
guint8 *buf, *p;
1143111449
ClassNameTableEntry *entry, *new_entry;
11450+
#ifdef DEBUG_AOT_NAME_TABLE
11451+
int name_buf_size = 0;
11452+
guint8 *name_buf, *name_p;
11453+
#endif
1143211454

1143311455
/*
1143411456
* Construct a chained hash table for mapping class names to typedef tokens.
@@ -11446,13 +11468,17 @@ emit_class_name_table (MonoAotCompile *acfg)
1144611468
mono_error_cleanup (error);
1144711469
continue;
1144811470
}
11449-
full_name = mono_type_get_name_full (m_class_get_byval_arg (klass), MONO_TYPE_NAME_FORMAT_FULL_NAME);
11450-
hash = mono_metadata_str_hash (full_name) % table_size;
11451-
g_free (full_name);
11471+
hash = hash_for_class (klass) % table_size;
1145211472

1145311473
/* FIXME: Allocate from the mempool */
1145411474
new_entry = g_new0 (ClassNameTableEntry, 1);
1145511475
new_entry->token = token;
11476+
#ifdef DEBUG_AOT_NAME_TABLE
11477+
new_entry->full_name = get_class_full_name_for_hash (klass);
11478+
new_entry->hash = hash;
11479+
/* '%s'=%08x\n */
11480+
name_buf_size += strlen (new_entry->full_name) + strlen("''=\n") + 8;
11481+
#endif
1145611482

1145711483
entry = (ClassNameTableEntry *)g_ptr_array_index (table, hash);
1145811484
if (entry == NULL) {
@@ -11471,6 +11497,10 @@ emit_class_name_table (MonoAotCompile *acfg)
1147111497
/* Emit the table */
1147211498
buf_size = table->len * 4 + 4;
1147311499
p = buf = (guint8 *)g_malloc0 (buf_size);
11500+
#ifdef DEBUG_AOT_NAME_TABLE
11501+
name_buf_size ++; /* one extra trailing nul */
11502+
name_p = name_buf = (guint8 *)g_malloc0 (name_buf_size);
11503+
#endif
1147411504

1147511505
/* FIXME: Optimize memory usage */
1147611506
g_assert (table_size < 65000);
@@ -11488,14 +11518,28 @@ emit_class_name_table (MonoAotCompile *acfg)
1148811518
else
1148911519
encode_int16 (0, p, &p);
1149011520
}
11521+
#ifdef DEBUG_AOT_NAME_TABLE
11522+
if (entry != NULL) {
11523+
name_p += sprintf ((char*)name_p, "'%s'=%08x\n", entry->full_name, entry->hash);
11524+
g_free (entry->full_name);
11525+
}
11526+
#endif
1149111527
g_free (entry);
1149211528
}
11529+
#ifdef DEBUG_AOT_NAME_TABLE
11530+
g_assert (name_p - name_buf <= name_buf_size);
11531+
#endif
1149311532
g_assert (p - buf <= buf_size);
1149411533
g_ptr_array_free (table, TRUE);
1149511534

1149611535
emit_aot_data (acfg, MONO_AOT_TABLE_CLASS_NAME, "class_name_table", buf, GPTRDIFF_TO_INT (p - buf));
1149711536

1149811537
g_free (buf);
11538+
11539+
#ifdef DEBUG_AOT_NAME_TABLE
11540+
emit_aot_data (acfg, MONO_AOT_TABLE_CLASS_NAME_DEBUG, "class_name_table_debug", name_buf, GPTRDIFF_TO_INT (name_p - name_buf));
11541+
g_free (name_buf);
11542+
#endif
1149911543
}
1150011544

1150111545
static void

src/mono/mono/mini/aot-runtime.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2601,6 +2601,10 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
26012601
MonoTableInfo *t;
26022602
guint32 cols [MONO_TYPEDEF_SIZE];
26032603
GHashTable *nspace_table;
2604+
#ifdef DEBUG_AOT_NAME_TABLE
2605+
char *debug_full_name;
2606+
uint32_t debug_hash;
2607+
#endif
26042608

26052609
if (!amodule || !amodule->class_name_table)
26062610
return FALSE;
@@ -2634,6 +2638,10 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
26342638
full_name = g_strdup_printf ("%s.%s", name_space, name);
26352639
}
26362640
}
2641+
#ifdef DEBUG_AOT_NAME_TABLE
2642+
debug_full_name = g_strdup (full_name);
2643+
debug_hash = mono_metadata_str_hash (full_name) % table_size;
2644+
#endif
26372645
hash = mono_metadata_str_hash (full_name) % table_size;
26382646
if (full_name != full_name_buf)
26392647
g_free (full_name);
@@ -2673,6 +2681,9 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
26732681
g_hash_table_insert (nspace_table, (char*)name2, *klass);
26742682
amodule_unlock (amodule);
26752683
}
2684+
#ifdef DEBUG_AOT_NAME_TABLE
2685+
g_free (debug_full_name);
2686+
#endif
26762687
return TRUE;
26772688
}
26782689

@@ -2686,6 +2697,13 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
26862697

26872698
amodule_unlock (amodule);
26882699

2700+
#ifdef DEBUG_AOT_NAME_TABLE
2701+
if (*klass == NULL) {
2702+
g_warning ("AOT class name cache '%s'=%08x not found\n", debug_full_name, debug_hash);
2703+
}
2704+
g_free (debug_full_name);
2705+
#endif
2706+
26892707
return TRUE;
26902708
}
26912709

src/mono/mono/mini/aot-runtime.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@ typedef enum {
8686
MONO_AOT_METHOD_FLAG_INTERP_ENTRY_ONLY = 16,
8787
} MonoAotMethodFlags;
8888

89+
#undef DEBUG_AOT_NAME_TABLE
90+
8991
typedef enum {
9092
MONO_AOT_TABLE_BLOB,
9193
MONO_AOT_TABLE_CLASS_NAME,
@@ -99,6 +101,9 @@ typedef enum {
99101
MONO_AOT_TABLE_IMAGE_TABLE,
100102
MONO_AOT_TABLE_WEAK_FIELD_INDEXES,
101103
MONO_AOT_TABLE_METHOD_FLAGS_TABLE,
104+
#ifdef DEBUG_AOT_NAME_TABLE
105+
MONO_AOT_TABLE_CLASS_NAME_DEBUG,
106+
#endif
102107
MONO_AOT_TABLE_NUM
103108
} MonoAotFileTable;
104109

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<!-- Builds repro.cs as an executable (OutputType=Exe) with unsafe blocks allowed. -->
<Project Sdk="Microsoft.NET.Sdk">
2+
<PropertyGroup>
3+
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
4+
<OutputType>Exe</OutputType>
5+
</PropertyGroup>
6+
<ItemGroup>
7+
<Compile Include="repro.cs" />
8+
</ItemGroup>
9+
</Project>
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
using System;
2+
3+
/* Regression test for https://github.com/dotnet/runtime/issues/78638
4+
* and https://github.com/dotnet/runtime/issues/82187. Ensures the AOT
5+
* cross-compiler and AOT runtime use the same name hashing for names
6+
* that include UTF-8 continuation bytes.
7+
*/
8+
9+
// Test entry point. The class carries a MySpecial attribute whose argument is
// typeof(MeineTüre), a type whose name contains a non-ASCII character.
[MySpecial(typeof(MeineTüre))]
10+
public class Program
11+
{
12+
// Returns 100 when every check passes; 101, 102 or 103 identify which
// check failed.
public static int Main()
13+
{
14+
// Read the MySpecialAttribute applied to Program itself (no inheritance search).
var attr = (MySpecialAttribute)Attribute.GetCustomAttribute(typeof (Program), typeof(MySpecialAttribute), false);
15+
if (attr == null)
16+
return 101;
17+
if (attr.Type == null)
18+
return 102;
19+
// The type stored in the attribute must report the expected full name.
if (attr.Type.FullName != "MeineTüre")
20+
return 103;
21+
return 100;
22+
}
23+
}
24+
25+
// Attribute that stores the System.Type passed to its constructor.
public class MySpecialAttribute : Attribute
26+
{
27+
// The type given at construction; settable only from inside this class.
public Type Type {get; private set; }
28+
public MySpecialAttribute(Type t) { Type = t; }
29+
}
30+
31+
// Empty class whose name contains the non-ASCII character 'ü'; it is the type
// referenced by the MySpecial attribute on Program.
public class MeineTüre {}

0 commit comments

Comments
 (0)