diff --git a/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go b/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go index 8519e4510f9..9096964ca59 100644 --- a/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go +++ b/go/test/endtoend/vtgate/queries/aggregation/aggregation_test.go @@ -54,6 +54,9 @@ func start(t *testing.T) (utils.MySQLCompare, func()) { "emp", "dept", "bet_logs", + "example", + "example_enum_unknown", + "example_set_unknown", } for _, table := range tables { _, _ = mcmp.ExecAndIgnore("delete from " + table) diff --git a/go/test/endtoend/vtgate/queries/aggregation/distinct_test.go b/go/test/endtoend/vtgate/queries/aggregation/distinct_test.go index 3ec27dae6a6..1d0f14b302c 100644 --- a/go/test/endtoend/vtgate/queries/aggregation/distinct_test.go +++ b/go/test/endtoend/vtgate/queries/aggregation/distinct_test.go @@ -19,6 +19,7 @@ package aggregation import ( "testing" + "vitess.io/vitess/go/test/endtoend/cluster" "vitess.io/vitess/go/test/endtoend/utils" ) @@ -45,6 +46,28 @@ func TestDistinctIt(t *testing.T) { mcmp.AssertMatchesNoOrder("select distinct val2 from aggr_test", `[[INT64(1)] [INT64(4)] [INT64(3)] [NULL]]`) mcmp.AssertMatchesNoOrder("select distinct id from aggr_test", `[[INT64(1)] [INT64(2)] [INT64(3)] [INT64(5)] [INT64(4)] [INT64(6)] [INT64(7)] [INT64(8)]]`) + ver, _ := cluster.GetMajorVersion("vtgate") + if ver == 20 { + // We can only run the following tests for v20 only, because upgrade tests + // run against a released version of v21 that doesn't support these statements. + + // Ensure DISTINCT on enum columns across shards works correctly. + mcmp.Exec("insert into example(id, foo) values (1, 'a'), (2, 'b')") + mcmp.AssertMatchesNoOrder("select distinct foo from example", `[[ENUM("a")] [ENUM("b")]]`) + + // Exercise fallback hashing for unknown enum values. The vschema for the + // example_enum_unknown table deliberately omits the "c" enum member so that + // vtgate will treat it as unknown. + mcmp.Exec("insert into example_enum_unknown(id, foo) values (1, 'a'), (2, 'c')") + mcmp.AssertMatchesNoOrder("select distinct foo from example_enum_unknown", `[[ENUM("a")] [ENUM("c")]]`) + + // Exercise fallback hashing for unknown set values. The vschema for the + // example_set_unknown table deliberately omits the "c" set member so that + // vtgate will treat it as unknown. + mcmp.Exec("insert into example_set_unknown(id, foo) values (1, 'a'), (2, 'c')") + mcmp.AssertMatchesNoOrder("select distinct foo from example_set_unknown", `[[SET("a")] [SET("c")]]`) + } + if utils.BinaryIsAtLeastAtVersion(17, "vtgate") { mcmp.AssertMatches("select distinct val1 from aggr_test order by val1 desc", `[[VARCHAR("e")] [VARCHAR("d")] [VARCHAR("c")] [VARCHAR("b")] [VARCHAR("a")]]`) mcmp.AssertMatchesNoOrder("select distinct val1, count(*) from aggr_test group by val1", `[[VARCHAR("a") INT64(2)] [VARCHAR("b") INT64(1)] [VARCHAR("c") INT64(2)] [VARCHAR("d") INT64(1)] [VARCHAR("e") INT64(2)]]`) diff --git a/go/test/endtoend/vtgate/queries/aggregation/schema.sql b/go/test/endtoend/vtgate/queries/aggregation/schema.sql index 49956b98302..3eb1e316c73 100644 --- a/go/test/endtoend/vtgate/queries/aggregation/schema.sql +++ b/go/test/endtoend/vtgate/queries/aggregation/schema.sql @@ -105,3 +105,25 @@ CREATE TABLE bet_logs ( game_id bigint, PRIMARY KEY (id) ) ENGINE InnoDB; + +create table example ( + id bigint unsigned not null, + foo enum('a','b') not null, + primary key (id) +) Engine = InnoDB; + +-- Table used to exercise unknown enum value fallback hashing. The vschema for this +-- table deliberately omits the "c" value so we can insert known and unknown enums. +create table example_enum_unknown ( + id bigint unsigned not null, + foo enum('a','b','c') not null, + primary key (id) +) Engine = InnoDB; + +-- Table used to exercise unknown set value fallback hashing. The vschema for this +-- table deliberately omits the "c" value so we can insert known and unknown sets. +create table example_set_unknown ( + id bigint unsigned not null, + foo set('a','b','c') not null, + primary key (id) +) Engine = InnoDB; diff --git a/go/test/endtoend/vtgate/queries/aggregation/vschema.json b/go/test/endtoend/vtgate/queries/aggregation/vschema.json index 6c3cddf4436..879e205461c 100644 --- a/go/test/endtoend/vtgate/queries/aggregation/vschema.json +++ b/go/test/endtoend/vtgate/queries/aggregation/vschema.json @@ -155,6 +155,51 @@ "name": "hash" } ] + }, + "example": { + "column_vindexes": [ + { + "column": "id", + "name": "hash" + } + ], + "columns": [ + { + "name": "foo", + "type": "ENUM", + "values": ["a", "b"] + } + ] + }, + "example_enum_unknown": { + "column_vindexes": [ + { + "column": "id", + "name": "hash" + } + ], + "columns": [ + { + "name": "foo", + "type": "ENUM", + "values": ["a", "b"] + } + ] + }, + "example_set_unknown": { + "column_vindexes": [ + { + "column": "id", + "name": "hash" + } + ], + "columns": [ + { + "name": "foo", + "type": "SET", + "values": ["a", "b"] + } + ] } } } \ No newline at end of file diff --git a/go/vt/vtgate/evalengine/api_hash_test.go b/go/vt/vtgate/evalengine/api_hash_test.go index bb2652ec6f2..ba63d99b999 100644 --- a/go/vt/vtgate/evalengine/api_hash_test.go +++ b/go/vt/vtgate/evalengine/api_hash_test.go @@ -94,6 +94,27 @@ func TestHashCodesRandom(t *testing.T) { t.Logf("tested %d values, with %d equalities found\n", tested, equal) } +// TestEnumSetHashing exercises hashing for ENUM and SET values. We intentionally +// pass a limited EnumSetValues list to simulate a vschema that doesn't include +// all possible values. When hashing, known values should hash based on their +// ordinal/bitset, while unknown values fall back to hashing their raw string +// under the binary collation. We assert that known and unknown hashes differ. +func TestEnumSetHashing(t *testing.T) { + vals := EnumSetValues{"a", "b"} + // ENUM hashing: known vs unknown + hA, err := NullsafeHashcode(sqltypes.MakeTrusted(sqltypes.Enum, []byte("a")), collations.CollationBinaryID, sqltypes.Enum, 0, &vals) + require.NoError(t, err) + hC, err := NullsafeHashcode(sqltypes.MakeTrusted(sqltypes.Enum, []byte("c")), collations.CollationBinaryID, sqltypes.Enum, 0, &vals) + require.NoError(t, err) + assert.NotEqual(t, hA, hC) + // SET hashing: known vs unknown + hSetA, err := NullsafeHashcode(sqltypes.MakeTrusted(sqltypes.Set, []byte("a")), collations.CollationBinaryID, sqltypes.Set, 0, &vals) + require.NoError(t, err) + hSetC, err := NullsafeHashcode(sqltypes.MakeTrusted(sqltypes.Set, []byte("c")), collations.CollationBinaryID, sqltypes.Set, 0, &vals) + require.NoError(t, err) + assert.NotEqual(t, hSetA, hSetC) +} + type equality bool func (e equality) Operator() string { diff --git a/go/vt/vtgate/evalengine/eval_enum.go b/go/vt/vtgate/evalengine/eval_enum.go index fa9675d7c0e..270fb9bea19 100644 --- a/go/vt/vtgate/evalengine/eval_enum.go +++ b/go/vt/vtgate/evalengine/eval_enum.go @@ -2,7 +2,10 @@ package evalengine import ( "vitess.io/vitess/go/hack" + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/mysql/collations/colldata" "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/vthash" ) type evalEnum struct { @@ -34,6 +37,22 @@ func (e *evalEnum) Scale() int32 { return 0 } +// Hash implements the hashable interface for evalEnum. +// For enums that match their declared values list, we hash their ordinal value; +// for any values that could not be resolved to an ordinal (value==-1), we fall back to +// hashing their raw string bytes using the binary collation. +func (e *evalEnum) Hash(h *vthash.Hasher) { + if e.value == -1 { + // For unknown values, fall back to hashing the string contents. + h.Write16(hashPrefixBytes) + colldata.Lookup(collations.CollationBinaryID).Hash(h, hack.StringBytes(e.string), 0) + return + } + // Enums are positive integral values starting at zero internally. + h.Write16(hashPrefixIntegralPositive) + h.Write64(uint64(e.value)) +} + func valueIdx(values *EnumSetValues, value string) int { if values == nil { return -1 diff --git a/go/vt/vtgate/evalengine/eval_set.go b/go/vt/vtgate/evalengine/eval_set.go index bc75a527edc..5bcb1de90b7 100644 --- a/go/vt/vtgate/evalengine/eval_set.go +++ b/go/vt/vtgate/evalengine/eval_set.go @@ -4,7 +4,10 @@ import ( "strings" "vitess.io/vitess/go/hack" + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/mysql/collations/colldata" "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/vt/vthash" ) type evalSet struct { @@ -37,6 +40,23 @@ func (e *evalSet) Scale() int32 { return 0 } +// Hash implements the hashable interface for evalSet. +// For sets where all elements resolve to ordinals in the declared set, we hash the bitset; otherwise, +// if the set string contained values that cannot be mapped to ordinals, we fall back to hashing +// the raw string using the binary collation. This ensures DISTINCT and other hash-based operations +// treat unknown sets as distinct based on their textual representation. +func (e *evalSet) Hash(h *vthash.Hasher) { + // MySQL allows storing an empty set as an empty string, which yields set==0 and string==""; + // unknown sets will have set==0 but non-empty string content. + if e.set == 0 && e.string != "" { + h.Write16(hashPrefixBytes) + colldata.Lookup(collations.CollationBinaryID).Hash(h, hack.StringBytes(e.string), 0) + return + } + h.Write16(hashPrefixIntegralPositive) + h.Write64(e.set) +} + func evalSetBits(values *EnumSetValues, value string) uint64 { if values != nil && len(*values) > 64 { // This never would happen as MySQL limits SET