Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ func start(t *testing.T) (utils.MySQLCompare, func()) {
"emp",
"dept",
"bet_logs",
"example",
"example_enum_unknown",
"example_set_unknown",
}
for _, table := range tables {
_, _ = mcmp.ExecAndIgnore("delete from " + table)
Expand Down
23 changes: 23 additions & 0 deletions go/test/endtoend/vtgate/queries/aggregation/distinct_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package aggregation
import (
"testing"

"vitess.io/vitess/go/test/endtoend/cluster"
"vitess.io/vitess/go/test/endtoend/utils"
)

Expand All @@ -45,6 +46,28 @@ func TestDistinctIt(t *testing.T) {
mcmp.AssertMatchesNoOrder("select distinct val2 from aggr_test", `[[INT64(1)] [INT64(4)] [INT64(3)] [NULL]]`)
mcmp.AssertMatchesNoOrder("select distinct id from aggr_test", `[[INT64(1)] [INT64(2)] [INT64(3)] [INT64(5)] [INT64(4)] [INT64(6)] [INT64(7)] [INT64(8)]]`)

ver, _ := cluster.GetMajorVersion("vtgate")
if ver == 20 {
// We can run the following tests only on v20, because the upgrade tests
// run against a released version of v21 that doesn't support these statements.

// Ensure DISTINCT on enum columns across shards works correctly.
mcmp.Exec("insert into example(id, foo) values (1, 'a'), (2, 'b')")
mcmp.AssertMatchesNoOrder("select distinct foo from example", `[[ENUM("a")] [ENUM("b")]]`)

// Exercise fallback hashing for unknown enum values. The vschema for the
// example_enum_unknown table deliberately omits the "c" enum member so that
// vtgate will treat it as unknown.
mcmp.Exec("insert into example_enum_unknown(id, foo) values (1, 'a'), (2, 'c')")
mcmp.AssertMatchesNoOrder("select distinct foo from example_enum_unknown", `[[ENUM("a")] [ENUM("c")]]`)

// Exercise fallback hashing for unknown set values. The vschema for the
// example_set_unknown table deliberately omits the "c" set member so that
// vtgate will treat it as unknown.
mcmp.Exec("insert into example_set_unknown(id, foo) values (1, 'a'), (2, 'c')")
mcmp.AssertMatchesNoOrder("select distinct foo from example_set_unknown", `[[SET("a")] [SET("c")]]`)
}

if utils.BinaryIsAtLeastAtVersion(17, "vtgate") {
mcmp.AssertMatches("select distinct val1 from aggr_test order by val1 desc", `[[VARCHAR("e")] [VARCHAR("d")] [VARCHAR("c")] [VARCHAR("b")] [VARCHAR("a")]]`)
mcmp.AssertMatchesNoOrder("select distinct val1, count(*) from aggr_test group by val1", `[[VARCHAR("a") INT64(2)] [VARCHAR("b") INT64(1)] [VARCHAR("c") INT64(2)] [VARCHAR("d") INT64(1)] [VARCHAR("e") INT64(2)]]`)
Expand Down
22 changes: 22 additions & 0 deletions go/test/endtoend/vtgate/queries/aggregation/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,25 @@ CREATE TABLE bet_logs (
game_id bigint,
PRIMARY KEY (id)
) ENGINE InnoDB;

-- Table used to exercise DISTINCT on an enum column. Unlike the *_unknown
-- tables, the vschema for this table declares the same enum members
-- ('a','b') as the column definition below.
create table example (
id bigint unsigned not null,
foo enum('a','b') not null,
primary key (id)
) Engine = InnoDB;

-- Table used to exercise the fallback hashing of unknown enum values. The
-- column below accepts 'a', 'b' and 'c', but the vschema for this table
-- deliberately declares only 'a' and 'b', so a row holding 'c' carries an
-- enum value that is not listed in the vschema.
create table example_enum_unknown (
id bigint unsigned not null,
foo enum('a','b','c') not null,
primary key (id)
) Engine = InnoDB;

-- Table used to exercise the fallback hashing of unknown set values. The
-- column below accepts 'a', 'b' and 'c', but the vschema for this table
-- deliberately declares only 'a' and 'b', so a row holding 'c' carries a
-- set member that is not listed in the vschema.
create table example_set_unknown (
id bigint unsigned not null,
foo set('a','b','c') not null,
primary key (id)
) Engine = InnoDB;
45 changes: 45 additions & 0 deletions go/test/endtoend/vtgate/queries/aggregation/vschema.json
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,51 @@
"name": "hash"
}
]
},
"example": {
"column_vindexes": [
{
"column": "id",
"name": "hash"
}
],
"columns": [
{
"name": "foo",
"type": "ENUM",
"values": ["a", "b"]
}
]
},
"example_enum_unknown": {
"column_vindexes": [
{
"column": "id",
"name": "hash"
}
],
"columns": [
{
"name": "foo",
"type": "ENUM",
"values": ["a", "b"]
}
]
},
"example_set_unknown": {
"column_vindexes": [
{
"column": "id",
"name": "hash"
}
],
"columns": [
{
"name": "foo",
"type": "SET",
"values": ["a", "b"]
}
]
}
}
}
21 changes: 21 additions & 0 deletions go/vt/vtgate/evalengine/api_hash_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,27 @@ func TestHashCodesRandom(t *testing.T) {
t.Logf("tested %d values, with %d equalities found\n", tested, equal)
}

// TestEnumSetHashing checks NullsafeHashcode for ENUM and SET values when the
// supplied EnumSetValues list is narrower than the data: only "a" and "b" are
// declared, mimicking a vschema that does not list every member. For each of
// the two types it hashes a declared member ("a") and an undeclared one ("c")
// under the binary collation and asserts that the two hash codes differ.
func TestEnumSetHashing(t *testing.T) {
	declared := EnumSetValues{"a", "b"}

	// ENUM first, then SET: each type gets the same known-vs-unknown check.
	for _, typ := range []sqltypes.Type{sqltypes.Enum, sqltypes.Set} {
		knownValue := sqltypes.MakeTrusted(typ, []byte("a"))
		knownHash, err := NullsafeHashcode(knownValue, collations.CollationBinaryID, typ, 0, &declared)
		require.NoError(t, err)

		unknownValue := sqltypes.MakeTrusted(typ, []byte("c"))
		unknownHash, err := NullsafeHashcode(unknownValue, collations.CollationBinaryID, typ, 0, &declared)
		require.NoError(t, err)

		assert.NotEqual(t, knownHash, unknownHash)
	}
}

type equality bool

func (e equality) Operator() string {
Expand Down
19 changes: 19 additions & 0 deletions go/vt/vtgate/evalengine/eval_enum.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ package evalengine

import (
"vitess.io/vitess/go/hack"
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/mysql/collations/colldata"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/vthash"
)

type evalEnum struct {
Expand Down Expand Up @@ -34,6 +37,22 @@ func (e *evalEnum) Scale() int32 {
return 0
}

// Hash implements the hashable interface for evalEnum.
//
// When the enum carries a resolved ordinal (value != -1), the ordinal is
// written to the hasher behind the positive-integral prefix. When the ordinal
// is -1, meaning the value could not be matched against the declared values,
// the raw string is hashed instead, behind the bytes prefix and using the
// binary collation.
func (e *evalEnum) Hash(h *vthash.Hasher) {
	if e.value != -1 {
		// Resolved enum: hash the ordinal as a positive integer.
		h.Write16(hashPrefixIntegralPositive)
		h.Write64(uint64(e.value))
		return
	}

	// Unresolved enum: the textual value is all we have to hash.
	h.Write16(hashPrefixBytes)
	binaryCollation := colldata.Lookup(collations.CollationBinaryID)
	binaryCollation.Hash(h, hack.StringBytes(e.string), 0)
}

func valueIdx(values *EnumSetValues, value string) int {
if values == nil {
return -1
Expand Down
20 changes: 20 additions & 0 deletions go/vt/vtgate/evalengine/eval_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ import (
"strings"

"vitess.io/vitess/go/hack"
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/mysql/collations/colldata"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/vthash"
)

type evalSet struct {
Expand Down Expand Up @@ -37,6 +40,23 @@ func (e *evalSet) Scale() int32 {
return 0
}

// Hash implements the hashable interface for evalSet.
//
// The bitset is written to the hasher behind the positive-integral prefix,
// except when the bitset is zero while the string is non-empty. In that case
// the raw string is hashed instead, behind the bytes prefix and using the
// binary collation, so that hash-based operations such as DISTINCT tell such
// values apart by their text. An empty string with a zero bitset (the empty
// set) still takes the bitset path.
//
// NOTE(review): a set string mixing declared and undeclared members presumably
// yields a non-zero bitset and would then be hashed by bitset alone — confirm
// against evalSetBits.
func (e *evalSet) Hash(h *vthash.Hasher) {
	unresolved := e.set == 0 && e.string != ""
	if !unresolved {
		h.Write16(hashPrefixIntegralPositive)
		h.Write64(e.set)
		return
	}

	// No bits resolved but there is text: hash the text itself.
	h.Write16(hashPrefixBytes)
	binaryCollation := colldata.Lookup(collations.CollationBinaryID)
	binaryCollation.Hash(h, hack.StringBytes(e.string), 0)
}

func evalSetBits(values *EnumSetValues, value string) uint64 {
if values != nil && len(*values) > 64 {
// This never would happen as MySQL limits SET
Expand Down
Loading