Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
595d8d8
initial aggregator operator move to new horizon planner
harshit-gangal Apr 28, 2023
a9918af
added ordering for grouping key in horizon planning
harshit-gangal Apr 28, 2023
329d5b7
gen4 planner: start of push aggregations through joins
systay Apr 29, 2023
11ad8fb
planner feat: split aggregation across joins
systay May 3, 2023
4d0c986
ordering push down on aggregations
harshit-gangal May 3, 2023
7dbaeb7
aggregation grouping based on the user provided ordering
harshit-gangal May 3, 2023
56933ea
gen4 planner refactor: use visitor pattern
systay May 3, 2023
8907650
gen4 test: make TestOne enable operator debug printing
systay May 3, 2023
1c78485
gen4 planner: fix groupby in shortDescription
systay May 3, 2023
9bd77cb
fall back when we are doing distinct aggregations or aggregations on …
systay May 3, 2023
cee9df6
group by visitor returning column index
harshit-gangal May 4, 2023
4f81544
make sure that the projection under the aggregation makes sense
systay May 5, 2023
4e9adcb
feat: keep better track of offsets in aggregation planning
GuptaManan100 May 8, 2023
3db4cb9
feat: add code to figure out grouping can be pushed down in the prese…
GuptaManan100 May 8, 2023
4f66c23
aggregator to add grouping column if not part of select expression an…
harshit-gangal May 11, 2023
85265db
wip planner refactoring: support more aggregations
systay May 12, 2023
94a1dbc
gen4 refactoring: stop compact from going down into the route
systay May 13, 2023
1a06abc
update more test cases
systay May 13, 2023
99c178d
handle aggr functions not supported in vtgate
systay May 13, 2023
ea22160
once under the route, no need to be clever about pushing columns
systay May 13, 2023
95aa81a
make sure to order by the correct expression
systay May 13, 2023
eda0deb
Various operator fixes
systay May 13, 2023
d93d95f
make sure that the old horizon planning still works as before
systay May 13, 2023
c747827
add end2end tests for the new grouping and ordering capabilities
systay May 13, 2023
f03b34d
refactor aggregation handling
systay May 13, 2023
2a8f28d
update test expectations
systay May 13, 2023
6cad2fd
add more aggregation tests
systay May 13, 2023
5f8fe3f
handle min/max in the new aggregation planning
systay May 13, 2023
8f98c76
refactor. clean up. make pretty
systay May 13, 2023
ffe7666
join engine fix: pass bind vars with type value when left side have e…
harshit-gangal May 15, 2023
f6f3de5
projection pushing to use reserved vars to avoid conflicting bind var…
harshit-gangal May 15, 2023
dab64b7
gen4 planner refactoring
systay May 15, 2023
446c4b1
gen4 planner refactoring: clean up logic, add comments
systay May 15, 2023
d97915d
projection not pushed on join when created for aggregation above, fix…
harshit-gangal May 17, 2023
60b8ea3
added new aggregation cases
harshit-gangal May 17, 2023
3a11228
add column to pushed aggregation if it is present in the original top…
harshit-gangal May 17, 2023
27c7c26
adding dummy grouping to right side aggregator when aggregation is pr…
harshit-gangal May 17, 2023
ea0c9c8
remove extra group by; use IF instead of COALESCE
systay May 17, 2023
101734c
projection to contain output columns as aliasExpr, count star multipl…
harshit-gangal May 19, 2023
5cbeb26
make sure to add projection columns for all aggregations
systay May 19, 2023
3d95ce7
add group by on the RHS of split aggregations so we don't get invalid…
systay May 19, 2023
6aa627f
test expectations
systay May 19, 2023
44eb9de
make it possible to push sorting under projection
systay May 19, 2023
aefc508
refactoring
systay May 19, 2023
ca95c24
handle opcode.AggregationRandom in the new operator model
systay May 19, 2023
2cb3b4a
add logging for each operator transformation
systay May 20, 2023
6af4190
test expectation
systay May 20, 2023
d16ec81
handle count on columns
systay May 20, 2023
7e1f905
refactor code and re-use count(*) more aggresively
systay May 20, 2023
cbf18fa
add random e2e testing for aggregations
systay May 20, 2023
9c4cadf
added test with known inconsistencies between mysql and vitess
systay May 20, 2023
caa25d7
refactor to use types
systay May 22, 2023
266ed85
remove failing test and remove unnecessary printing
systay May 22, 2023
282ab7f
added EnableGeneralLog method to enable general logs on all the mysql…
harshit-gangal May 22, 2023
ce8b7ef
print vitess plan when vitess and mysql result does not match
harshit-gangal May 22, 2023
8bb3370
add ORDER BY code to the fuzzer
systay May 22, 2023
c97c627
add group by order by test
harshit-gangal May 22, 2023
06475db
do not pass 0 to rand.Intn func as it causes panic
harshit-gangal May 22, 2023
82b0175
some code refactor and comments
harshit-gangal May 22, 2023
97eff7d
added comments
systay May 24, 2023
566a796
added more comments
systay May 24, 2023
3c1b92c
refactor and comment after review
systay May 24, 2023
fd62d66
add column names to projections
systay May 25, 2023
41235db
Merge remote-tracking branch 'upstream/main' into aggr-op
harshit-gangal May 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions go/test/endtoend/cluster/cluster_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -1313,3 +1313,19 @@ func (cluster *LocalProcessCluster) EnableVTOrcRecoveries(t *testing.T) {
vtorc.EnableGlobalRecoveries(t)
}
}

// EnableGeneralLog turns on the MySQL general query log on every mysqld
// started by this cluster, by issuing `set global general_log = 1` against
// each vttablet's underlying MySQL server.
// It is intended only for local debugging and should not be left enabled in CI.
func (cluster *LocalProcessCluster) EnableGeneralLog() error {
	for _, keyspace := range cluster.Keyspaces {
		for _, shard := range keyspace.Shards {
			for _, tablet := range shard.Vttablets {
				if _, err := tablet.VttabletProcess.QueryTablet("set global general_log = 1", "", false); err != nil {
					return err
				}
			}
		}
	}
	return nil
}
6 changes: 3 additions & 3 deletions go/test/endtoend/utils/cmp.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ func (mcmp *MySQLCompare) Exec(query string) *sqltypes.Result {

mysqlQr, err := mcmp.MySQLConn.ExecuteFetch(query, 1000, true)
require.NoError(mcmp.t, err, "[MySQL Error] for query: "+query)
compareVitessAndMySQLResults(mcmp.t, query, vtQr, mysqlQr, false)
compareVitessAndMySQLResults(mcmp.t, query, mcmp.VtConn, vtQr, mysqlQr, false)
return vtQr
}

Expand All @@ -222,7 +222,7 @@ func (mcmp *MySQLCompare) ExecWithColumnCompare(query string) *sqltypes.Result {

mysqlQr, err := mcmp.MySQLConn.ExecuteFetch(query, 1000, true)
require.NoError(mcmp.t, err, "[MySQL Error] for query: "+query)
compareVitessAndMySQLResults(mcmp.t, query, vtQr, mysqlQr, true)
compareVitessAndMySQLResults(mcmp.t, query, mcmp.VtConn, vtQr, mysqlQr, true)
return vtQr
}

Expand All @@ -241,7 +241,7 @@ func (mcmp *MySQLCompare) ExecAllowAndCompareError(query string) (*sqltypes.Resu
// Since we allow errors, we don't want to compare results if one of the client failed.
// Vitess and MySQL should always be agreeing whether the query returns an error or not.
if vtErr == nil && mysqlErr == nil {
compareVitessAndMySQLResults(mcmp.t, query, vtQr, mysqlQr, false)
compareVitessAndMySQLResults(mcmp.t, query, mcmp.VtConn, vtQr, mysqlQr, false)
}
return vtQr, vtErr
}
Expand Down
6 changes: 5 additions & 1 deletion go/test/endtoend/utils/mysql.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ func prepareMySQLWithSchema(params mysql.ConnParams, sql string) error {
return nil
}

func compareVitessAndMySQLResults(t *testing.T, query string, vtQr, mysqlQr *sqltypes.Result, compareColumns bool) {
func compareVitessAndMySQLResults(t *testing.T, query string, vtConn *mysql.Conn, vtQr, mysqlQr *sqltypes.Result, compareColumns bool) {
if vtQr == nil && mysqlQr == nil {
return
}
Expand Down Expand Up @@ -207,6 +207,10 @@ func compareVitessAndMySQLResults(t *testing.T, query string, vtQr, mysqlQr *sql
for _, row := range mysqlQr.Rows {
errStr += fmt.Sprintf("%s\n", row)
}
if vtConn != nil {
qr := Exec(t, vtConn, fmt.Sprintf("vexplain plan %s", query))
errStr += fmt.Sprintf("query plan: \n%s\n", qr.Rows[0][0].ToString())
}
t.Error(errStr)
}

Expand Down
2 changes: 1 addition & 1 deletion go/test/endtoend/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func ExecCompareMySQL(t *testing.T, vtConn, mysqlConn *mysql.Conn, query string)

mysqlQr, err := mysqlConn.ExecuteFetch(query, 1000, true)
require.NoError(t, err, "[MySQL Error] for query: "+query)
compareVitessAndMySQLResults(t, query, vtQr, mysqlQr, false)
compareVitessAndMySQLResults(t, query, vtConn, vtQr, mysqlQr, false)
return vtQr
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,12 @@ func TestGroupBy(t *testing.T) {
mcmp.Exec("insert into t3(id5, id6, id7) values(1,1,2), (2,2,4), (3,2,4), (4,1,2), (5,1,2), (6,3,6)")
// test ordering and group by int column
mcmp.AssertMatches("select id6, id7, count(*) k from t3 group by id6, id7 order by k", `[[INT64(3) INT64(6) INT64(1)] [INT64(2) INT64(4) INT64(2)] [INT64(1) INT64(2) INT64(3)]]`)
mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ id6+id7, count(*) k from t3 group by id6+id7 order by k", `[[INT64(9) INT64(1)] [INT64(6) INT64(2)] [INT64(3) INT64(3)]]`)

// Test the same queries in streaming mode
utils.Exec(t, mcmp.VtConn, "set workload = olap")
mcmp.AssertMatches("select id6, id7, count(*) k from t3 group by id6, id7 order by k", `[[INT64(3) INT64(6) INT64(1)] [INT64(2) INT64(4) INT64(2)] [INT64(1) INT64(2) INT64(3)]]`)
mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ id6+id7, count(*) k from t3 group by id6+id7 order by k", `[[INT64(9) INT64(1)] [INT64(6) INT64(2)] [INT64(3) INT64(3)]]`)
}

func TestDistinct(t *testing.T) {
Expand Down
212 changes: 212 additions & 0 deletions go/test/endtoend/vtgate/queries/aggregation/fuzz_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
/*
Copyright 2023 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package aggregation

import (
"fmt"
"math/rand"
"strings"
"testing"
"time"

"golang.org/x/exp/maps"

"vitess.io/vitess/go/vt/log"
)

// Lightweight schema model used by the fuzzer: just enough metadata to
// generate random (but type-correct) queries against the test tables.
type (
	// column is a single table column: its name and its SQL type (e.g. "bigint", "varchar").
	column struct {
		name string
		typ  string
	}
	// tableT is a table: its name and the columns it contains.
	tableT struct {
		name    string
		columns []column
	}
)

// TestFuzzAggregations randomizes both the data and the aggregation queries,
// and checks that MySQL returns the same values that Vitess does for each one.
func TestFuzzAggregations(t *testing.T) {
	mcmp, closer := start(t)
	defer closer()

	// Insert a random — but non-zero — number of rows into each table.
	// rand.Intn(20) can return 0, which would yield an invalid
	// "insert ... values ;" statement, so we add 1 to the draw.
	noOfRows := rand.Intn(20) + 1
	var values []string
	for i := 0; i < noOfRows; i++ {
		values = append(values, fmt.Sprintf("(%d, 'name%d', 'value%d', %d)", i, i, i, i))
	}
	t1Insert := fmt.Sprintf("insert into t1 (t1_id, name, value, shardKey) values %s;", strings.Join(values, ","))
	values = nil
	noOfRows = rand.Intn(20) + 1
	for i := 0; i < noOfRows; i++ {
		values = append(values, fmt.Sprintf("(%d, %d)", i, i))
	}
	t2Insert := fmt.Sprintf("insert into t2 (id, shardKey) values %s;", strings.Join(values, ","))

	mcmp.Exec(t1Insert)
	mcmp.Exec(t2Insert)

	// On failure, print the inserts so the failing run can be reproduced locally.
	t.Cleanup(func() {
		if t.Failed() {
			fmt.Println(t1Insert)
			fmt.Println(t2Insert)
		}
	})

	schema := map[string]tableT{
		"t1": {name: "t1", columns: []column{
			{name: "t1_id", typ: "bigint"},
			{name: "name", typ: "varchar"},
			{name: "value", typ: "varchar"},
			{name: "shardKey", typ: "bigint"},
		}},
		"t2": {name: "t2", columns: []column{
			{name: "id", typ: "bigint"},
			{name: "shardKey", typ: "bigint"},
		}},
	}

	endBy := time.Now().Add(1 * time.Second)
	schemaTables := maps.Values(schema)

	var queryCount int
	// Run queries until the time budget is spent, stopping early on the first
	// failure. (The previous condition `... || t.Failed()` kept the loop alive
	// forever once a query had failed.)
	for time.Now().Before(endBy) && !t.Failed() {
		tables := createTables(schemaTables)
		query := randomQuery(tables, 3, 3)
		mcmp.Exec(query)
		if t.Failed() {
			// Print the failing query so it can be replayed by hand.
			fmt.Println(query)
		}
		queryCount++
	}
	// log.Info does not interpret format verbs; Infof is required for %d.
	log.Infof("Queries successfully executed: %d", queryCount)
}

// randomQuery builds a random aggregation query over the given tables:
// up to maxAggrs aggregate expressions, random join predicates, and up to
// maxGroupBy grouping columns. ORDER BY generation is currently disabled.
func randomQuery(tables []tableT, maxAggrs, maxGroupBy int) string {
	// randomCol picks a random column of the table at tblIdx and returns the
	// aliased column expression together with its type.
	randomCol := func(tblIdx int) (string, string) {
		tbl := tables[tblIdx]
		c := randomEl(tbl.columns)
		return fmt.Sprintf("tbl%d.%s", tblIdx, c.name), c.typ
	}

	predicates := createPredicates(tables, randomCol)
	aggregates := createAggregations(tables, maxAggrs, randomCol)
	grouping := createGroupBy(tables, maxGroupBy, randomCol)

	aliased := make([]string, 0, len(tables))
	for idx, tbl := range tables {
		aliased = append(aliased, fmt.Sprintf("%s as tbl%d", tbl.name, idx))
	}

	query := "select /*vt+ PLANNER=Gen4 */ " + strings.Join(aggregates, ", ") + " from " + strings.Join(aliased, ", ")
	if len(predicates) > 0 {
		query += " where " + strings.Join(predicates, " and ")
	}
	if len(grouping) > 0 {
		query += " group by " + strings.Join(grouping, ", ")
	}

	// we do it this way so we don't have to do only `only_full_group_by` queries
	var noOfOrderBy int
	if len(grouping) > 0 {
		// rand.Intn panics when its argument is 0, so only draw when grouping exists
		noOfOrderBy = rand.Intn(len(grouping))
	}
	if noOfOrderBy > 0 {
		noOfOrderBy = 0 // TODO turning on ORDER BY here causes lots of failures
	}
	if noOfOrderBy > 0 {
		orderBy := make([]string, 0, noOfOrderBy)
		for ; noOfOrderBy > 0; noOfOrderBy-- {
			if rand.Intn(2) == 0 || len(grouping) == 0 {
				orderBy = append(orderBy, randomEl(aggregates))
			} else {
				orderBy = append(orderBy, randomEl(grouping))
			}
		}
		query += " order by " + strings.Join(orderBy, ", ")
	}
	return query
}

// createGroupBy returns between 0 and maxGB-1 random GROUP BY column expressions,
// each drawn from a randomly chosen table via randomCol.
func createGroupBy(tables []tableT, maxGB int, randomCol func(tblIdx int) (string, string)) (grouping []string) {
	for remaining := rand.Intn(maxGB); remaining > 0; remaining-- {
		colExpr, _ := randomCol(rand.Intn(len(tables)))
		grouping = append(grouping, colExpr)
	}
	return
}

// createAggregations builds between 1 and maxAggrs random aggregate expressions
// over randomly chosen columns. Only count(*) and count(col) are enabled for
// now; the remaining aggregations stay commented out until they are supported.
func createAggregations(tables []tableT, maxAggrs int, randomCol func(tblIdx int) (string, string)) (aggregates []string) {
	aggregations := []func(string) string{
		func(_ string) string { return "count(*)" },
		func(e string) string { return fmt.Sprintf("count(%s)", e) },
		//func(e string) string { return fmt.Sprintf("sum(%s)", e) },
		//func(e string) string { return fmt.Sprintf("avg(%s)", e) },
		//func(e string) string { return fmt.Sprintf("min(%s)", e) },
		//func(e string) string { return fmt.Sprintf("max(%s)", e) },
	}

	for remaining := rand.Intn(maxAggrs) + 1; remaining > 0; remaining-- {
		expr, _ := randomCol(rand.Intn(len(tables)))
		aggregates = append(aggregates, randomEl(aggregations)(expr))
	}
	return aggregates
}

// createTables picks one or two tables (with repetition allowed) at random
// from the schema for use in a generated query.
func createTables(schemaTables []tableT) []tableT {
	tables := make([]tableT, 0, 2)
	for remaining := rand.Intn(2) + 1; remaining > 0; remaining-- {
		tables = append(tables, randomEl(schemaTables))
	}
	return tables
}

// createPredicates builds random equality join predicates between every
// ordered pair of distinct tables, only pairing columns whose types agree.
func createPredicates(tables []tableT, randomCol func(tblIdx int) (string, string)) (predicates []string) {
	for idx1 := range tables {
		for idx2 := range tables {
			if idx1 == idx2 {
				continue
			}
			// Zero or one predicate for this pair of tables.
			remaining := rand.Intn(2)
			for remaining > 0 {
				lhs, lhsTyp := randomCol(idx1)
				rhs, rhsTyp := randomCol(idx2)
				// Retry until both sides have the same type, so the
				// comparison is meaningful.
				if lhsTyp != rhsTyp {
					continue
				}
				predicates = append(predicates, fmt.Sprintf("%s = %s", lhs, rhs))
				remaining--
			}
		}
	}
	return predicates
}

// randomEl returns a uniformly random element of in. It panics when in is empty.
func randomEl[K any](in []K) K {
	idx := rand.Intn(len(in))
	return in[idx]
}
3 changes: 3 additions & 0 deletions go/test/endtoend/vtgate/queries/orderby/orderby_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ func TestOrderBy(t *testing.T) {
mcmp.AssertMatches("select id1, id2 from t4 order by id2 desc", `[[INT64(5) VARCHAR("test")] [INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(2) VARCHAR("Abc")] [INT64(1) VARCHAR("a")]]`)
// test ordering of int column
mcmp.AssertMatches("select id1, id2 from t4 order by id1 desc", `[[INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(5) VARCHAR("test")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(2) VARCHAR("Abc")] [INT64(1) VARCHAR("a")]]`)
// test ordering of complex column
mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ id1, id2 from t4 order by reverse(id2) desc", `[[INT64(5) VARCHAR("test")] [INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(2) VARCHAR("Abc")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(1) VARCHAR("a")]]`)

defer func() {
utils.Exec(t, mcmp.VtConn, "set workload = oltp")
Expand All @@ -75,4 +77,5 @@ func TestOrderBy(t *testing.T) {
utils.Exec(t, mcmp.VtConn, "set workload = olap")
mcmp.AssertMatches("select id1, id2 from t4 order by id2 desc", `[[INT64(5) VARCHAR("test")] [INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(2) VARCHAR("Abc")] [INT64(1) VARCHAR("a")]]`)
mcmp.AssertMatches("select id1, id2 from t4 order by id1 desc", `[[INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(5) VARCHAR("test")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(2) VARCHAR("Abc")] [INT64(1) VARCHAR("a")]]`)
mcmp.AssertMatches("select /*vt+ PLANNER=Gen4 */ id1, id2 from t4 order by reverse(id2) desc", `[[INT64(5) VARCHAR("test")] [INT64(8) VARCHAR("F")] [INT64(7) VARCHAR("e")] [INT64(6) VARCHAR("d")] [INT64(2) VARCHAR("Abc")] [INT64(4) VARCHAR("c")] [INT64(3) VARCHAR("b")] [INT64(1) VARCHAR("a")]]`)
}
27 changes: 27 additions & 0 deletions go/vt/sqlparser/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -2867,6 +2867,33 @@ type (
}

CountStar struct {
_ bool
// TL;DR; This makes sure that reference equality checks works as expected
//
// You're correct that this might seem a bit strange at first glance.
// It's a quirk of Go's handling of empty structs. In Go, two instances of an empty struct are considered
// identical, which can be problematic when using these as keys in maps.
// They would be treated as the same key and potentially lead to incorrect map behavior.
//
// Here's a brief example:
//
// ```golang
// func TestWeirdGo(t *testing.T) {
// type CountStar struct{}
//
// cs1 := &CountStar{}
// cs2 := &CountStar{}
// if cs1 == cs2 {
// panic("what the what!?")
// }
// }
// ```
//
// In the above code, cs1 and cs2, despite being distinct variables, would be treated as the same object.
//
// The solution we employed was to add a dummy field `_ bool` to the otherwise empty struct `CountStar`.
// This ensures that each instance of `CountStar` is treated as a separate object,
// even in the context of our semantic state, which uses these objects as map keys.
}

Avg struct {
Expand Down
5 changes: 5 additions & 0 deletions go/vt/sqlparser/ast_funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -933,6 +933,11 @@ func writeEscapedString(buf *TrackedBuffer, original string) {
buf.WriteByte('`')
}

// CompliantString renders the given SQLNode as SQL text and converts that text
// into a compliant name (safe to use as an identifier, e.g. a bind variable name).
func CompliantString(in SQLNode) string {
	return compliantName(String(in))
}

func compliantName(in string) string {
var buf strings.Builder
for i, c := range in {
Expand Down
10 changes: 7 additions & 3 deletions go/vt/sqlparser/ast_rewriting.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,15 @@ func (r *ReservedVars) ReserveAll(names ...string) bool {
// with the same name already exists, it'll be suffixed with a numeric identifier
// to make it unique.
func (r *ReservedVars) ReserveColName(col *ColName) string {
compliantName := col.CompliantName()
if r.fast && strings.HasPrefix(compliantName, r.prefix) {
compliantName = "_" + compliantName
reserveName := col.CompliantName()
if r.fast && strings.HasPrefix(reserveName, r.prefix) {
reserveName = "_" + reserveName
}

return r.ReserveVariable(reserveName)
}

func (r *ReservedVars) ReserveVariable(compliantName string) string {
joinVar := []byte(compliantName)
baseLen := len(joinVar)
i := int64(1)
Expand Down
10 changes: 10 additions & 0 deletions go/vt/sqlparser/cached_size.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading