diff --git a/enginetest/join_op_tests.go b/enginetest/join_op_tests.go index dd8f0dd486..b47dbfb343 100644 --- a/enginetest/join_op_tests.go +++ b/enginetest/join_op_tests.go @@ -2339,6 +2339,30 @@ WHERE }, }, }, + { + name: "join on string and number columns", + setup: [][]string{ + { + "create table t0(c0 varchar(500) primary key, c1 int)", + "create table t1(c0 int primary key, c1 varchar(500))", + "insert into t0(c0) values (5), (33), (223), ('123a')", + "insert into t1(c0) values (5), (33), (223), (123)", + "insert into t1(c0) values (-1)", + "insert into t0(c0, c1) values (false, -2)", + }, + }, + tests: []JoinOpTests{ + { + Query: "select t0.c0, t1.c0 from t0 join t1 on t0.c0 = t1.c0 order by t1.c0;", + Expected: []sql.Row{{"5", 5}, {"33", 33}, {"123a", 123}, {"223", 223}}, + }, + { + // https://github.com/dolthub/dolt/issues/10435 + Query: "select * from t1 inner join t0 on (t1.c0 between t0.c1 and t0.c0)", + Expected: []sql.Row{{-1, nil, "0", -2}}, + }, + }, + }, } var rangeJoinOpTests = []JoinOpTests{ diff --git a/enginetest/queries/query_plans.go b/enginetest/queries/query_plans.go index aa398a0a43..475965fae6 100644 --- a/enginetest/queries/query_plans.go +++ b/enginetest/queries/query_plans.go @@ -24358,23 +24358,20 @@ order by x, y; "", }, { - Query: `select * from bigtable join xy where t between x and y;`, + Query: `select * from bigtable join xy where n between x and y;`, ExpectedPlan: "RangeHeapJoin\n" + " ├─ AND\n" + " │ ├─ GreaterThanOrEqual\n" + - " │ │ ├─ bigtable.t:0!null\n" + + " │ │ ├─ bigtable.n:1\n" + " │ │ └─ xy.x:2!null\n" + " │ └─ LessThanOrEqual\n" + - " │ ├─ bigtable.t:0!null\n" + + " │ ├─ bigtable.n:1\n" + " │ └─ xy.y:3\n" + - " ├─ IndexedTableAccess(bigtable)\n" + - " │ ├─ index: [bigtable.t]\n" + - " │ ├─ static: [{[NULL, ∞)}]\n" + - " │ ├─ colSet: (1,2)\n" + - " │ ├─ tableId: 1\n" + - " │ └─ Table\n" + - " │ ├─ name: bigtable\n" + - " │ └─ columns: [t n]\n" + + " ├─ Sort(bigtable.n:1 ASC nullsFirst)\n" + + " │ └─ ProcessTable\n" + + " │ └─ Table\n" + + " │ ├─ name: bigtable\n" + + " │ └─ columns: [t n]\n" + " └─ IndexedTableAccess(xy)\n" + " ├─ index: [xy.x]\n" + " ├─ static: [{[NULL, ∞)}]\n" + @@ -24385,22 +24382,22 @@ order by x, y; " └─ columns: [x y]\n" + "", ExpectedEstimates: "RangeHeapJoin (estimated cost=7000.000 rows=17)\n" + - " ├─ ((bigtable.t >= xy.x) AND (bigtable.t <= xy.y))\n" + - " ├─ IndexedTableAccess(bigtable)\n" + - " │ ├─ index: [bigtable.t]\n" + - " │ ├─ filters: [{[NULL, ∞)}]\n" + - " │ └─ columns: [t n]\n" + + " ├─ ((bigtable.n >= xy.x) AND (bigtable.n <= xy.y))\n" + + " ├─ Sort(bigtable.n ASC)\n" + + " │ └─ Table\n" + + " │ ├─ name: bigtable\n" + + " │ └─ columns: [t n]\n" + " └─ IndexedTableAccess(xy)\n" + " ├─ index: [xy.x]\n" + " ├─ filters: [{[NULL, ∞)}]\n" + " └─ columns: [x y]\n" + "", - ExpectedAnalysis: "RangeHeapJoin (estimated cost=7000.000 rows=17) (actual rows=14 loops=1)\n" + - " ├─ ((bigtable.t >= xy.x) AND (bigtable.t <= xy.y))\n" + - " ├─ IndexedTableAccess(bigtable)\n" + - " │ ├─ index: [bigtable.t]\n" + - " │ ├─ filters: [{[NULL, ∞)}]\n" + - " │ └─ columns: [t n]\n" + + ExpectedAnalysis: "RangeHeapJoin (estimated cost=7000.000 rows=17) (actual rows=8 loops=1)\n" + + " ├─ ((bigtable.n >= xy.x) AND (bigtable.n <= xy.y))\n" + + " ├─ Sort(bigtable.n ASC)\n" + + " │ └─ Table\n" + + " │ ├─ name: bigtable\n" + + " │ └─ columns: [t n]\n" + " └─ IndexedTableAccess(xy)\n" + " ├─ index: [xy.x]\n" + " ├─ filters: [{[NULL, ∞)}]\n" + diff --git a/sql/analyzer/indexed_joins.go b/sql/analyzer/indexed_joins.go index 7dfb736bec..75f7897ffd 100644 --- a/sql/analyzer/indexed_joins.go +++ b/sql/analyzer/indexed_joins.go @@ -1033,6 +1033,15 @@ func addRangeHeapJoin(m *memo.Memo) error { return nil } + valType := valueColRef.Type() + // TODO: Incompatible sort orders between the value and min columns would be fine if we sorted the tables + // using the same sort order (for example, if value is a number type column and min is a string, we sort + // the right table based on min converted to a number). Incompatible sort orders between value and max + // columns could be fine depending on the heap implementation and if we updated the range heap join iter to + // use a compare expression instead of hard-coding it to use maxColRef.Type().Compare + if !compatibleSortOrders(valType, minColRef.Type()) || !compatibleSortOrders(valType, maxColRef.Type()) { + return nil + } leftIndexScans, err := sortedIndexScansForTableCol(m.Ctx, m.StatsProvider(), leftTab, lIndexes, valueColRef, join.Left.RelProps.FuncDeps().Constants(), lFilters) if err != nil { return err @@ -1147,7 +1156,7 @@ func addMergeJoins(ctx *sql.Context, m *memo.Memo) error { } // check that comparer is not non-decreasing - if !canMergeTypes(l.Type(), r.Type()) || !isWeaklyMonotonic(l) || !isWeaklyMonotonic(r) { + if !compatibleSortOrders(l.Type(), r.Type()) || !isWeaklyMonotonic(l) || !isWeaklyMonotonic(r) { continue } @@ -1491,15 +1500,18 @@ func makeIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, tab plan.Table }, true, nil } -// canMerge checks the types of two columns to see if they can be merged into one another if sorted. -func canMergeTypes(t1, t2 sql.Type) bool { - // TODO: handle other types here. For example, Number and Text types likely can't be merged together. But we need to - // add more testing https://github.com/dolthub/dolt/issues/10316 +// compatibleSortOrders checks the types of two columns to see if they can be merged into one another if sorted. +func compatibleSortOrders(t1, t2 sql.Type) bool { + // TODO: handle other types here https://github.com/dolthub/dolt/issues/10316 switch { case types.IsEnum(t1): if types.IsEnum(t2) { return types.TypesEqual(t1, t2) } + case types.IsNumber(t1): + return !types.IsText(t2) + case types.IsText(t1): + return !types.IsNumber(t2) } return true }