From cc619a724fbfd2eceba0730e1d05620dd39be533 Mon Sep 17 00:00:00 2001 From: Shivam Saraf Date: Wed, 15 Mar 2023 11:02:43 -0400 Subject: [PATCH] sql: enabling forward indexes and ORDERBY on JSONB columns Currently, #97928 outlines the scheme for JSONB encoding and decoding for forward indexes. However, the PR doesn't enable this feature to our users. This current PR aims to allow forward indexes on JSONB columns. The presence of a lexicographical ordering, as described in #97928, shall now allow primary and secondary indexes on JSONB columns along with the ability to use ORDER BY filter in their queries. Additionally, JSON values consist of decimal numbers and containers, such as Arrays and Objects, which can contain these decimal numbers. In order to preserve the values after the decimal, JSONB columns are now required to be composite in nature. This shall enable such values to be stored in both the key and the value side of a K/V pair in hopes of receiving the exact value. Fixes: #35706 Release note (sql change): This PR adds support for enabling forward indexes and ordering on JSON values. Epic: CRDB-24501 --- .../tests/3node-tenant/generated_test.go | 7 + pkg/sql/catalog/colinfo/col_type_info.go | 5 +- .../catalog/colinfo/column_type_properties.go | 2 +- .../colinfo/column_type_properties_test.go | 2 +- pkg/sql/catalog/table_col_set.go | 3 + pkg/sql/catalog/tabledesc/structured.go | 12 +- pkg/sql/colenc/encode.go | 3 + pkg/sql/colenc/key.go | 15 + pkg/sql/colencoding/key_encoding.go | 21 +- .../testdata/logic_test/distsql_stats | 4 +- .../testdata/logic_test/expression_index | 183 ++++++- .../testdata/logic_test/inverted_index | 10 - .../logic_test/inverted_index_multi_column | 95 +++- pkg/sql/logictest/testdata/logic_test/json | 4 +- .../logictest/testdata/logic_test/json_index | 509 ++++++++++++++++++ .../logictest/testdata/logic_test/pg_catalog | 54 +- .../tests/fakedist-disk/generated_test.go | 7 + .../tests/fakedist-vec-off/generated_test.go | 7 + .../tests/fakedist/generated_test.go | 7 + .../generated_test.go | 7 + .../local-mixed-22.2-23.1/generated_test.go | 7 + .../tests/local-vec-off/generated_test.go | 7 + .../logictest/tests/local/generated_test.go | 7 + pkg/sql/opt/exec/execbuilder/testdata/json | 401 ++++++++++++++ pkg/sql/opt/exec/execbuilder/testdata/stats | 4 + .../execbuilder/tests/local/generated_test.go | 7 + pkg/sql/opt/indexrec/candidate.go | 16 +- pkg/sql/opt/indexrec/hypothetical_table.go | 5 +- pkg/sql/opt/norm/testdata/rules/inline | 8 +- pkg/sql/opt/norm/testdata/rules/project | 4 +- pkg/sql/opt/optbuilder/orderby.go | 2 - pkg/sql/opt/xform/testdata/external/customer | 5 +- pkg/sql/rowenc/index_encoding.go | 2 +- pkg/sql/rowenc/index_encoding_test.go | 96 +++- pkg/sql/sem/tree/datum.go | 39 ++ pkg/sql/sem/tree/eval.go | 2 + pkg/sql/ttl/ttljob/ttljob_test.go | 5 +- 37 files changed, 1487 insertions(+), 87 deletions(-) create mode 100644 pkg/sql/logictest/testdata/logic_test/json_index create mode 100644 pkg/sql/opt/exec/execbuilder/testdata/json diff --git a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go index a6b4400ccd1d..4935a2779e72 100644 --- a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go +++ b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go @@ -1088,6 +1088,13 @@ func TestTenantLogic_json_builtins( runLogicTest(t, "json_builtins") } +func TestTenantLogic_json_index( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "json_index") +} + func 
TestTenantLogic_kv_builtin_functions_tenant( t *testing.T, ) { diff --git a/pkg/sql/catalog/colinfo/col_type_info.go b/pkg/sql/catalog/colinfo/col_type_info.go index 98d21abdbc5e..ec400e70b3fd 100644 --- a/pkg/sql/catalog/colinfo/col_type_info.go +++ b/pkg/sql/catalog/colinfo/col_type_info.go @@ -146,7 +146,7 @@ func ColumnTypeIsIndexable(t *types.T) bool { // using an inverted index. func ColumnTypeIsInvertedIndexable(t *types.T) bool { switch t.Family() { - case types.ArrayFamily, types.StringFamily: + case types.JsonFamily, types.ArrayFamily, types.StringFamily: return true } return ColumnTypeIsOnlyInvertedIndexable(t) @@ -162,7 +162,6 @@ func ColumnTypeIsOnlyInvertedIndexable(t *types.T) bool { t = t.ArrayContents() } switch t.Family() { - case types.JsonFamily: case types.GeographyFamily: case types.GeometryFamily: case types.TSVectorFamily: @@ -183,7 +182,7 @@ func MustBeValueEncoded(semanticType *types.T) bool { default: return MustBeValueEncoded(semanticType.ArrayContents()) } - case types.JsonFamily, types.TupleFamily, types.GeographyFamily, types.GeometryFamily: + case types.TupleFamily, types.GeographyFamily, types.GeometryFamily: return true case types.TSVectorFamily, types.TSQueryFamily: return true diff --git a/pkg/sql/catalog/colinfo/column_type_properties.go b/pkg/sql/catalog/colinfo/column_type_properties.go index 7eeeddbb3cea..96ed30d5a633 100644 --- a/pkg/sql/catalog/colinfo/column_type_properties.go +++ b/pkg/sql/catalog/colinfo/column_type_properties.go @@ -52,6 +52,7 @@ func CanHaveCompositeKeyEncoding(typ *types.T) bool { switch typ.Family() { case types.FloatFamily, types.DecimalFamily, + types.JsonFamily, types.CollatedStringFamily: return true case types.ArrayFamily: @@ -75,7 +76,6 @@ func CanHaveCompositeKeyEncoding(typ *types.T) bool { types.UuidFamily, types.INetFamily, types.TimeFamily, - types.JsonFamily, types.TimeTZFamily, types.BitFamily, types.GeometryFamily, diff --git a/pkg/sql/catalog/colinfo/column_type_properties_test.go b/pkg/sql/catalog/colinfo/column_type_properties_test.go index fb985eaee505..9a1d1a0310df 100644 --- a/pkg/sql/catalog/colinfo/column_type_properties_test.go +++ b/pkg/sql/catalog/colinfo/column_type_properties_test.go @@ -52,7 +52,7 @@ func TestCanHaveCompositeKeyEncoding(t *testing.T) { {types.IntArray, false}, {types.Interval, false}, {types.IntervalArray, false}, - {types.Jsonb, false}, + {types.Jsonb, true}, {types.Name, false}, {types.Oid, false}, {types.String, false}, diff --git a/pkg/sql/catalog/table_col_set.go b/pkg/sql/catalog/table_col_set.go index 1476afb6a97a..166a0f5e8a5a 100644 --- a/pkg/sql/catalog/table_col_set.go +++ b/pkg/sql/catalog/table_col_set.go @@ -56,6 +56,9 @@ func (s TableColSet) ForEach(f func(col descpb.ColumnID)) { s.set.ForEach(func(i int) { f(descpb.ColumnID(i)) }) } +// Copy returns a copy of s which can be modified independently. +func (s TableColSet) Copy() TableColSet { return TableColSet{set: s.set.Copy()} } + // SubsetOf returns true if s is a subset of other. func (s TableColSet) SubsetOf(other TableColSet) bool { return s.set.SubsetOf(other.set) diff --git a/pkg/sql/catalog/tabledesc/structured.go b/pkg/sql/catalog/tabledesc/structured.go index 9037cc825dc5..465edcc9a441 100644 --- a/pkg/sql/catalog/tabledesc/structured.go +++ b/pkg/sql/catalog/tabledesc/structured.go @@ -714,6 +714,14 @@ func (desc *Mutable) allocateIndexIDs(columnNames map[string]descpb.ColumnID) er colIDs = idx.CollectKeyColumnIDs() } + // Inverted indexes don't store composite values in the individual + // paths present. 
The composite values will be differentiated in + // the primary index itself. + compositeColIDsLocal := compositeColIDs.Copy() + if isInverted { + compositeColIDsLocal.Remove(invID) + } + // StoreColumnIDs are derived from StoreColumnNames just like KeyColumnIDs // derives from KeyColumnNames. // For primary indexes this set of columns is typically defined as the set @@ -755,12 +763,12 @@ func (desc *Mutable) allocateIndexIDs(columnNames map[string]descpb.ColumnID) er // or in the primary key whose type has a composite encoding, like DECIMAL // for instance. for _, colID := range idx.IndexDesc().KeyColumnIDs { - if compositeColIDs.Contains(colID) { + if compositeColIDsLocal.Contains(colID) { idx.IndexDesc().CompositeColumnIDs = append(idx.IndexDesc().CompositeColumnIDs, colID) } } for _, colID := range idx.IndexDesc().KeySuffixColumnIDs { - if compositeColIDs.Contains(colID) { + if compositeColIDsLocal.Contains(colID) { idx.IndexDesc().CompositeColumnIDs = append(idx.IndexDesc().CompositeColumnIDs, colID) } } diff --git a/pkg/sql/colenc/encode.go b/pkg/sql/colenc/encode.go index b1a35cb5c4b4..f52ae26703f5 100644 --- a/pkg/sql/colenc/encode.go +++ b/pkg/sql/colenc/encode.go @@ -707,6 +707,9 @@ func isComposite(vec coldata.Vec, row int) bool { case types.DecimalFamily: d := tree.DDecimal{Decimal: vec.Decimal()[row]} return d.IsComposite() + case types.JsonFamily: + j := tree.DJSON{JSON: vec.JSON().Get(row)} + return j.IsComposite() default: d := vec.Datum().Get(row) if cdatum, ok := d.(tree.CompositeDatum); ok { diff --git a/pkg/sql/colenc/key.go b/pkg/sql/colenc/key.go index f8f057926a5d..598cc708d051 100644 --- a/pkg/sql/colenc/key.go +++ b/pkg/sql/colenc/key.go @@ -180,6 +180,21 @@ func encodeKeys[T []byte | roachpb.Key]( } kys[r] = b } + case types.JsonFamily: + jsonVector := vec.JSON() + for r := 0; r < count; r++ { + b := kys[r] + if partialIndexAndNullCheck(kys, r, start, nulls, dir) { + continue + } + var err error + jsonObj := jsonVector.Get(r + start) + b, err = jsonObj.EncodeForwardIndex(b, dir) + if err != nil { + return err + } + kys[r] = b + } default: if buildutil.CrdbTestBuild { if typeconv.TypeFamilyToCanonicalTypeFamily(typ.Family()) != typeconv.DatumVecCanonicalTypeFamily { diff --git a/pkg/sql/colencoding/key_encoding.go b/pkg/sql/colencoding/key_encoding.go index 32f6641a3532..10e72b385b99 100644 --- a/pkg/sql/colencoding/key_encoding.go +++ b/pkg/sql/colencoding/key_encoding.go @@ -187,12 +187,21 @@ func decodeTableKeyToCol( } vecs.IntervalCols[colIdx][rowIdx] = d case types.JsonFamily: - // Don't attempt to decode the JSON value. Instead, just return the - // remaining bytes of the key. - var jsonLen int - jsonLen, err = encoding.PeekLength(key) - vecs.JSONCols[colIdx].Bytes.Set(rowIdx, key[:jsonLen]) - rkey = key[jsonLen:] + // Decode the JSON, and then store the bytes in the + // vector in the value-encoded format. + // TODO (shivam): Make it possible for the vector to store + // key-encoded JSONs instead of value-encoded JSONs. + var d tree.Datum + encDir := encoding.Ascending + if dir == catenumpb.IndexColumn_DESC { + encDir = encoding.Descending + } + d, rkey, err = keyside.Decode(da, valType, key, encDir) + json, ok := d.(*tree.DJSON) + if !ok { + return nil, false, scratch, errors.AssertionFailedf("Could not type assert into DJSON") + } + vecs.JSONCols[colIdx].Set(rowIdx, json.JSON) case types.EncodedKeyFamily: // Don't attempt to decode the inverted key. 
keyLen, err := encoding.PeekLength(key) diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats index 5737db76796d..0fd7e84b4f4b 100644 --- a/pkg/sql/logictest/testdata/logic_test/distsql_stats +++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats @@ -1257,7 +1257,7 @@ ORDER BY statistics_name column_names row_count null_count has_histogram s {a} 3 0 true s {b} 3 0 true -s {j} 3 0 false +s {j} 3 0 true s {rowid} 3 0 true # Test that non-index columns have histograms collected for them, with @@ -2348,7 +2348,7 @@ SHOW STATISTICS USING JSON FOR TABLE j; statement ok ALTER TABLE j INJECT STATISTICS '$j_stats' -statement error pq: cannot create partial statistics on an inverted index column +statement error pq: table j does not contain a non-partial forward index with j as a prefix column CREATE STATISTICS j_partial ON j FROM j USING EXTREMES; statement ok diff --git a/pkg/sql/logictest/testdata/logic_test/expression_index b/pkg/sql/logictest/testdata/logic_test/expression_index index 3f242f89f0e0..21206bdf7746 100644 --- a/pkg/sql/logictest/testdata/logic_test/expression_index +++ b/pkg/sql/logictest/testdata/logic_test/expression_index @@ -111,15 +111,9 @@ CREATE TABLE err (a INT, INDEX (a, (NULL))) statement ok CREATE TABLE t_null_cast (a INT, INDEX (a, (NULL::TEXT))) -statement error index element j->'a' of type jsonb is not indexable -CREATE TABLE err (a INT, j JSON, INDEX (a, (j->'a'))) - statement error index element \(a, b\) of type record is not indexable CREATE TABLE err (a INT, b INT, INDEX (a, (row(a, b)))) -statement error index element j->'a' of type jsonb is not allowed as a prefix column in an inverted index.*\nHINT: see the documentation for more information about inverted indexes: https://www.cockroachlabs.com/docs/.*/inverted-indexes.html -CREATE TABLE err (a INT, j JSON, INVERTED INDEX ((j->'a'), j)) - statement error index element a \+ b of type int is not allowed as the last column in an inverted index.*\nHINT: see the documentation for more information about inverted indexes: https://www.cockroachlabs.com/docs/.*/inverted-indexes.html CREATE TABLE err (a INT, b INT, INVERTED INDEX (a, (a + b))) @@ -340,15 +334,21 @@ CREATE INDEX err ON t (a, (NULL), b) statement ok CREATE INDEX t_cast_idx ON t (a, (NULL::TEXT), b) -statement error index element j->'a' of type jsonb is not indexable +statement ok CREATE INDEX err ON t (a, (j->'a')); +statement ok +DROP INDEX err + statement error index element \(a, b\) of type record is not indexable CREATE INDEX err ON t (a, (row(a, b))); -statement error index element j->'a' of type jsonb is not allowed as a prefix column in an inverted index.*\nHINT: see the documentation for more information about inverted indexes: https://www.cockroachlabs.com/docs/.*/inverted-indexes.html +statement ok CREATE INVERTED INDEX err ON t ((j->'a'), j); +statement ok +DROP INDEX err + statement error index element a \+ b of type int is not allowed as the last column in an inverted index.*\nHINT: see the documentation for more information about inverted indexes: https://www.cockroachlabs.com/docs/.*/inverted-indexes.html CREATE INVERTED INDEX err ON t (a, (a + b)); @@ -639,6 +639,68 @@ SELECT k, a, b, c, comp FROM t@t_lower_c_a_plus_b_idx WHERE lower(c) = 'foo' AND k a b c comp 2 20 200 FOO 30 +statement ok +CREATE TABLE d (a INT, j JSON); +CREATE INDEX json_expr_index on d ((j->'a')) + +statement ok +INSERT INTO d VALUES + (1, '{"a": "hello"}'), + (2, '{"a": "b"}'), + (3, '{"a": "bye"}'), 
+ (4, '{"a": "json"}') + +query IT +SELECT a, j from d@json_expr_index where j->'a' = '"b"' ORDER BY a +---- +2 {"a": "b"} + +query IT +SELECT a, j from d@json_expr_index where j->'a' = '"b"' OR j->'a' = '"bye"' ORDER BY a +---- +2 {"a": "b"} +3 {"a": "bye"} + +query IT +SELECT a, j from d@json_expr_index where j->'a' > '"a"' ORDER BY a +---- +1 {"a": "hello"} +2 {"a": "b"} +3 {"a": "bye"} +4 {"a": "json"} + +query IT +SELECT a, j from d@json_expr_index where j->'a' <= '"hello"' ORDER BY a +---- +1 {"a": "hello"} +2 {"a": "b"} +3 {"a": "bye"} + + +statement ok +INSERT INTO d VALUES + (5, '{"a": "forward", "json": "inverted"}'), + (6, '{"a": "c", "b": "d"}') + + +statement ok +CREATE INVERTED INDEX json_inv on d ((j->'a'), j) + +query IT +SELECT a, j from d@json_inv where j->'a' = '"forward"' AND j->'json' = '"inverted"' ORDER BY a +---- +5 {"a": "forward", "json": "inverted"} + +query IT +SELECT a, j from d@json_inv where j->'a' = '"c"' AND j->'json' = '"inverted"' ORDER BY a +---- + +query IT +SELECT a, j from d@json_inv where j->'a' = '"c"' AND j->'b' = '"d"' ORDER BY a +---- +6 {"a": "c", "b": "d"} + + # Backfilling expression indexes. statement ok @@ -931,6 +993,97 @@ SELECT i, j FROM inv@i_plus_100_j_a WHERE i+100 = 101 AND j->'a' @> '"x"' ORDER 1 {"a": ["x", "y", "z"]} 1 {"a": [1, "x"]} +# Backfilling JSON expression indexes + +statement ok +CREATE TABLE json_backfill ( + k INT PRIMARY KEY, + j JSON, + INDEX forward_expr ((j->'a')), + INDEX forward (j) +) + +statement ok +INSERT INTO json_backfill VALUES + (1, '[1, 2, 3]'), + (2, '{"a": [1, 2, 3], "b": [4, 5, 6]}'), + (3, '{"a": {"a": "b"}, "d": {"e": [1, 2, 3]}}'), + (4, '{"a": [4, 5]}') + +query T +SELECT j from json_backfill@forward_expr where j->'a' IN ('[1, 2, 3]', '[4,5]') ORDER BY k +---- +{"a": [1, 2, 3], "b": [4, 5, 6]} +{"a": [4, 5]} + +query T +SELECT j from json_backfill@forward_expr where j->'a' = '{"a": "b"}' ORDER BY k +---- +{"a": {"a": "b"}, "d": {"e": [1, 2, 3]}} + +query T +SELECT j from json_backfill@forward_expr where j->'a' > '{"a": "b"}' ORDER BY k +---- + +query T +SELECT j from json_backfill@forward_expr where j->'a' < '{"a": "b"}' ORDER BY k +---- +{"a": [1, 2, 3], "b": [4, 5, 6]} +{"a": [4, 5]} + +query T +SELECT j from json_backfill@forward where j = '[1, 2, 3]' ORDER BY k +---- +[1, 2, 3] + +query T +SELECT j from json_backfill@forward where j = '{"a": [4, 5]}' OR j = '[1, 2, 3]' ORDER BY k +---- +[1, 2, 3] +{"a": [4, 5]} + + +statement ok +DROP INDEX forward_expr; +DROP INDEX forward; + +statement ok +CREATE INDEX forward_expr on json_backfill ((j->'a')); +CREATE INDEX forward on json_backfill (j); + +query T +SELECT j from json_backfill@forward_expr where j->'a' IN ('[1, 2, 3]', '[4,5]') ORDER BY k +---- +{"a": [1, 2, 3], "b": [4, 5, 6]} +{"a": [4, 5]} + +query T +SELECT j from json_backfill@forward_expr where j->'a' = '{"a": "b"}' ORDER BY k +---- +{"a": {"a": "b"}, "d": {"e": [1, 2, 3]}} + +query T +SELECT j from json_backfill@forward_expr where j->'a' > '{"a": "b"}' ORDER BY k +---- + +query T +SELECT j from json_backfill@forward_expr where j->'a' < '{"a": "b"}' ORDER BY k +---- +{"a": [1, 2, 3], "b": [4, 5, 6]} +{"a": [4, 5]} + +query T +SELECT j from json_backfill@forward where j = '[1, 2, 3]' ORDER BY k +---- +[1, 2, 3] + +query T +SELECT j from json_backfill@forward where j = '{"a": [4, 5]}' OR j = '[1, 2, 3]' ORDER BY k +---- +[1, 2, 3] +{"a": [4, 5]} + + # Unique expression indexes. 
statement ok @@ -1009,3 +1162,17 @@ BEGIN; CREATE INDEX t_a_times_three_idx ON t ((a * 3)); SELECT crdb_internal.force_retry('10ms'); COMMIT + +# JSON Expression Unique Indexes + +statement ok +CREATE TABLE uniq_json ( + k INT PRIMARY KEY, + j JSON, + UNIQUE INDEX ((j->'a')) +) + +statement error duplicate key value violates unique constraint "uniq_json_expr_key" +INSERT INTO uniq_json VALUES + (1, '{"a": "b"}'), + (2, '{"a": "b"}') diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index b/pkg/sql/logictest/testdata/logic_test/inverted_index index 81b3a410b980..67655c3299e9 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index @@ -73,14 +73,6 @@ CREATE INVERTED INDEX ON c (foo DESC) statement ok CREATE INVERTED INDEX ON c("qUuX") -statement error column foo of type jsonb is only allowed as the last column in an inverted index\nHINT: see the documentation for more information about inverted indexes -CREATE TABLE d ( - id INT PRIMARY KEY, - foo JSONB, - bar JSONB, - INVERTED INDEX (foo, bar) -) - statement error column foo of type int is not allowed as the last column in an inverted index\nHINT: see the documentation for more information about inverted indexes CREATE TABLE d ( id INT PRIMARY KEY, @@ -1834,8 +1826,6 @@ SELECT j FROM cb2@i WHERE j <@ '[[1], [2]]' ORDER BY k [[2]] [[1], [2]] -statement error pq: index element j->'some_key' of type jsonb is not indexable in a non-inverted index\nHINT: you may want to create an inverted index instead. See the documentation for inverted indexes: https://www.cockroachlabs.com/docs/.*/inverted-indexes.html -CREATE TABLE t2 (j JSON, INDEX ((j->'some_key'))) statement ok CREATE TABLE table_desc_inverted_index ( diff --git a/pkg/sql/logictest/testdata/logic_test/inverted_index_multi_column b/pkg/sql/logictest/testdata/logic_test/inverted_index_multi_column index ae0efdeee95d..b398ca90b69d 100644 --- a/pkg/sql/logictest/testdata/logic_test/inverted_index_multi_column +++ b/pkg/sql/logictest/testdata/logic_test/inverted_index_multi_column @@ -5,12 +5,9 @@ statement error column b of type int is not allowed as the last column in an inv CREATE TABLE m_err (k INT PRIMARY KEY, a INT, b INT, geom GEOMETRY, INVERTED INDEX (a, b)) # Err if a non-last column is not a non-invertable type. -statement error column j of type jsonb is only allowed as the last column in an inverted index\nHINT: see the documentation for more information about inverted indexes: https://www.cockroachlabs.com/docs/.*/inverted-indexes.html -CREATE TABLE m_err (k INT PRIMARY KEY, j JSON, geom GEOMETRY, INVERTED INDEX (j, geom)) +statement error column geom1 of type geometry is only allowed as the last column in an inverted index\nHINT: see the documentation for more information about inverted indexes: https://www.cockroachlabs.com/docs/.*/inverted-indexes.html +CREATE TABLE m_err (k INT PRIMARY KEY, geom1 GEOMETRY , geom GEOMETRY, INVERTED INDEX (geom1, geom)) -# Err if a non-last column is not a non-invertable type. 
-statement error column j of type jsonb is only allowed as the last column in an inverted index\nHINT: see the documentation for more information about inverted indexes: https://www.cockroachlabs.com/docs/.*/inverted-indexes.html -CREATE TABLE m_err (k INT PRIMARY KEY, a INT, j JSON, geom GEOMETRY, INVERTED INDEX (a, j, geom)) statement ok CREATE TABLE l (k INT PRIMARY KEY, a INT, j JSON, INVERTED INDEX (a, j ASC)) @@ -323,3 +320,91 @@ query ITT SELECT * FROM backfill_d@idx WHERE i IN (1, 2, 3, 4) AND s = 'bar' AND j @> '7'::JSON ---- 2 bar [7, 0, 7] + +# Test selecting, inserting, updating, and deleting on a table with a +# multi-column JSON inverted index. +statement ok +CREATE TABLE d ( + id INT PRIMARY KEY, + foo JSONB, + bar JSONB, + INVERTED INDEX idx (foo, bar) +); + +# Testing inserting +statement ok +INSERT into d VALUES + (1, '"foo"', '[7]'), + (2, '"bar"', '[7, 0, 7]'), + (3, '"baz"', '{"a": "b"}'), + (4, '"foo"', '[7, 8, 9, 10]'), + (5, '"foo"', '[[0], [7, 8, 9, 10]]') + +query ITT +SELECT id, foo, bar FROM d@idx where foo = '"foo"' AND bar->0 = '7' ORDER BY id +---- +1 "foo" [7] +4 "foo" [7, 8, 9, 10] + +query ITT +SELECT id, foo, bar FROM d@idx where foo = '"foo"' AND bar->1 = '0' ORDER BY id +---- + +query ITT +SELECT id, foo, bar FROM d@idx where foo = '"foo"' AND bar->1 = '8' ORDER BY id +---- +4 "foo" [7, 8, 9, 10] + +query ITT +SELECT id, foo, bar FROM d@idx where foo = '"foo"' AND bar->0 @> '[0]' ORDER BY id +---- +5 "foo" [[0], [7, 8, 9, 10]] + +query ITT +SELECT id, foo, bar FROM d@idx where foo = '"foo"' AND bar->0 <@ '[0]' ORDER BY id +---- +5 "foo" [[0], [7, 8, 9, 10]] + +# Testing deleting +statement ok +DELETE FROM d WHERE id = 5 + +query ITT +SELECT id, foo, bar FROM d@idx where foo = '"foo"' AND bar->0 <@ '[0]' ORDER BY id +---- + +# Testing updating +statement ok +UPDATE d SET foo = '"updated"' WHERE id = 2 + +query ITT +SELECT id, foo, bar FROM d@idx where foo = '"updated"' AND bar->0 @> '7' ORDER BY id +---- +2 "updated" [7, 0, 7] + +# Backfill this multi-column inverted index. + +statement ok +DROP INDEX d@idx + +statement ok +INSERT into d VALUES + (6, '"backfilling"', '[[0], [1], 2, 3]'), + (7, '"q"', '[[0], [1], [2], []]'), + (8, '"backfilling"', '[[0], [1], [2], []]') + + +statement ok +CREATE INVERTED INDEX idx on d (foo, bar) + +query ITT +SELECT id, foo, bar FROM d@idx where foo = '"backfilling"' AND bar->2 @> '2' ORDER BY id +---- +6 "backfilling" [[0], [1], 2, 3] +8 "backfilling" [[0], [1], [2], []] + +query ITT +SELECT id, foo, bar FROM d@idx where foo = '"foo"' AND bar->0 = '7' ORDER BY id +---- +1 "foo" [7] +4 "foo" [7, 8, 9, 10] diff --git a/pkg/sql/logictest/testdata/logic_test/json b/pkg/sql/logictest/testdata/logic_test/json index da291dc6de93..684c6a6c0119 100644 --- a/pkg/sql/logictest/testdata/logic_test/json +++ b/pkg/sql/logictest/testdata/logic_test/json @@ -179,10 +179,10 @@ SELECT bar FROM foo WHERE bar->'a' = '"b"'::JSON ---- {"a": "b"} -statement error pgcode 0A000 can't order by column type jsonb.*\nHINT.*\n.*35706 +statement ok SELECT bar FROM foo ORDER BY bar -statement error pgcode 0A000 column k is of type jsonb and thus is not indexable +statement ok CREATE TABLE pk (k JSON PRIMARY KEY) query T rowsort diff --git a/pkg/sql/logictest/testdata/logic_test/json_index b/pkg/sql/logictest/testdata/logic_test/json_index new file mode 100644 index 000000000000..11d77fab8f0a --- /dev/null +++ b/pkg/sql/logictest/testdata/logic_test/json_index @@ -0,0 +1,509 @@ +# Add JSON columns as primary index. 
+statement ok +DROP TABLE IF EXISTS t; +CREATE TABLE t (x JSONB PRIMARY KEY) + +# Adding JSON primitive types. +statement ok +INSERT INTO t VALUES + ('"a"'::JSONB), + ('"b"'::JSONB), + ('"aa"'::JSONB), + ('"abcdefghi"'::JSONB), + ('100'::JSONB), + ('1'::JSONB), + ('{"a": "b"}'), + ('[]') + +# Ensuring the ordering is intact. +query T +SELECT x FROM t ORDER BY x +---- +"a" +"aa" +"abcdefghi" +"b" +1 +100 +[] +{"a": "b"} + + +# Test that unique indexes reject bad inserts. +statement error pq: duplicate key value violates unique constraint "t_pkey" +INSERT INTO t VALUES + ('"a"'::JSONB) + +query T +SELECT x FROM t@t_pkey ORDER BY x +---- +"a" +"aa" +"abcdefghi" +"b" +1 +100 +[] +{"a": "b"} + +# Use the index for point lookups. +query T +SELECT x FROM t@t_pkey WHERE x = '"a"' +---- +"a" + +query T +SELECT x FROM t@t_pkey WHERE x = '"aa"' +---- +"aa" + +query T +SELECT x FROM t@t_pkey WHERE x = '100' +---- +100 + +query T +SELECT x FROM t@t_pkey WHERE x = '12' +---- + +query T +SELECT x FROM t@t_pkey WHERE x = '{"a": "b"}' +---- +{"a": "b"} + +# Using the index for bounded scans. +query T +SELECT x FROM t@t_pkey WHERE x > '1' ORDER BY x +---- +100 +[] +{"a": "b"} + +query T +SELECT x FROM t@t_pkey WHERE x < '1' ORDER BY x +---- +"a" +"aa" +"abcdefghi" +"b" + + +query T +SELECT x FROM t@t_pkey WHERE x > '1' OR x < '1' ORDER BY x +---- +"a" +"aa" +"abcdefghi" +"b" +100 +[] +{"a": "b"} + +query T +SELECT x FROM t@t_pkey WHERE x > '1' AND x < '1' ORDER BY x +---- + +# Trying to order by in a descending fashion. +query T +SELECT x FROM t@t_pkey WHERE x > '1' OR x < '1' ORDER BY x DESC +---- +{"a": "b"} +[] +100 +"b" +"abcdefghi" +"aa" +"a" + +# Adding more primitive JSON values. +statement ok +INSERT INTO t VALUES + ('true'), + ('false'), + ('null'), + ('"aaaaaaayouube"'), + ('"testing spaces"'), + ('"Testing Punctuation?!."') + +query T +SELECT x FROM t@t_pkey ORDER BY x +---- +null +"Testing Punctuation?!." +"a" +"aa" +"aaaaaaayouube" +"abcdefghi" +"b" +"testing spaces" +1 +100 +false +true +[] +{"a": "b"} + +query T +SELECT x FROM t@t_pkey WHERE x > 'true' ORDER BY x +---- +[] +{"a": "b"} + +query T +SELECT x FROM t@t_pkey WHERE x < 'false' ORDER BY x +---- +null +"Testing Punctuation?!." +"a" +"aa" +"aaaaaaayouube" +"abcdefghi" +"b" +"testing spaces" +1 +100 + +# Testing JSON Arrays. +statement ok +DROP TABLE IF EXISTS t; +CREATE TABLE t (x JSONB PRIMARY KEY) + +statement ok +INSERT INTO t VALUES + ('[]'), + ('[null]'), + ('[1]'), + ('[null, null, false, true, "a", 1]'), + ('[{"a":"b"}]'), + ('[{"a":"b", "c": [1, 2, 3, 4, 5]}]') + +query T +SELECT x FROM t@t_pkey ORDER BY x +---- +[] +[null] +[1] +[{"a": "b"}] +[{"a": "b", "c": [1, 2, 3, 4, 5]}] +[null, null, false, true, "a", 1] + +query T +SELECT x FROM t@t_pkey where x = '[1]' ORDER BY x +---- +[1] + +query T +SELECT x FROM t@t_pkey where x >= '[1]' ORDER BY x +---- +[1] +[{"a": "b"}] +[{"a": "b", "c": [1, 2, 3, 4, 5]}] +[null, null, false, true, "a", 1] + +query T +SELECT x FROM t@t_pkey where x <= '[1]' ORDER BY x +---- +[] +[null] +[1] + +query T +SELECT x FROM t@t_pkey where x >= '[1]' AND x <= '{"a": "b"}' ORDER BY x +---- +[1] +[{"a": "b"}] +[{"a": "b", "c": [1, 2, 3, 4, 5]}] +[null, null, false, true, "a", 1] + +# Nested JSON Arrays. +statement ok +INSERT INTO t VALUES + ('[1, [2, 3]]'), + ('[1, [2, [3, [4]]]]') + +query T +SELECT x FROM t@t_pkey WHERE x = '[1, [2, 3]]' ORDER BY x +---- +[1, [2, 3]] + +query T +SELECT x FROM t@t_pkey WHERE x = '[1, [2, [3, [4]]]]' ORDER BY x +---- +[1, [2, [3, [4]]]] + +# Testing the ordering again. 
+query T +SELECT x FROM t@t_pkey ORDER BY x +---- +[] +[null] +[1] +[{"a": "b"}] +[{"a": "b", "c": [1, 2, 3, 4, 5]}] +[1, [2, 3]] +[1, [2, [3, [4]]]] +[null, null, false, true, "a", 1] + +# Testing the scans with nested arrays. +query T +SELECT x FROM t@t_pkey WHERE x < '[1, [2, [3, [4]]]]' ORDER BY X +---- +[] +[null] +[1] +[{"a": "b"}] +[{"a": "b", "c": [1, 2, 3, 4, 5]}] +[1, [2, 3]] + +# Testing JSON Objects. +statement ok +DROP TABLE IF EXISTS t; +CREATE TABLE t (x JSONB PRIMARY KEY) + +statement ok +INSERT INTO t VALUES + ('{}'), + ('{"a": 1}'), + ('{"a": "sh", "b": 1}'), + ('{"a": ["1"]}'), + ('{"a": [{"b":"c"}]}'), + ('{"c": true, "d": null, "newkey": "newvalue"}'), + ('{"e": {"f": {"g": 1}}, "f": [1, 2, 3]}'), + ('{ "aa": 1, "c": 1}'), + ('{"b": 1, "d": 1}') + +# Testing the ordering again. +query T +SELECT x FROM t@t_pkey ORDER BY x +---- +{} +{"a": 1} +{"a": ["1"]} +{"a": [{"b": "c"}]} +{"a": "sh", "b": 1} +{"aa": 1, "c": 1} +{"b": 1, "d": 1} +{"e": {"f": {"g": 1}}, "f": [1, 2, 3]} +{"c": true, "d": null, "newkey": "newvalue"} + +query T +SELECT x FROM t@t_pkey WHERE x >= '{}' ORDER BY x; +---- +{} +{"a": 1} +{"a": ["1"]} +{"a": [{"b": "c"}]} +{"a": "sh", "b": 1} +{"aa": 1, "c": 1} +{"b": 1, "d": 1} +{"e": {"f": {"g": 1}}, "f": [1, 2, 3]} +{"c": true, "d": null, "newkey": "newvalue"} + +query T +SELECT x FROM t@t_pkey WHERE x < '{}' ORDER BY x; +---- + +query T +SELECT x FROM t@t_pkey WHERE x = '{"e": {"f": {"g": 1}}, "f": [1, 2, 3]}' ORDER BY x; +---- +{"e": {"f": {"g": 1}}, "f": [1, 2, 3]} + +# Ensure that we can order by JSONS without any indexes. +statement ok +DROP TABLE t; +CREATE TABLE t (x JSONB); +INSERT INTO t VALUES + ('{}'), + ('[]'), + ('true'), + ('false'), + ('null'), + ('"crdb"'), + ('[1, 2, 3]'), + ('1'), + ('{"a": "b", "c": "d"}'), + (NULL) + + +query T +SELECT x FROM t@t_pkey ORDER BY x +---- +NULL +null +"crdb" +1 +false +true +[] +[1, 2, 3] +{} +{"a": "b", "c": "d"} + +query T +SELECT x FROM t@t_pkey ORDER BY x DESC +---- +{"a": "b", "c": "d"} +{} +[1, 2, 3] +[] +true +false +1 +"crdb" +null +NULL + +# Test to show JSON Null is different from NULL. +query T +SELECT x FROM t@t_pkey WHERE x IS NOT NULL ORDER BY x +---- +null +"crdb" +1 +false +true +[] +[1, 2, 3] +{} +{"a": "b", "c": "d"} + + +# Test JSONS of composite types without an index. +statement ok +CREATE TABLE tjson(x JSONB); +INSERT INTO tjson VALUES + ('1.250'), + ('1.0'), + ('1.000'), + ('1.111111'), + ('10'), + ('[1, 2.0, 1.21, 1.00]'), + ('{"a": [1, 1.1], "b": 1.0000, "c": 10.0}') + +# Ensure these are round tripped correctly. +query T +SELECT x FROM tjson ORDER BY x, rowid +---- +1.0 +1.000 +1.111111 +1.250 +10 +[1, 2.0, 1.21, 1.00] +{"a": [1, 1.1], "b": 1.0000, "c": 10.0} + +# Test JSONS of composite types with an index. +statement ok +CREATE TABLE y(x JSONB PRIMARY KEY); +INSERT INTO y VALUES + ('1.00'), + ('1.250'), + ('10'), + ('[1, 2.0, 1.21, 1.00]'), + ('{"a": [1, 1.1], "b": 1.0000, "c": 10.0}') + +# Ensure these are round tripped correctly. +query T +SELECT x FROM y ORDER BY x +---- +1.00 +1.250 +10 +[1, 2.0, 1.21, 1.00] +{"a": [1, 1.1], "b": 1.0000, "c": 10.0} + +# Test that unique indexes reject bad inserts for composite types. +statement error pq: duplicate key value violates unique constraint "y_pkey" +INSERT INTO y VALUES + ('1.0000') + +# Testing with the descending designation. 
+statement ok +DROP TABLE t; +CREATE TABLE t (x JSONB) + +statement ok +CREATE INDEX i ON t(x DESC) + +statement ok +INSERT INTO t VALUES + ('{}'), + ('[]'), + ('true'), + ('false'), + ('null'), + ('"crdb"'), + ('[1, 2, 3]'), + ('1'), + ('{"a": "b", "c": "d"}'), + ('[null]'), + ('[1]'), + ('[null, null, false, true, "a", 1]'), + ('[{"a":"b"}]'), + ('[{"a":"b", "c": [1, 2, 3, 4, 5]}]') + +query T +SELECT x FROM t@i ORDER BY x; +---- +null +"crdb" +1 +false +true +[] +[null] +[1] +[{"a": "b"}] +[{"a": "b", "c": [1, 2, 3, 4, 5]}] +[1, 2, 3] +[null, null, false, true, "a", 1] +{} +{"a": "b", "c": "d"} + + +# Testing different joins on indexed JSONS. +statement ok +DROP TABLE IF EXISTS t1, t2 CASCADE; +CREATE TABLE t1 (x JSONB PRIMARY KEY); +CREATE TABLE t2 (x JSONB PRIMARY KEY); +INSERT INTO t1 VALUES + ('[1, [2, 3]]'), + ('[1, [2, [3, [4]]]]'); +INSERT INTO t2 VALUES + ('[1, [2, 3]]'), + ('{}'), + ('[1, [2, 4]]') + +query T rowsort +SELECT t1.x FROM t1 INNER HASH JOIN t2 ON t1.x = t2.x +---- +[1, [2, 3]] + +query T rowsort +SELECT t1.x FROM t1 INNER MERGE JOIN t2 ON t1.x = t2.x +---- +[1, [2, 3]] + +query T rowsort +SELECT t1.x FROM t1 INNER LOOKUP JOIN t2 ON t1.x = t2.x +---- +[1, [2, 3]] + +# Test that we can group by JSONS. +query T +SELECT x FROM t GROUP BY x ORDER BY x; +---- +null +"crdb" +1 +false +true +[] +[null] +[1] +[{"a": "b"}] +[{"a": "b", "c": [1, 2, 3, 4, 5]}] +[1, 2, 3] +[null, null, false, true, "a", 1] +{} +{"a": "b", "c": "d"} diff --git a/pkg/sql/logictest/testdata/logic_test/pg_catalog b/pkg/sql/logictest/testdata/logic_test/pg_catalog index 9516136f8819..152e33b157f7 100644 --- a/pkg/sql/logictest/testdata/logic_test/pg_catalog +++ b/pkg/sql/logictest/testdata/logic_test/pg_catalog @@ -3375,29 +3375,30 @@ JOIN pg_operator c ON c.oprname = '>' AND b.proargtypes[0] = c.oprleft AND b.pro WHERE (b.proname = 'max' OR b.proname = 'bool_or') AND c.oid = a.aggsortop; ---- oid oprname aggsortop -1224236426 > 1224236426 -3636536082 > 3636536082 3636536082 > 3636536082 -2948286002 > 2948286002 -3234851498 > 3234851498 -2318307066 > 2318307066 1737252658 > 1737252658 1737252658 > 1737252658 -1383827510 > 1383827510 +1224236426 > 1224236426 +3636536082 > 3636536082 264553706 > 264553706 -2105536758 > 2105536758 -1928531314 > 1928531314 -3421685890 > 3421685890 883535762 > 883535762 +1383827510 > 1383827510 +2318307066 > 2318307066 +3234851498 > 3234851498 +256681770 > 256681770 530358714 > 530358714 -3802002898 > 3802002898 +2105536758 > 2105536758 +1928531314 > 1928531314 1737252658 > 1737252658 1737252658 > 1737252658 -1064453514 > 1064453514 -1778355034 > 1778355034 -256681770 > 256681770 +2948286002 > 2948286002 2139039570 > 2139039570 +3802002898 > 3802002898 3457382662 > 3457382662 +3421685890 > 3421685890 +1064453514 > 1064453514 +1778355034 > 1778355034 +3944320082 > 3944320082 1385359122 > 1385359122 # Check whether correct operator's oid is set for min, bool_and and every. 
@@ -3408,30 +3409,31 @@ JOIN pg_operator c ON c.oprname = '<' AND b.proargtypes[0] = c.oprleft AND b.pro WHERE (b.proname = 'min' OR b.proname = 'bool_and' OR b.proname = 'every') AND c.oid = a.aggsortop; ---- oid oprname aggsortop -3859576864 < 3859576864 -2134593616 < 2134593616 2134593616 < 2134593616 2134593616 < 2134593616 -1446343536 < 1446343536 -2457977576 < 2457977576 -2790955336 < 2790955336 235310192 < 235310192 235310192 < 235310192 -2011297100 < 2011297100 +3859576864 < 3859576864 +2134593616 < 2134593616 3269496816 < 3269496816 -2104629996 < 2104629996 -3942776496 < 3942776496 -4132205728 < 4132205728 3676560592 < 3676560592 +2011297100 < 2011297100 +2790955336 < 2790955336 +2457977576 < 2457977576 +426663592 < 426663592 1494969736 < 1494969736 -3842027408 < 3842027408 +2104629996 < 2104629996 +3942776496 < 3942776496 235310192 < 235310192 235310192 < 235310192 -2300570720 < 2300570720 -3675947880 < 3675947880 -426663592 < 426663592 +1446343536 < 1446343536 2699108304 < 2699108304 +3842027408 < 3842027408 2897050084 < 2897050084 +4132205728 < 4132205728 +2300570720 < 2300570720 +3675947880 < 3675947880 +3575809104 < 3575809104 1579888144 < 1579888144 subtest collated_string_type diff --git a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go index 67b20ec7f1c2..e669955b4ded 100644 --- a/pkg/sql/logictest/tests/fakedist-disk/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist-disk/generated_test.go @@ -1059,6 +1059,13 @@ func TestLogic_json_builtins( runLogicTest(t, "json_builtins") } +func TestLogic_json_index( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "json_index") +} + func TestLogic_kv_builtin_functions( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go index d897cdf6cc52..c73e6b466458 100644 --- a/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist-vec-off/generated_test.go @@ -1059,6 +1059,13 @@ func TestLogic_json_builtins( runLogicTest(t, "json_builtins") } +func TestLogic_json_index( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "json_index") +} + func TestLogic_kv_builtin_functions( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/fakedist/generated_test.go b/pkg/sql/logictest/tests/fakedist/generated_test.go index b5ab2ecb7404..30a9826e6359 100644 --- a/pkg/sql/logictest/tests/fakedist/generated_test.go +++ b/pkg/sql/logictest/tests/fakedist/generated_test.go @@ -1066,6 +1066,13 @@ func TestLogic_json_builtins( runLogicTest(t, "json_builtins") } +func TestLogic_json_index( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "json_index") +} + func TestLogic_kv_builtin_functions( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go index 20a4224814e8..978fea60dd22 100644 --- a/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go +++ b/pkg/sql/logictest/tests/local-legacy-schema-changer/generated_test.go @@ -1038,6 +1038,13 @@ func TestLogic_json_builtins( runLogicTest(t, "json_builtins") } +func TestLogic_json_index( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "json_index") +} + func TestLogic_kv_builtin_functions( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-mixed-22.2-23.1/generated_test.go 
b/pkg/sql/logictest/tests/local-mixed-22.2-23.1/generated_test.go index 6a996d2f1d58..88afcc2436b6 100644 --- a/pkg/sql/logictest/tests/local-mixed-22.2-23.1/generated_test.go +++ b/pkg/sql/logictest/tests/local-mixed-22.2-23.1/generated_test.go @@ -1052,6 +1052,13 @@ func TestLogic_json_builtins( runLogicTest(t, "json_builtins") } +func TestLogic_json_index( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "json_index") +} + func TestLogic_kv_builtin_functions( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local-vec-off/generated_test.go b/pkg/sql/logictest/tests/local-vec-off/generated_test.go index 860b4ca1cf5f..f698fd7082b8 100644 --- a/pkg/sql/logictest/tests/local-vec-off/generated_test.go +++ b/pkg/sql/logictest/tests/local-vec-off/generated_test.go @@ -1066,6 +1066,13 @@ func TestLogic_json_builtins( runLogicTest(t, "json_builtins") } +func TestLogic_json_index( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "json_index") +} + func TestLogic_kv_builtin_functions( t *testing.T, ) { diff --git a/pkg/sql/logictest/tests/local/generated_test.go b/pkg/sql/logictest/tests/local/generated_test.go index d9220d202593..1d3c3dfef086 100644 --- a/pkg/sql/logictest/tests/local/generated_test.go +++ b/pkg/sql/logictest/tests/local/generated_test.go @@ -1157,6 +1157,13 @@ func TestLogic_json_builtins( runLogicTest(t, "json_builtins") } +func TestLogic_json_index( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runLogicTest(t, "json_index") +} + func TestLogic_kv_builtin_functions( t *testing.T, ) { diff --git a/pkg/sql/opt/exec/execbuilder/testdata/json b/pkg/sql/opt/exec/execbuilder/testdata/json new file mode 100644 index 000000000000..a536ae48b2fc --- /dev/null +++ b/pkg/sql/opt/exec/execbuilder/testdata/json @@ -0,0 +1,401 @@ +# LogicTest: local + +statement ok +CREATE TABLE t (x JSONB PRIMARY KEY) + +# Testing range scans on forward indexes. 
+ +query T +EXPLAIN SELECT x FROM t WHERE x = 'null'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [/'null' - /'null'] + +query T +EXPLAIN SELECT x FROM t WHERE x = '"a"'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [/'"a"' - /'"a"'] + +query T +EXPLAIN SELECT x FROM t WHERE x = '1'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [/'1' - /'1'] + +query T +EXPLAIN SELECT x FROM t WHERE x = 'false'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [/'false' - /'false'] + +query T +EXPLAIN SELECT x FROM t WHERE x = 'true'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [/'true' - /'true'] + +query T +EXPLAIN SELECT x from t WHERE x = '[1, 2, 3]'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [/'[1, 2, 3]' - /'[1, 2, 3]'] + + +query T +EXPLAIN SELECT x from t WHERE x = '{"a": [1, 2, 3], "b": [1, 2]}'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [/'{"a": [1, 2, 3], "b": [1, 2]}' - /'{"a": [1, 2, 3], "b": [1, 2]}'] + + +query T +EXPLAIN SELECT x FROM t WHERE x < '1'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [ - /'1') + +query T +EXPLAIN SELECT x FROM t WHERE x < '"ABCD"'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [ - /'"ABCD"') + +query T +EXPLAIN SELECT x FROM t WHERE x < '[1, 2, 3]'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [ - /'[1, 2, 3]') + +query T +EXPLAIN SELECT x FROM t WHERE x <= '[]'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [ - /'[]'] + +query T +EXPLAIN SELECT x FROM t WHERE x > '{"a": "b"}'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: (/'{"a": "b"}' - ] + +query T +EXPLAIN SELECT x FROM t WHERE x > '{"a": "b"}'::JSONB +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: (/'{"a": "b"}' - ] + +query T +EXPLAIN SELECT x FROM t WHERE x > '{"a": "b"}'::JSONB AND x < '[1, 2, 3]'::JSONB ORDER BY x +---- +distribution: local +vectorized: true +· +• norows + +query T +EXPLAIN SELECT x FROM t WHERE x <= '{"a": "b"}'::JSONB AND x >= '[1, 2, 3]'::JSONB ORDER BY x +---- +distribution: local +vectorized: true +· +• scan + missing stats + table: t@t_pkey + spans: [/'[1, 2, 3]' - /'{"a": "b"}'] + +query T +EXPLAIN SELECT x FROM t WHERE x <= '"a"'::JSONB OR x >= 'null'::JSONB ORDER BY x; +---- +distribution: local +vectorized: true +· +• filter +│ filter: (x <= '"a"') OR (x >= 'null') +│ +└── • scan + missing stats + table: t@t_pkey + spans: FULL SCAN + +# Multicolumn index, including JSONB + +statement ok +CREATE TABLE s (x INT, y JSONB, z INT, INDEX i (x, y, z)) + + +query T +EXPLAIN SELECT x, y, z FROM s WHERE x = 2 AND y < '[1, 2, 3]'::JSONB AND z = 100 ORDER BY y +---- +distribution: local +vectorized: true +· +• filter +│ filter: z = 100 +│ +└── • scan + missing stats + table: s@i + spans: (/2/NULL - /2/'[1, 2, 3]') + +query T +EXPLAIN SELECT x, y, z FROM s WHERE y >= '"a"'::JSONB ORDER BY y +---- +distribution: local +vectorized: 
true +· +• sort +│ order: +y +│ +└── • filter + │ filter: y >= '"a"' + │ + └── • scan + missing stats + table: s@s_pkey + spans: FULL SCAN + +# Ensuring that the presence of composite values results in +# encoding in the valueside as well for a given K/V pair. +statement ok +CREATE TABLE composite (k INT PRIMARY KEY, j JSONB, FAMILY (k, j)); +CREATE INDEX on composite (j); + +query T kvtrace +INSERT INTO composite VALUES (1, '1.00'::JSONB), (2, '1'::JSONB), (3, '2'::JSONB), + (4, '3.0'::JSONB), (5, '"a"'::JSONB) +---- +CPut /Table/108/1/1/0 -> /TUPLE/ +InitPut /Table/108/2/"G*\x02\x00\x00\x89\x88" -> /BYTES/0x2f0f0c200000002000000403348964 +CPut /Table/108/1/2/0 -> /TUPLE/ +InitPut /Table/108/2/"G*\x02\x00\x00\x8a\x88" -> /BYTES/ +CPut /Table/108/1/3/0 -> /TUPLE/ +InitPut /Table/108/2/"G*\x04\x00\x00\x8b\x88" -> /BYTES/ +CPut /Table/108/1/4/0 -> /TUPLE/ +InitPut /Table/108/2/"G*\x06\x00\x00\x8c\x88" -> /BYTES/0x2f0f0c20000000200000040334891e +CPut /Table/108/1/5/0 -> /TUPLE/ +InitPut /Table/108/2/"F\x12a\x00\x01\x00\x8d\x88" -> /BYTES/ + +query T kvtrace +SELECT j FROM composite where j = '1.00'::JSONB +---- +Scan /Table/108/2/"G*\x02\x00\x0{0"-1"} + +query T +SELECT j FROM composite ORDER BY j; +---- +"a" +1.00 +1 +2 +3.0 + +# JSON Expression Indexes. +statement ok +CREATE TABLE d (a INT, j JSON); +CREATE INDEX json_expr_index on d ((j->'a')) + +statement ok +INSERT INTO d VALUES + (1, '{"a": "hello"}'), + (2, '{"a": "b"}'), + (3, '{"a": "bye"}'), + (4, '{"a": "json"}') + + +query T +EXPLAIN SELECT j from d where j->'a' = '"b"' ORDER BY a +---- +distribution: local +vectorized: true +· +• sort +│ order: +a +│ +└── • index join + │ table: d@d_pkey + │ + └── • scan + missing stats + table: d@json_expr_index + spans: [/'"b"' - /'"b"'] + + +query T +EXPLAIN SELECT j from d where j->'a' = '"b"' OR j->'a' = '"bye"' ORDER BY a +---- +distribution: local +vectorized: true +· +• sort +│ order: +a +│ +└── • index join + │ table: d@d_pkey + │ + └── • scan + missing stats + table: d@json_expr_index + spans: [/'"b"' - /'"b"'] [/'"bye"' - /'"bye"'] + +# The expression index is not used for this query. +query T +EXPLAIN SELECT j from d where j > '{"a":"b"}' ORDER BY a +---- +distribution: local +vectorized: true +· +• sort +│ order: +a +│ +└── • filter + │ filter: j > '{"a": "b"}' + │ + └── • scan + missing stats + table: d@d_pkey + spans: FULL SCAN + +query T +EXPLAIN SELECT j from d where j->'a' = '"a"' ORDER BY a +---- +distribution: local +vectorized: true +· +• sort +│ order: +a +│ +└── • index join + │ table: d@d_pkey + │ + └── • scan + missing stats + table: d@json_expr_index + spans: [/'"a"' - /'"a"'] + +query T +EXPLAIN SELECT j from d where j->'a' = '"hello"' ORDER BY a +---- +distribution: local +vectorized: true +· +• sort +│ order: +a +│ +└── • index join + │ table: d@d_pkey + │ + └── • scan + missing stats + table: d@json_expr_index + spans: [/'"hello"' - /'"hello"'] + +# Inverted Indexes where JSON is also forward indexed. 
+statement ok +INSERT INTO d VALUES + (5, '{"a": "forward", "json": "inverted"}'), + (6, '{"a": "c", "b": "d"}') + + +statement ok +CREATE INVERTED INDEX json_inv on d ((j->'a'), j) + + +query T +EXPLAIN SELECT j from d where j->'a' = '"forward"' AND j->'json' = '"inverted"' ORDER BY a +---- +distribution: local +vectorized: true +· +• sort +│ order: +a +│ +└── • index join + │ table: d@d_pkey + │ + └── • scan + missing stats + table: d@json_inv + spans: 1 span + +query T +EXPLAIN SELECT j from d where j->'a' = '"c"' AND j->'json' = '"d"' ORDER BY a +---- +distribution: local +vectorized: true +· +• sort +│ order: +a +│ +└── • index join + │ table: d@d_pkey + │ + └── • scan + missing stats + table: d@json_inv + spans: 1 span diff --git a/pkg/sql/opt/exec/execbuilder/testdata/stats b/pkg/sql/opt/exec/execbuilder/testdata/stats index 8a924bb8d80d..3dddc5bee8c9 100644 --- a/pkg/sql/opt/exec/execbuilder/testdata/stats +++ b/pkg/sql/opt/exec/execbuilder/testdata/stats @@ -355,6 +355,8 @@ limit │ ├── columns: j:1 │ ├── immutable │ ├── stats: [rows=1, distinct(1)=1, null(1)=1] + │ │ histogram(1)= 0 1 + │ │ <--- NULL │ ├── cost: 23.775 │ ├── fd: ()-->(1) │ ├── limit hint: 1.00 @@ -362,6 +364,8 @@ limit │ ├── scan tj │ │ ├── columns: j:1 │ │ ├── stats: [rows=5, distinct(1)=4, null(1)=1] + │ │ │ histogram(1)= 0 1 0 1 2 1 + │ │ │ <--- NULL --- '1' --- '{}' │ │ ├── cost: 23.695 │ │ ├── limit hint: 5.00 │ │ ├── distribution: test diff --git a/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go b/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go index ced9ea9bef2b..83fb197ec1cc 100644 --- a/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go +++ b/pkg/sql/opt/exec/execbuilder/tests/local/generated_test.go @@ -343,6 +343,13 @@ func TestExecBuild_join_order( runExecBuildLogicTest(t, "join_order") } +func TestExecBuild_json( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runExecBuildLogicTest(t, "json") +} + func TestExecBuild_limit( t *testing.T, ) { diff --git a/pkg/sql/opt/indexrec/candidate.go b/pkg/sql/opt/indexrec/candidate.go index d94b5f265476..327fbc58dfcb 100644 --- a/pkg/sql/opt/indexrec/candidate.go +++ b/pkg/sql/opt/indexrec/candidate.go @@ -16,6 +16,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/opt" "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" "github.com/cockroachdb/cockroach/pkg/sql/opt/memo" + "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/intsets" ) @@ -223,7 +224,10 @@ func (ics indexCandidateSet) addOrderingIndex(ordering opt.Ordering) { func (ics *indexCandidateSet) addJoinIndexes(expr memo.FiltersExpr) { outerCols := expr.OuterCols().ToList() for _, col := range outerCols { - if colinfo.ColumnTypeIsIndexable(ics.md.ColumnMeta(col).Type) { + // TODO (Shivam): Index recommendations should not only allow JSON columns + // to be part of inverted indexes since they are also forward indexable. 
+ if colinfo.ColumnTypeIsIndexable(ics.md.ColumnMeta(col).Type) && + ics.md.ColumnMeta(col).Type.Family() != types.JsonFamily { ics.addSingleColumnIndex(col, false /* desc */, ics.joinCandidates) } else { ics.addSingleColumnIndex(col, false /* desc */, ics.invertedCandidates) @@ -309,7 +313,10 @@ func (ics *indexCandidateSet) addVariableExprIndex( switch expr := expr.(type) { case *memo.VariableExpr: col := expr.Col - if colinfo.ColumnTypeIsIndexable(ics.md.ColumnMeta(col).Type) { + // TODO (Shivam): Index recommendations should not only allow JSON columns + // to be part of inverted indexes since they are also forward indexable. + if colinfo.ColumnTypeIsIndexable(ics.md.ColumnMeta(col).Type) && + ics.md.ColumnMeta(col).Type.Family() != types.JsonFamily { ics.addSingleColumnIndex(col, false /* desc */, indexCandidates) } else { ics.addSingleColumnIndex(col, false /* desc */, ics.invertedCandidates) @@ -339,7 +346,10 @@ func (ics *indexCandidateSet) addMultiColumnIndex( index := make([]cat.IndexColumn, 0, len(tableToCols[currTable])) for _, colSlice := range tableToCols[currTable] { indexCol := colSlice[0] - if colinfo.ColumnTypeIsIndexable(indexCol.Column.DatumType()) { + // TODO (Shivam): Index recommendations should not only allow JSON columns + // to be part of inverted indexes since they are also forward indexable. + if indexCol.Column.DatumType().Family() != types.JsonFamily && + colinfo.ColumnTypeIsIndexable(indexCol.Column.DatumType()) { index = append(index, indexCol) } } diff --git a/pkg/sql/opt/indexrec/hypothetical_table.go b/pkg/sql/opt/indexrec/hypothetical_table.go index 0037badc452c..4722d6cd374e 100644 --- a/pkg/sql/opt/indexrec/hypothetical_table.go +++ b/pkg/sql/opt/indexrec/hypothetical_table.go @@ -42,7 +42,10 @@ func BuildOptAndHypTableMaps( for _, indexCols := range indexes { indexOrd := hypTable.Table.IndexCount() + len(hypIndexes) lastKeyCol := indexCols[len(indexCols)-1] - inverted := !colinfo.ColumnTypeIsIndexable(lastKeyCol.DatumType()) + // TODO (Shivam): Index recommendations should not only allow JSON columns + // to be part of inverted indexes since they are also forward indexable. 
+ inverted := !(colinfo.ColumnTypeIsIndexable(lastKeyCol.DatumType()) && + lastKeyCol.DatumType().Family() != types.JsonFamily) if inverted { invertedCol := hypTable.addInvertedCol(lastKeyCol.Column) indexCols[len(indexCols)-1] = cat.IndexColumn{Column: invertedCol} diff --git a/pkg/sql/opt/norm/testdata/rules/inline b/pkg/sql/opt/norm/testdata/rules/inline index eac039b21cab..084b6efd5aa6 100644 --- a/pkg/sql/opt/norm/testdata/rules/inline +++ b/pkg/sql/opt/norm/testdata/rules/inline @@ -740,7 +740,7 @@ project ├── columns: k:1!null i:2 s:3 j:4 v:5 x:6 ├── immutable ├── key: (1) - ├── fd: (1)-->(2-4), (3)-->(5), (4)-->(6) + ├── fd: (1)-->(2-4,6), (3)-->(5) ├── select │ ├── columns: k:1!null i:2 s:3 j:4 │ ├── immutable @@ -769,7 +769,7 @@ project ├── columns: k:1!null i:2 s:3 j:4 v:5 x:6 ├── immutable ├── key: (1) - ├── fd: (1)-->(2-4), (3)-->(5), (4)-->(6) + ├── fd: (1)-->(2-4,6), (3)-->(5) ├── select │ ├── columns: k:1!null i:2 s:3 j:4 │ ├── immutable @@ -835,12 +835,12 @@ semi-join (hash) ├── columns: k:1!null i:2 s:3 j:4 v:5 x:6 ├── immutable ├── key: (1) - ├── fd: (1)-->(2-4), (3)-->(5), (4)-->(6) + ├── fd: (1)-->(2-4,6), (3)-->(5) ├── project │ ├── columns: v:5 x:6 k:1!null i:2 s:3 j:4 │ ├── immutable │ ├── key: (1) - │ ├── fd: (1)-->(2-4), (3)-->(5), (4)-->(6) + │ ├── fd: (1)-->(2-4,6), (3)-->(5) │ ├── scan virt │ │ ├── columns: k:1!null i:2 s:3 j:4 │ │ ├── computed column expressions diff --git a/pkg/sql/opt/norm/testdata/rules/project b/pkg/sql/opt/norm/testdata/rules/project index f0bd44b41e09..ce57a23bcb20 100644 --- a/pkg/sql/opt/norm/testdata/rules/project +++ b/pkg/sql/opt/norm/testdata/rules/project @@ -1345,7 +1345,7 @@ ON True project ├── columns: x:1!null z:2 j:3 x:7 "?column?":8!null ├── immutable - ├── fd: (1)-->(2,3), (3)-->(7) + ├── fd: (1)-->(2,3) ├── inner-join (cross) │ ├── columns: b.x:1!null z:2 j:3 "?column?":8!null │ ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more) @@ -1390,7 +1390,6 @@ project │ ├── project │ │ ├── columns: "?column?":7 x:1!null │ │ ├── immutable - │ │ ├── fd: (1)-->(7) │ │ ├── inner-join (cross) │ │ │ ├── columns: x:1!null j:3 │ │ │ ├── multiplicity: left-rows(one-or-more), right-rows(zero-or-more) @@ -1516,7 +1515,6 @@ project ├── columns: x:2 j:1!null ├── cardinality: [2 - 2] ├── immutable - ├── fd: (1)-->(2) ├── values │ ├── columns: column1:1!null │ ├── cardinality: [2 - 2] diff --git a/pkg/sql/opt/optbuilder/orderby.go b/pkg/sql/opt/optbuilder/orderby.go index 014807e8aeec..bfb049f386ce 100644 --- a/pkg/sql/opt/optbuilder/orderby.go +++ b/pkg/sql/opt/optbuilder/orderby.go @@ -297,8 +297,6 @@ func ensureColumnOrderable(e tree.TypedExpr) { typ = typ.ArrayContents() } switch typ.Family() { - case types.JsonFamily: - panic(unimplementedWithIssueDetailf(35706, "", "can't order by column type jsonb")) case types.TSQueryFamily, types.TSVectorFamily: panic(unimplementedWithIssueDetailf(92165, "", "can't order by column type %s", typ.SQLString())) } diff --git a/pkg/sql/opt/xform/testdata/external/customer b/pkg/sql/opt/xform/testdata/external/customer index c7a737a871b4..c98bba84bb14 100644 --- a/pkg/sql/opt/xform/testdata/external/customer +++ b/pkg/sql/opt/xform/testdata/external/customer @@ -544,17 +544,16 @@ project │ ├── key columns: [1] = [1] │ ├── lookup columns are key │ ├── immutable - │ ├── fd: (1)-->(2,3), (6)-->(7), (2)==(7), (7)==(2) + │ ├── fd: (1)-->(2,3), (2)==(7), (7)==(2) │ ├── inner-join (lookup idtable@secondary_id) │ │ ├── columns: primary_id:1!null idtable.secondary_id:2!null value:6!null column7:7!null │ 
│ ├── key columns: [7] = [2] │ │ ├── immutable - │ │ ├── fd: (6)-->(7), (1)-->(2), (2)==(7), (7)==(2) + │ │ ├── fd: (1)-->(2), (2)==(7), (7)==(2) │ │ ├── project │ │ │ ├── columns: column7:7 value:6!null │ │ │ ├── cardinality: [2 - 2] │ │ │ ├── immutable - │ │ │ ├── fd: (6)-->(7) │ │ │ ├── values │ │ │ │ ├── columns: value:6!null │ │ │ │ ├── cardinality: [2 - 2] diff --git a/pkg/sql/rowenc/index_encoding.go b/pkg/sql/rowenc/index_encoding.go index 033ba11c6cfe..f2dca66cadb5 100644 --- a/pkg/sql/rowenc/index_encoding.go +++ b/pkg/sql/rowenc/index_encoding.go @@ -1454,7 +1454,7 @@ func GetValueColumns(index catalog.Index) []ValueEncodedColumn { id := index.GetCompositeColumnID(i) // Inverted indexes on a composite type (i.e. an array of composite types) // should not add the indexed column to the value. - if index.GetType() == descpb.IndexDescriptor_INVERTED && id == index.GetKeyColumnID(0) { + if index.GetType() == descpb.IndexDescriptor_INVERTED && id == index.InvertedColumnID() { continue } cols = append(cols, ValueEncodedColumn{ColID: id, IsComposite: true}) diff --git a/pkg/sql/rowenc/index_encoding_test.go b/pkg/sql/rowenc/index_encoding_test.go index d1a969eb1041..ef2b1176e61e 100644 --- a/pkg/sql/rowenc/index_encoding_test.go +++ b/pkg/sql/rowenc/index_encoding_test.go @@ -46,7 +46,9 @@ type indexKeyTest struct { secondaryValues []tree.Datum } -func makeTableDescForTest(test indexKeyTest) (catalog.TableDescriptor, catalog.TableColMap) { +func makeTableDescForTest( + test indexKeyTest, isSecondaryIndexForward bool, +) (catalog.TableDescriptor, catalog.TableColMap) { primaryColumnIDs := make([]descpb.ColumnID, len(test.primaryValues)) secondaryColumnIDs := make([]descpb.ColumnID, len(test.secondaryValues)) columns := make([]descpb.ColumnDescriptor, len(test.primaryValues)+len(test.secondaryValues)) @@ -65,6 +67,9 @@ func makeTableDescForTest(test indexKeyTest) (catalog.TableDescriptor, catalog.T if colinfo.ColumnTypeIsInvertedIndexable(columns[i].Type) { secondaryType = descpb.IndexDescriptor_INVERTED } + if isSecondaryIndexForward && columns[i].Type.Family() == types.JsonFamily { + secondaryType = descpb.IndexDescriptor_FORWARD + } secondaryColumnIDs[i-len(test.primaryValues)] = columns[i].ID } } @@ -116,6 +121,14 @@ func decodeIndex( } func TestIndexKey(t *testing.T) { + parseJSON := func(s string) *tree.DJSON { + j, err := json.ParseJSON(s) + if err != nil { + t.Fatalf("Failed to parse %s: %v", s, err) + } + return tree.NewDJSON(j) + } + rng, _ := randutil.NewTestRand() var a tree.DatumAlloc @@ -155,6 +168,82 @@ func TestIndexKey(t *testing.T) { []tree.Datum{tree.NewDInt(10), tree.NewDInt(11), tree.NewDInt(12)}, []tree.Datum{tree.NewDInt(20), tree.NewDInt(21), tree.NewDInt(22)}, }, + // Testing JSON in primary indexes. 
+ { + tableID: 50, + primaryValues: []tree.Datum{parseJSON(`"a"`)}, + secondaryValues: []tree.Datum{tree.NewDInt(20)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{parseJSON(`1`)}, + secondaryValues: []tree.Datum{tree.NewDInt(20)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{parseJSON(`"a"`), parseJSON(`[1, 2, 3]`)}, + secondaryValues: []tree.Datum{tree.NewDInt(20)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{parseJSON(`{"a": "b"}`)}, + secondaryValues: []tree.Datum{tree.NewDInt(20)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{parseJSON(`{"a": "b", "c": "d"}`)}, + secondaryValues: []tree.Datum{tree.NewDInt(20)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{parseJSON(`[1, "a", {"a": "b"}]`)}, + secondaryValues: []tree.Datum{tree.NewDInt(20)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{parseJSON(`null`), parseJSON(`[]`), parseJSON(`{}`), + parseJSON(`""`)}, + secondaryValues: []tree.Datum{tree.NewDInt(20)}, + }, + // Testing JSON in secondary indexes. + { + tableID: 50, + primaryValues: []tree.Datum{tree.NewDInt(20)}, + secondaryValues: []tree.Datum{parseJSON(`{"a": "b"}`)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{tree.NewDInt(20), tree.NewDInt(50)}, + secondaryValues: []tree.Datum{parseJSON(`{"a": "b"}`), parseJSON(`[1, "a", {"a": "b"}]`)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{tree.NewDInt(20), tree.NewDInt(50)}, + secondaryValues: []tree.Datum{parseJSON(`1`)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{tree.NewDInt(20), tree.NewDInt(50)}, + secondaryValues: []tree.Datum{parseJSON(`"b"`)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{tree.NewDInt(20), tree.NewDInt(50)}, + secondaryValues: []tree.Datum{parseJSON(`null`), parseJSON(`[]`), parseJSON(`{}`), + parseJSON(`""`)}, + }, + // Testing JSON in both primary and secondary indexes. + { + tableID: 50, + primaryValues: []tree.Datum{parseJSON(`"a"`)}, + secondaryValues: []tree.Datum{parseJSON(`"b"`)}, + }, + { + tableID: 50, + primaryValues: []tree.Datum{parseJSON(`{"a": "b"}`), parseJSON(`[1, "a", {"a": "b"}]`)}, + secondaryValues: []tree.Datum{parseJSON(`null`), parseJSON(`[]`), parseJSON(`{}`), + parseJSON(`""`)}, + }, } for i := 0; i < 1000; i++ { @@ -178,7 +267,7 @@ func TestIndexKey(t *testing.T) { for i, test := range tests { evalCtx := eval.NewTestingEvalContext(cluster.MakeTestingClusterSettings()) defer evalCtx.Stop(context.Background()) - tableDesc, colMap := makeTableDescForTest(test) + tableDesc, colMap := makeTableDescForTest(test, true /* isSecondaryIndexForward */) // Add the default family to each test, since secondary indexes support column families. 
var ( colNames []string @@ -348,7 +437,8 @@ func TestInvertedIndexKey(t *testing.T) { runTest := func(value tree.Datum, expectedKeys int, version descpb.IndexDescriptorVersion) { primaryValues := []tree.Datum{tree.NewDInt(10)} secondaryValues := []tree.Datum{value} - tableDesc, colMap := makeTableDescForTest(indexKeyTest{50, primaryValues, secondaryValues}) + tableDesc, colMap := makeTableDescForTest(indexKeyTest{50, primaryValues, secondaryValues}, + false /* isSecondaryIndexForward */) for _, idx := range tableDesc.PublicNonPrimaryIndexes() { idx.IndexDesc().Version = version } diff --git a/pkg/sql/sem/tree/datum.go b/pkg/sql/sem/tree/datum.go index ef1bb8c1eacc..821b2a3233ba 100644 --- a/pkg/sql/sem/tree/datum.go +++ b/pkg/sql/sem/tree/datum.go @@ -3622,6 +3622,45 @@ func NewDJSON(j json.JSON) *DJSON { return &DJSON{j} } +// IsComposite implements the CompositeDatum interface. +func (d *DJSON) IsComposite() bool { + switch d.JSON.Type() { + case json.NumberJSONType: + dec, ok := d.JSON.AsDecimal() + if !ok { + panic(errors.AssertionFailedf("could not convert into JSON Decimal")) + } + DDec := DDecimal{Decimal: *dec} + return DDec.IsComposite() + case json.ArrayJSONType: + jsonArray, ok := d.AsArray() + if !ok { + panic(errors.AssertionFailedf("could not extract the JSON Array")) + } + for _, elem := range jsonArray { + dJsonVal := DJSON{elem} + if dJsonVal.IsComposite() { + return true + } + } + case json.ObjectJSONType: + if it, err := d.ObjectIter(); it != nil && err == nil { + // Assumption: no collated strings are present as JSON keys. + // Thus, JSON keys are not being checked if they are + // composite or not. + for it.Next() { + valDJSON := NewDJSON(it.Value()) + if valDJSON.IsComposite() { + return true + } + } + } else if err != nil { + panic(errors.AssertionFailedf("could not receive an ObjectKeyIterator")) + } + } + return false +} + // ParseDJSON takes a string of JSON and returns a DJSON value. func ParseDJSON(s string) (Datum, error) { j, err := json.ParseJSON(s) diff --git a/pkg/sql/sem/tree/eval.go b/pkg/sql/sem/tree/eval.go index 9a4123adbfd7..752b85b34a46 100644 --- a/pkg/sql/sem/tree/eval.go +++ b/pkg/sql/sem/tree/eval.go @@ -1578,6 +1578,7 @@ var CmpOps = cmpOpFixups(map[treecmp.ComparisonOperatorSymbol]*CmpOpOverloads{ makeLtFn(types.TimestampTZ, types.TimestampTZ, volatility.Leakproof), makeLtFn(types.Uuid, types.Uuid, volatility.Leakproof), makeLtFn(types.VarBit, types.VarBit, volatility.Leakproof), + makeLtFn(types.Jsonb, types.Jsonb, volatility.Immutable), // Mixed-type comparisons. makeLtFn(types.Date, types.Timestamp, volatility.Immutable), @@ -1634,6 +1635,7 @@ var CmpOps = cmpOpFixups(map[treecmp.ComparisonOperatorSymbol]*CmpOpOverloads{ makeLeFn(types.TimestampTZ, types.TimestampTZ, volatility.Leakproof), makeLeFn(types.Uuid, types.Uuid, volatility.Leakproof), makeLeFn(types.VarBit, types.VarBit, volatility.Leakproof), + makeLeFn(types.Jsonb, types.Jsonb, volatility.Immutable), // Mixed-type comparisons. makeLeFn(types.Date, types.Timestamp, volatility.Immutable), diff --git a/pkg/sql/ttl/ttljob/ttljob_test.go b/pkg/sql/ttl/ttljob/ttljob_test.go index 54b9003a66a8..d452dabdedf3 100644 --- a/pkg/sql/ttl/ttljob/ttljob_test.go +++ b/pkg/sql/ttl/ttljob/ttljob_test.go @@ -582,7 +582,10 @@ func TestRowLevelTTLJobRandomEntries(t *testing.T) { var indexableTyps []*types.T for _, typ := range types.Scalar { // TODO(#76419): DateFamily has a broken `-infinity` case. 
- if colinfo.ColumnTypeIsIndexable(typ) && typ.Family() != types.DateFamily { + // TODO(#99432): JsonFamily has broken cases. This is because the test is wrapping JSON + // objects in multiple single quotes which causes parsing errors. + if colinfo.ColumnTypeIsIndexable(typ) && typ.Family() != types.DateFamily && + typ.Family() != types.JsonFamily { indexableTyps = append(indexableTyps, typ) } }
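
The feature introduced by the patch above can be exercised end to end with a short SQL session. The sketch below is illustrative only and is not part of the patch: it assumes a cluster built with this change, and the table name "orders" and its columns are invented for the example.

-- Forward (non-inverted) indexes on JSONB are now accepted, both as a
-- primary key and as a secondary index, because JSONB values have a total
-- lexicographic ordering and a composite key encoding.
CREATE TABLE orders (
    attrs   JSONB PRIMARY KEY,     -- JSONB primary index
    details JSONB,
    INDEX details_idx (details)    -- JSONB secondary (forward) index
);

INSERT INTO orders VALUES
    ('{"id": 1.00}', '"pending"'),
    ('[1, 2, 3]',    'null'),
    ('"express"',    '{"weight": 2.50}');

-- ORDER BY on a JSONB column no longer errors; the forward index can satisfy
-- the ordering as well as point lookups and bounded scans.
SELECT attrs FROM orders ORDER BY attrs;
SELECT attrs FROM orders WHERE attrs >= '[1, 2, 3]' AND attrs < '{}' ORDER BY attrs;

-- Because JSONB columns are now composite-encoded, decimals such as 1.00
-- round-trip with their original formatting even when stored in the index key.
SELECT attrs->'id' FROM orders WHERE attrs = '{"id": 1.00}';

As in the new json_index logic tests, the ordering produced by the first SELECT follows the JSONB sort order (null, strings, numbers, booleans, arrays, objects), and the bounded scan is served directly from the primary index span rather than a full scan.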