From e73f7bc75fb6a63021d6079d1064b592552bc8eb Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Thu, 27 Feb 2025 23:58:51 +0800 Subject: [PATCH] Implement ppl join command with Calcite Signed-off-by: Lantao Jin --- .../calcite/standalone/CalcitePPLJoinIT.java | 1316 +++++++++++++++++ .../sql/legacy/SQLIntegTestCase.java | 20 +- .../org/opensearch/sql/legacy/TestUtils.java | 15 + .../opensearch/sql/legacy/TestsConstants.java | 3 + integ-test/src/test/resources/hobbies.json | 16 + .../hobbies_index_mapping.json | 24 + .../occupation_index_mapping.json | 24 + .../state_country_index_mapping.json | 30 + integ-test/src/test/resources/occupation.json | 12 + .../src/test/resources/state_country.json | 16 + ppl/src/main/antlr/OpenSearchPPLLexer.g4 | 12 + ppl/src/main/antlr/OpenSearchPPLParser.g4 | 35 +- .../opensearch/sql/ppl/parser/AstBuilder.java | 97 +- .../sql/ppl/calcite/CalcitePPLJoinTest.java | 258 +++- 14 files changed, 1872 insertions(+), 6 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLJoinIT.java create mode 100644 integ-test/src/test/resources/hobbies.json create mode 100644 integ-test/src/test/resources/indexDefinitions/hobbies_index_mapping.json create mode 100644 integ-test/src/test/resources/indexDefinitions/occupation_index_mapping.json create mode 100644 integ-test/src/test/resources/indexDefinitions/state_country_index_mapping.json create mode 100644 integ-test/src/test/resources/occupation.json create mode 100644 integ-test/src/test/resources/state_country.json diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLJoinIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLJoinIT.java new file mode 100644 index 00000000000..fe0904b2027 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLJoinIT.java @@ -0,0 +1,1316 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.standalone; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_HOBBIES; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OCCUPATION; +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_STATE_COUNTRY; + +import java.io.IOException; +import org.junit.Ignore; +import org.junit.Test; + +public class CalcitePPLJoinIT extends CalcitePPLIntegTestCase { + + @Override + public void init() throws IOException { + super.init(); + + loadIndex(Index.STATE_COUNTRY); + loadIndex(Index.OCCUPATION); + loadIndex(Index.HOBBIES); + } + + @Test + public void testJoinWithCondition() { + String actual = + execute( + String.format( + "source=%s | inner join left=a, right=b ON a.name = b.name AND a.year = 2023" + + " AND a.month = 4 AND b.year = 2023 AND b.month = 4 %s | fields a.name," + + " a.age, a.state, a.country, b.occupation, b.country, b.salary", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"age\",\n" + + " \"type\": \"integer\"\n" + + " },\n" + + " {\n" + + " \"name\": \"state\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"occupation\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country0\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"salary\",\n" + + " \"type\": \"integer\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"Jake\",\n" + + " 70,\n" + + " \"California\",\n" + + " \"USA\",\n" + + " \"Engineer\",\n" + + " \"England\",\n" + + " 100000\n" + + " ],\n" + + " [\n" + + " \"Hello\",\n" + + " 30,\n" + + " \"New York\",\n" + + " \"USA\",\n" + + " \"Artist\",\n" + + " \"USA\",\n" + + " 70000\n" + + " ],\n" + + " [\n" + + " \"John\",\n" + + " 25,\n" + + " \"Ontario\",\n" + + " \"Canada\",\n" + + " \"Doctor\",\n" + + " \"Canada\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " \"Jane\",\n" + + " 20,\n" + + " \"Quebec\",\n" + + " \"Canada\",\n" + + " \"Scientist\",\n" + + " \"Canada\",\n" + + " 90000\n" + + " ],\n" + + " [\n" + + " \"David\",\n" + + " 40,\n" + + " \"Washington\",\n" + + " \"USA\",\n" + + " \"Doctor\",\n" + + " \"USA\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " \"David\",\n" + + " 40,\n" + + " \"Washington\",\n" + + " \"USA\",\n" + + " \"Unemployed\",\n" + + " \"Canada\",\n" + + " 0\n" + + " ]\n" + + " ],\n" + + " \"total\": 6,\n" + + " \"size\": 6\n" + + "}", + actual); + } + + @Test + public void testJoinWithTwoJoinConditions() { + String actual = + execute( + String.format( + "source = %s | inner join left=a, right=b ON a.name = b.name AND a.country =" + + " b.country AND a.year = 2023 AND a.month = 4 AND b.year = 2023 AND b.month =" + + " 4 %s | fields a.name, a.age, a.state, a.country, b.occupation, b.country," + + " b.salary", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"age\",\n" + + " \"type\": \"integer\"\n" + + " },\n" + + " {\n" + + " \"name\": \"state\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"occupation\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country0\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"salary\",\n" + + " \"type\": \"integer\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"Hello\",\n" + + " 30,\n" + + " \"New York\",\n" + + " \"USA\",\n" + + " \"Artist\",\n" + + " \"USA\",\n" + + " 70000\n" + + " ],\n" + + " [\n" + + " \"John\",\n" + + " 25,\n" + + " \"Ontario\",\n" + + " \"Canada\",\n" + + " \"Doctor\",\n" + + " \"Canada\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " \"Jane\",\n" + + " 20,\n" + + " \"Quebec\",\n" + + " \"Canada\",\n" + + " \"Scientist\",\n" + + " \"Canada\",\n" + + " 90000\n" + + " ],\n" + + " [\n" + + " \"David\",\n" + + " 40,\n" + + " \"Washington\",\n" + + " \"USA\",\n" + + " \"Doctor\",\n" + + " \"USA\",\n" + + " 120000\n" + + " ]\n" + + " ],\n" + + " \"total\": 4,\n" + + " \"size\": 4\n" + + "}", + actual); + } + + @Test + public void testJoinTwoColumnsAndDisjointFilters() { + String actual = + execute( + String.format( + "source = %s | inner join left=a, right=b ON a.name = b.name AND a.country =" + + " b.country AND a.year = 2023 AND a.month = 4 AND b.salary > 100000 %s |" + + " fields a.name, a.age, a.state, a.country, b.occupation, b.country," + + " b.salary", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"age\",\n" + + " \"type\": \"integer\"\n" + + " },\n" + + " {\n" + + " \"name\": \"state\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"occupation\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country0\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"salary\",\n" + + " \"type\": \"integer\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"John\",\n" + + " 25,\n" + + " \"Ontario\",\n" + + " \"Canada\",\n" + + " \"Doctor\",\n" + + " \"Canada\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " \"David\",\n" + + " 40,\n" + + " \"Washington\",\n" + + " \"USA\",\n" + + " \"Doctor\",\n" + + " \"USA\",\n" + + " 120000\n" + + " ]\n" + + " ],\n" + + " \"total\": 2,\n" + + " \"size\": 2\n" + + "}", + actual); + } + + @Test + public void testJoinThenStats() { + String actual = + execute( + String.format( + "source = %s | inner join left=a, right=b ON a.name = b.name %s | stats avg(salary)" + + " by span(age, 10) as age_span", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"age_span\",\n" + + " \"type\": \"double\"\n" + + " },\n" + + " {\n" + + " \"name\": \"avg(salary)\",\n" + + " \"type\": \"double\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " 70.0,\n" + + " 100000.0\n" + + " ],\n" + + " [\n" + + " 40.0,\n" + + " 60000.0\n" + + " ],\n" + + " [\n" + + " 20.0,\n" + + " 105000.0\n" + + " ],\n" + + " [\n" + + " 30.0,\n" + + " 70000.0\n" + + " ]\n" + + " ],\n" + + " \"total\": 4,\n" + + " \"size\": 4\n" + + "}", + actual); + } + + @Test + public void testJoinThenStatsWithGroupBy() { + String actual = + execute( + String.format( + "source = %s | inner join left=a, right=b ON a.name = b.name %s | stats avg(salary)" + + " by span(age, 10) as age_span, b.country", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"b.country\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"age_span\",\n" + + " \"type\": \"double\"\n" + + " },\n" + + " {\n" + + " \"name\": \"avg(salary)\",\n" + + " \"type\": \"double\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"Canada\",\n" + + " 40.0,\n" + + " 0.0\n" + + " ],\n" + + " [\n" + + " \"Canada\",\n" + + " 20.0,\n" + + " 105000.0\n" + + " ],\n" + + " [\n" + + " \"USA\",\n" + + " 40.0,\n" + + " 120000.0\n" + + " ],\n" + + " [\n" + + " \"England\",\n" + + " 70.0,\n" + + " 100000.0\n" + + " ],\n" + + " [\n" + + " \"USA\",\n" + + " 30.0,\n" + + " 70000.0\n" + + " ]\n" + + " ],\n" + + " \"total\": 5,\n" + + " \"size\": 5\n" + + "}", + actual); + } + + @Test + public void testComplexInnerJoin() { + String actual = + execute( + String.format( + "source = %s | where country = 'USA' OR country = 'England' | inner join left=a," + + " right=b ON a.name = b.name %s | stats avg(salary) by span(age, 10) as" + + " age_span, b.country", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"b.country\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"age_span\",\n" + + " \"type\": \"double\"\n" + + " },\n" + + " {\n" + + " \"name\": \"avg(salary)\",\n" + + " \"type\": \"double\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"Canada\",\n" + + " 40.0,\n" + + " 0.0\n" + + " ],\n" + + " [\n" + + " \"USA\",\n" + + " 40.0,\n" + + " 120000.0\n" + + " ],\n" + + " [\n" + + " \"England\",\n" + + " 70.0,\n" + + " 100000.0\n" + + " ],\n" + + " [\n" + + " \"USA\",\n" + + " 30.0,\n" + + " 70000.0\n" + + " ]\n" + + " ],\n" + + " \"total\": 4,\n" + + " \"size\": 4\n" + + "}", + actual); + } + + @Test + public void testComplexLeftJoin() { + String actual = + execute( + String.format( + "source = %s | where country = 'Canada' OR country = 'England' | left join left=a," + + " right=b ON a.name = b.name %s | sort a.age | fields a.name, a.age, a.state," + + " a.country, b.occupation, b.country, b.salary", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"age\",\n" + + " \"type\": \"integer\"\n" + + " },\n" + + " {\n" + + " \"name\": \"state\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"occupation\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country0\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"salary\",\n" + + " \"type\": \"integer\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"Jane\",\n" + + " 20,\n" + + " \"Quebec\",\n" + + " \"Canada\",\n" + + " \"Scientist\",\n" + + " \"Canada\",\n" + + " 90000\n" + + " ],\n" + + " [\n" + + " \"John\",\n" + + " 25,\n" + + " \"Ontario\",\n" + + " \"Canada\",\n" + + " \"Doctor\",\n" + + " \"Canada\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " \"Jim\",\n" + + " 27,\n" + + " \"B.C\",\n" + + " \"Canada\",\n" + + " null,\n" + + " null,\n" + + " 0\n" + + " ],\n" + + " [\n" + + " \"Peter\",\n" + + " 57,\n" + + " \"B.C\",\n" + + " \"Canada\",\n" + + " null,\n" + + " null,\n" + + " 0\n" + + " ],\n" + + " [\n" + + " \"Rick\",\n" + + " 70,\n" + + " \"B.C\",\n" + + " \"Canada\",\n" + + " null,\n" + + " null,\n" + + " 0\n" + + " ]\n" + + " ],\n" + + " \"total\": 5,\n" + + " \"size\": 5\n" + + "}", + actual); + } + + @Test + public void testComplexRightJoin() { + String actual = + execute( + String.format( + "source = %s | where country = 'Canada' OR country = 'England' | right join left=a," + + " right=b ON a.name = b.name %s | sort a.age | fields a.name, a.age, a.state," + + " a.country, b.occupation, b.country, b.salary", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"age\",\n" + + " \"type\": \"integer\"\n" + + " },\n" + + " {\n" + + " \"name\": \"state\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"occupation\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country0\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"salary\",\n" + + " \"type\": \"integer\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"Jane\",\n" + + " 20,\n" + + " \"Quebec\",\n" + + " \"Canada\",\n" + + " \"Scientist\",\n" + + " \"Canada\",\n" + + " 90000\n" + + " ],\n" + + " [\n" + + " \"John\",\n" + + " 25,\n" + + " \"Ontario\",\n" + + " \"Canada\",\n" + + " \"Doctor\",\n" + + " \"Canada\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " null,\n" + + " 0,\n" + + " null,\n" + + " null,\n" + + " \"Engineer\",\n" + + " \"England\",\n" + + " 100000\n" + + " ],\n" + + " [\n" + + " null,\n" + + " 0,\n" + + " null,\n" + + " null,\n" + + " \"Artist\",\n" + + " \"USA\",\n" + + " 70000\n" + + " ],\n" + + " [\n" + + " null,\n" + + " 0,\n" + + " null,\n" + + " null,\n" + + " \"Doctor\",\n" + + " \"USA\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " null,\n" + + " 0,\n" + + " null,\n" + + " null,\n" + + " \"Unemployed\",\n" + + " \"Canada\",\n" + + " 0\n" + + " ]\n" + + " ],\n" + + " \"total\": 6,\n" + + " \"size\": 6\n" + + "}", + actual); + } + + @Test + public void testComplexSemiJoin() { + String actual = + execute( + String.format( + "source = %s | where country = 'Canada' OR country = 'England' | left semi join" + + " left=a, right=b ON a.name = b.name %s | sort a.age", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"state\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"month\",\n" + + " \"type\": \"integer\"\n" + + " },\n" + + " {\n" + + " \"name\": \"year\",\n" + + " \"type\": \"integer\"\n" + + " },\n" + + " {\n" + + " \"name\": \"age\",\n" + + " \"type\": \"integer\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"Jane\",\n" + + " \"Canada\",\n" + + " \"Quebec\",\n" + + " 4,\n" + + " 2023,\n" + + " 20\n" + + " ],\n" + + " [\n" + + " \"John\",\n" + + " \"Canada\",\n" + + " \"Ontario\",\n" + + " 4,\n" + + " 2023,\n" + + " 25\n" + + " ]\n" + + " ],\n" + + " \"total\": 2,\n" + + " \"size\": 2\n" + + "}", + actual); + } + + @Test + public void testComplexAntiJoin() { + String actual = + execute( + String.format( + "source = %s | where country = 'Canada' OR country = 'England' | left anti join" + + " left=a, right=b ON a.name = b.name %s | sort a.age", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"state\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"month\",\n" + + " \"type\": \"integer\"\n" + + " },\n" + + " {\n" + + " \"name\": \"year\",\n" + + " \"type\": \"integer\"\n" + + " },\n" + + " {\n" + + " \"name\": \"age\",\n" + + " \"type\": \"integer\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"Jim\",\n" + + " \"Canada\",\n" + + " \"B.C\",\n" + + " 4,\n" + + " 2023,\n" + + " 27\n" + + " ],\n" + + " [\n" + + " \"Peter\",\n" + + " \"Canada\",\n" + + " \"B.C\",\n" + + " 4,\n" + + " 2023,\n" + + " 57\n" + + " ],\n" + + " [\n" + + " \"Rick\",\n" + + " \"Canada\",\n" + + " \"B.C\",\n" + + " 4,\n" + + " 2023,\n" + + " 70\n" + + " ]\n" + + " ],\n" + + " \"total\": 3,\n" + + " \"size\": 3\n" + + "}", + actual); + } + + @Test + public void testComplexCrossJoin() { + String actual = + execute( + String.format( + "source = %s | where country = 'Canada' OR country = 'England' | join left=a," + + " right=b %s | sort a.age | stats count()", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"count()\",\n" + + " \"type\": \"long\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " 30\n" + + " ]\n" + + " ],\n" + + " \"total\": 1,\n" + + " \"size\": 1\n" + + "}", + actual); + } + + @Test + public void testNonEquiJoin() { + String actual = + execute( + String.format( + "source = %s | where country = 'USA' OR country = 'England' | inner join left=a," + + " right=b ON age < salary %s | where occupation = 'Doctor' OR occupation =" + + " 'Engineer' | fields a.name, age, state, a.country, occupation, b.country," + + " salary", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"age\",\n" + + " \"type\": \"integer\"\n" + + " },\n" + + " {\n" + + " \"name\": \"state\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"occupation\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"country0\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"salary\",\n" + + " \"type\": \"integer\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"Jake\",\n" + + " 70,\n" + + " \"California\",\n" + + " \"USA\",\n" + + " \"Engineer\",\n" + + " \"England\",\n" + + " 100000\n" + + " ],\n" + + " [\n" + + " \"Jake\",\n" + + " 70,\n" + + " \"California\",\n" + + " \"USA\",\n" + + " \"Doctor\",\n" + + " \"Canada\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " \"Jake\",\n" + + " 70,\n" + + " \"California\",\n" + + " \"USA\",\n" + + " \"Doctor\",\n" + + " \"USA\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " \"Hello\",\n" + + " 30,\n" + + " \"New York\",\n" + + " \"USA\",\n" + + " \"Engineer\",\n" + + " \"England\",\n" + + " 100000\n" + + " ],\n" + + " [\n" + + " \"Hello\",\n" + + " 30,\n" + + " \"New York\",\n" + + " \"USA\",\n" + + " \"Doctor\",\n" + + " \"Canada\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " \"Hello\",\n" + + " 30,\n" + + " \"New York\",\n" + + " \"USA\",\n" + + " \"Doctor\",\n" + + " \"USA\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " \"David\",\n" + + " 40,\n" + + " \"Washington\",\n" + + " \"USA\",\n" + + " \"Engineer\",\n" + + " \"England\",\n" + + " 100000\n" + + " ],\n" + + " [\n" + + " \"David\",\n" + + " 40,\n" + + " \"Washington\",\n" + + " \"USA\",\n" + + " \"Doctor\",\n" + + " \"Canada\",\n" + + " 120000\n" + + " ],\n" + + " [\n" + + " \"David\",\n" + + " 40,\n" + + " \"Washington\",\n" + + " \"USA\",\n" + + " \"Doctor\",\n" + + " \"USA\",\n" + + " 120000\n" + + " ]\n" + + " ],\n" + + " \"total\": 9,\n" + + " \"size\": 9\n" + + "}", + actual); + } + + @Test + public void testCrossJoinWithJoinCriteriaFallbackToInnerJoin() { + String cross = + execute( + String.format( + "source = %s | where country = 'USA' | cross join left=a, right=b ON a.name =" + + " b.name %s | sort a.age", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + String inner = + execute( + String.format( + "source = %s | where country = 'USA' | inner join left=a, right=b ON a.name =" + + " b.name %s | sort a.age", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals(cross, inner); + } + + @Ignore // TODO seems a calcite bug + public void testMultipleJoins() { + String actual = + execute( + String.format( + """ + source = %s + | where country = 'Canada' OR country = 'England' + | inner join left=a, right=b + ON a.name = b.name AND a.year = 2023 AND a.month = 4 AND b.year = 2023 AND b.month = 4 + %s + | eval a_name = a.name + | eval a_country = a.country + | eval b_country = b.country + | fields a_name, age, state, a_country, occupation, b_country, salary + | left join left=a, right=b + ON a.a_name = b.name + %s + | eval aa_country = a.a_country + | eval ab_country = a.b_country + | eval bb_country = b.country + | fields a_name, age, state, aa_country, occupation, ab_country, salary, bb_country, hobby, language + | cross join left=a, right=b + %s + | eval new_country = a.aa_country + | eval new_salary = b.salary + | stats avg(new_salary) as avg_salary by span(age, 5) as age_span, state + | left semi join left=a, right=b + ON a.state = b.state + %s + | eval new_avg_salary = floor(avg_salary) + | fields state, age_span, new_avg_salary + """, + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_OCCUPATION, + TEST_INDEX_HOBBIES, + TEST_INDEX_OCCUPATION, + TEST_INDEX_STATE_COUNTRY)); + assertEquals("30", actual); + } + + @Test + public void testMultipleJoinsWithoutTableAliases() { + String actual = + execute( + String.format( + "source = %s | JOIN ON %s.name = %s.name %s | JOIN ON %s.name = %s.name %s | fields" + + " %s.name, %s.name, %s.name", + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_OCCUPATION, + TEST_INDEX_OCCUPATION, + TEST_INDEX_OCCUPATION, + TEST_INDEX_HOBBIES, + TEST_INDEX_HOBBIES, + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_OCCUPATION, + TEST_INDEX_HOBBIES)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"name0\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"name1\",\n" + + " \"type\": \"string\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"David\",\n" + + " \"David\",\n" + + " \"David\"\n" + + " ],\n" + + " [\n" + + " \"David\",\n" + + " \"David\",\n" + + " \"David\"\n" + + " ],\n" + + " [\n" + + " \"Hello\",\n" + + " \"Hello\",\n" + + " \"Hello\"\n" + + " ],\n" + + " [\n" + + " \"Jake\",\n" + + " \"Jake\",\n" + + " \"Jake\"\n" + + " ],\n" + + " [\n" + + " \"Jane\",\n" + + " \"Jane\",\n" + + " \"Jane\"\n" + + " ],\n" + + " [\n" + + " \"John\",\n" + + " \"John\",\n" + + " \"John\"\n" + + " ]\n" + + " ],\n" + + " \"total\": 6,\n" + + " \"size\": 6\n" + + "}", + actual); + } + + @Test + public void testMultipleJoinsWithPartTableAliases() { + String actual = + execute( + String.format( + "source = %s | JOIN left = t1 right = t2 ON t1.name = t2.name %s | JOIN right = t3" + + " ON t1.name = t3.name %s | fields t1.name, t2.name, t3.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION, TEST_INDEX_HOBBIES)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"name0\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"name1\",\n" + + " \"type\": \"string\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"David\",\n" + + " \"David\",\n" + + " \"David\"\n" + + " ],\n" + + " [\n" + + " \"David\",\n" + + " \"David\",\n" + + " \"David\"\n" + + " ],\n" + + " [\n" + + " \"Hello\",\n" + + " \"Hello\",\n" + + " \"Hello\"\n" + + " ],\n" + + " [\n" + + " \"Jake\",\n" + + " \"Jake\",\n" + + " \"Jake\"\n" + + " ],\n" + + " [\n" + + " \"Jane\",\n" + + " \"Jane\",\n" + + " \"Jane\"\n" + + " ],\n" + + " [\n" + + " \"John\",\n" + + " \"John\",\n" + + " \"John\"\n" + + " ]\n" + + " ],\n" + + " \"total\": 6,\n" + + " \"size\": 6\n" + + "}", + actual); + } + + @Test + public void testMultipleJoinsWithSelfJoin1() { + String actual = + execute( + String.format( + "source = %s | JOIN left = t1 right = t2 ON t1.name = t2.name %s | JOIN right = t3" + + " ON t1.name = t3.name %s | JOIN right = t4 ON t1.name = t4.name %s | fields" + + " t1.name, t2.name, t3.name, t4.name", + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_OCCUPATION, + TEST_INDEX_HOBBIES, + TEST_INDEX_STATE_COUNTRY)); + assertEquals( + "" + + "{\n" + + " \"schema\": [\n" + + " {\n" + + " \"name\": \"name\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"name0\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"name1\",\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"name\": \"name2\",\n" + + " \"type\": \"string\"\n" + + " }\n" + + " ],\n" + + " \"datarows\": [\n" + + " [\n" + + " \"David\",\n" + + " \"David\",\n" + + " \"David\",\n" + + " \"David\"\n" + + " ],\n" + + " [\n" + + " \"David\",\n" + + " \"David\",\n" + + " \"David\",\n" + + " \"David\"\n" + + " ],\n" + + " [\n" + + " \"Hello\",\n" + + " \"Hello\",\n" + + " \"Hello\",\n" + + " \"Hello\"\n" + + " ],\n" + + " [\n" + + " \"Jake\",\n" + + " \"Jake\",\n" + + " \"Jake\",\n" + + " \"Jake\"\n" + + " ],\n" + + " [\n" + + " \"Jane\",\n" + + " \"Jane\",\n" + + " \"Jane\",\n" + + " \"Jane\"\n" + + " ],\n" + + " [\n" + + " \"John\",\n" + + " \"John\",\n" + + " \"John\",\n" + + " \"John\"\n" + + " ]\n" + + " ],\n" + + " \"total\": 6,\n" + + " \"size\": 6\n" + + "}", + actual); + } + + @Ignore // TODO subquery not support + public void testMultipleJoinsWithSelfJoin2() { + String actual = + execute( + String.format( + "source = %s | JOIN left = t1 right = t2 ON t1.name = t2.name %s | JOIN right = t3" + + " ON t1.name = t3.name %s | JOIN ON t1.name = t4.name [ source = %s ] as t4 |" + + " fields t1.name, t2.name, t3.name, t4.name", + TEST_INDEX_STATE_COUNTRY, + TEST_INDEX_OCCUPATION, + TEST_INDEX_HOBBIES, + TEST_INDEX_STATE_COUNTRY)); + assertEquals("", actual); + } + + @Test + public void testCheckAccessTheReferenceByAliases1() { + String res1 = + execute( + String.format( + "source = %s | JOIN left = t1 ON t1.name = t2.name %s as t2 | fields t1.name," + + " t2.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + String res2 = + execute( + String.format( + "source = %s as t1 | JOIN ON t1.name = t2.name %s as t2 | fields t1.name, t2.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals(res1, res2); + + String res3 = + execute( + String.format( + "source = %s | JOIN left = t1 right = t2 ON t1.name = t2.name %s as tt | fields" + + " tt.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + String res4 = + execute( + String.format( + "source = %s as tt | JOIN left = t1 right = t2 ON t1.name = t2.name %s | fields" + + " tt.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + String res5 = + execute( + String.format( + "source = %s as tt | JOIN left = t1 ON t1.name = t2.name %s as t2 | fields" + + " tt.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals(res3, res4); + assertEquals(res4, res5); + } + + @Ignore // TODO subquery not support + public void testCheckAccessTheReferenceByAliases2() { + String res1 = + execute( + String.format( + "source = %s | JOIN left = t1 ON t1.name = t2.name [ source = %s ] as t2 | fields" + + " t1.name, t2.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + String res2 = + execute( + String.format( + "source = %s | JOIN left = t1 ON t1.name = t2.name [ source = %s as t2 ] | fields" + + " t1.name, t2.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals(res1, res2); + + String res3 = + execute( + String.format( + "source = %s | JOIN left = t1 right = t2 ON t1.name = t2.name [ source = %s as" + + " tt ] | fields tt.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + String res4 = + execute( + String.format( + "source = %s | JOIN left = t1 ON t1.name = t2.name [ source = %s as tt ] as t2" + + " | fields tt.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + String res5 = + execute( + String.format( + "source = %s | JOIN left = t1 right = t2 ON t1.name = t2.name [ source = %s ]" + + " as tt | fields tt.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals(res3, res4); + assertEquals(res4, res5); + } + + @Test + public void testCheckAccessTheReferenceByAliases3() { + String res1 = + execute( + String.format( + "source = %s | JOIN left = t1 right = t2 ON t1.name = t2.name %s as tt | fields" + + " tt.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + String res2 = + execute( + String.format( + "source = %s as tt | JOIN left = t1 right = t2 ON t1.name = t2.name %s | fields" + + " tt.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + String res3 = + execute( + String.format( + "source = %s as tt | JOIN left = t1 ON t1.name = t2.name %s as t2 | fields" + + " tt.name", + TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION)); + assertEquals(res1, res2); + assertEquals(res1, res3); + } +} diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java index 1728be74e6c..13fbcf678b6 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/SQLIntegTestCase.java @@ -21,16 +21,19 @@ import static org.opensearch.sql.legacy.TestUtils.getEmployeeNestedTypeIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGameOfThronesIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getGeopointIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getHobbiesIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getJoinTypeIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getLocationIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getMappingFile; import static org.opensearch.sql.legacy.TestUtils.getNestedSimpleIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getNestedTypeIndexMapping; +import static org.opensearch.sql.legacy.TestUtils.getOccupationIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getOdbcIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getOrderIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getPeople2IndexMapping; import static org.opensearch.sql.legacy.TestUtils.getPhraseIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getResponseBody; +import static org.opensearch.sql.legacy.TestUtils.getStateCountryIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getStringIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getUnexpandedObjectIndexMapping; import static org.opensearch.sql.legacy.TestUtils.getWeblogsIndexMapping; @@ -745,7 +748,22 @@ public enum Index { TestsConstants.TEST_INDEX_GEOPOINT, "dates", getGeopointIndexMapping(), - "src/test/resources/geopoints.json"); + "src/test/resources/geopoints.json"), + STATE_COUNTRY( + TestsConstants.TEST_INDEX_STATE_COUNTRY, + "state_country", + getStateCountryIndexMapping(), + "src/test/resources/state_country.json"), + OCCUPATION( + TestsConstants.TEST_INDEX_OCCUPATION, + "occupation", + getOccupationIndexMapping(), + "src/test/resources/occupation.json"), + HOBBIES( + TestsConstants.TEST_INDEX_HOBBIES, + "hobbies", + getHobbiesIndexMapping(), + "src/test/resources/hobbies.json"); private final String name; private final String type; diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java index 195dda0cbdd..7f056e6fd0a 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestUtils.java @@ -250,6 +250,21 @@ public static String getGeopointIndexMapping() { return getMappingFile(mappingFile); } + public static String getStateCountryIndexMapping() { + String mappingFile = "state_country_index_mapping.json"; + return getMappingFile(mappingFile); + } + + public static String getOccupationIndexMapping() { + String mappingFile = "occupation_index_mapping.json"; + return getMappingFile(mappingFile); + } + + public static String getHobbiesIndexMapping() { + String mappingFile = "hobbies_index_mapping.json"; + return getMappingFile(mappingFile); + } + public static void loadBulk(Client client, String jsonPath, String defaultIndex) throws Exception { System.out.println(String.format("Loading file %s into opensearch cluster", jsonPath)); diff --git a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java index 1e336f544e9..cb99849db85 100644 --- a/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java +++ b/integ-test/src/test/java/org/opensearch/sql/legacy/TestsConstants.java @@ -59,6 +59,9 @@ public class TestsConstants { public static final String TEST_INDEX_NESTED_WITH_NULLS = TEST_INDEX + "_nested_with_nulls"; public static final String TEST_INDEX_GEOPOINT = TEST_INDEX + "_geopoint"; public static final String DATASOURCES = ".ql-datasources"; + public static final String TEST_INDEX_STATE_COUNTRY = TEST_INDEX + "_state_country"; + public static final String TEST_INDEX_OCCUPATION = TEST_INDEX + "_occupation"; + public static final String TEST_INDEX_HOBBIES = TEST_INDEX + "_hobbies"; public static final String DATE_FORMAT = "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"; public static final String TS_DATE_FORMAT = "yyyy-MM-dd HH:mm:ss.SSS"; diff --git a/integ-test/src/test/resources/hobbies.json b/integ-test/src/test/resources/hobbies.json new file mode 100644 index 00000000000..182b43cd979 --- /dev/null +++ b/integ-test/src/test/resources/hobbies.json @@ -0,0 +1,16 @@ +{"index":{"_id":"1"}} +{"name":"Jake","country":"USA","hobby":"Fishing","language":"English","year":2023,"month":4} +{"index":{"_id":"2"}} +{"name":"Hello","country":"USA","hobby":"Painting","language":"English","year":2023,"month":4} +{"index":{"_id":"3"}} +{"name":"John","country":"Canada","hobby":"Reading","language":"French","year":2023,"month":4} +{"index":{"_id":"4"}} +{"name":"Jim","country":"Canada","hobby":"Hiking","language":"English","year":2023,"month":4} +{"index":{"_id":"5"}} +{"name":"Peter","country":"Canada","hobby":"Gaming","language":"English","year":2023,"month":4} +{"index":{"_id":"6"}} +{"name":"Rick","country":"USA","hobby":"Swimming","language":"English","year":2023,"month":4} +{"index":{"_id":"7"}} +{"name":"David","country":"USA","hobby":"Gardening","language":"English","year":2023,"month":4} +{"index":{"_id":"8"}} +{"name":"Jane","country":"Canada","hobby":"Singing","language":"French","year":2023,"month":4} diff --git a/integ-test/src/test/resources/indexDefinitions/hobbies_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/hobbies_index_mapping.json new file mode 100644 index 00000000000..012e29cd7e8 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/hobbies_index_mapping.json @@ -0,0 +1,24 @@ +{ + "mappings": { + "properties": { + "name": { + "type": "keyword" + }, + "country": { + "type": "text" + }, + "hobby": { + "type": "text" + }, + "language": { + "type": "keyword" + }, + "year": { + "type": "integer" + }, + "month": { + "type": "integer" + } + } + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/occupation_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/occupation_index_mapping.json new file mode 100644 index 00000000000..cfbd6fc7773 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/occupation_index_mapping.json @@ -0,0 +1,24 @@ +{ + "mappings": { + "properties": { + "name": { + "type": "keyword" + }, + "occupation": { + "type": "text" + }, + "country": { + "type": "text" + }, + "salary": { + "type": "integer" + }, + "year": { + "type": "integer" + }, + "month": { + "type": "integer" + } + } + } +} diff --git a/integ-test/src/test/resources/indexDefinitions/state_country_index_mapping.json b/integ-test/src/test/resources/indexDefinitions/state_country_index_mapping.json new file mode 100644 index 00000000000..da614cf5253 --- /dev/null +++ b/integ-test/src/test/resources/indexDefinitions/state_country_index_mapping.json @@ -0,0 +1,30 @@ +{ + "mappings": { + "properties": { + "name": { + "type": "keyword" + }, + "age": { + "type": "integer" + }, + "state": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "country": { + "type": "text" + }, + "year": { + "type": "integer" + }, + "month": { + "type": "integer" + } + } + } +} diff --git a/integ-test/src/test/resources/occupation.json b/integ-test/src/test/resources/occupation.json new file mode 100644 index 00000000000..a21c5051441 --- /dev/null +++ b/integ-test/src/test/resources/occupation.json @@ -0,0 +1,12 @@ +{"index":{"_id":"1"}} +{"name":"Jake","occupation":"Engineer","country":"England","salary":100000,"year":2023,"month":4} +{"index":{"_id":"2"}} +{"name":"Hello","occupation":"Artist","country":"USA","salary":70000,"year":2023,"month":4} +{"index":{"_id":"3"}} +{"name":"John","occupation":"Doctor","country":"Canada","salary":120000,"year":2023,"month":4} +{"index":{"_id":"4"}} +{"name":"David","occupation":"Doctor","country":"USA","salary":120000,"year":2023,"month":4} +{"index":{"_id":"5"}} +{"name":"David","occupation":"Unemployed","country":"Canada","salary":0,"year":2023,"month":4} +{"index":{"_id":"6"}} +{"name":"Jane","occupation":"Scientist","country":"Canada","salary":90000,"year":2023,"month":4} diff --git a/integ-test/src/test/resources/state_country.json b/integ-test/src/test/resources/state_country.json new file mode 100644 index 00000000000..058e2f0511b --- /dev/null +++ b/integ-test/src/test/resources/state_country.json @@ -0,0 +1,16 @@ +{"index":{"_id":"1"}} +{"name":"Jake","age":70,"state":"California","country":"USA","year":2023,"month":4} +{"index":{"_id":"2"}} +{"name":"Hello","age":30,"state":"New York","country":"USA","year":2023,"month":4} +{"index":{"_id":"3"}} +{"name":"John","age":25,"state":"Ontario","country":"Canada","year":2023,"month":4} +{"index":{"_id":"4"}} +{"name":"Jane","age":20,"state":"Quebec","country":"Canada","year":2023,"month":4} +{"index":{"_id":"5"}} +{"name":"Jim","age":27,"state":"B.C","country":"Canada","year":2023,"month":4} +{"index":{"_id":"6"}} +{"name":"Peter","age":57,"state":"B.C","country":"Canada","year":2023,"month":4} +{"index":{"_id":"7"}} +{"name":"Rick","age":70,"state":"B.C","country":"Canada","year":2023,"month":4} +{"index":{"_id":"8"}} +{"name":"David","age":40,"state":"Washington","country":"USA","year":2023,"month":4} diff --git a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 index 8011532db5f..b0c048ee471 100644 --- a/ppl/src/main/antlr/OpenSearchPPLLexer.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLLexer.g4 @@ -38,6 +38,18 @@ ML: 'ML'; FILLNULL: 'FILLNULL'; TRENDLINE: 'TRENDLINE'; +//Native JOIN KEYWORDS +JOIN: 'JOIN'; +ON: 'ON'; +INNER: 'INNER'; +OUTER: 'OUTER'; +FULL: 'FULL'; +SEMI: 'SEMI'; +ANTI: 'ANTI'; +CROSS: 'CROSS'; +LEFT_HINT: 'HINT.LEFT'; +RIGHT_HINT: 'HINT.RIGHT'; + // COMMAND ASSIST KEYWORDS AS: 'AS'; BY: 'BY'; diff --git a/ppl/src/main/antlr/OpenSearchPPLParser.g4 b/ppl/src/main/antlr/OpenSearchPPLParser.g4 index 834549cfa61..24dcaef15a2 100644 --- a/ppl/src/main/antlr/OpenSearchPPLParser.g4 +++ b/ppl/src/main/antlr/OpenSearchPPLParser.g4 @@ -35,6 +35,7 @@ pplCommands commands : whereCommand | fieldsCommand + | joinCommand | renameCommand | statsCommand | dedupCommand @@ -204,7 +205,39 @@ fromClause ; tableSourceClause - : tableSource (COMMA tableSource)* + : tableSource (COMMA tableSource)* (AS alias = qualifiedName)? + ; + +// join +joinCommand + : (joinType) JOIN sideAlias joinHintList? joinCriteria? right = tableSourceClause + ; + +joinType + : INNER? + | CROSS + | LEFT OUTER? + | RIGHT OUTER? + | FULL OUTER? + | LEFT? SEMI + | LEFT? ANTI + ; + +sideAlias + : (LEFT EQUAL leftAlias = qualifiedName)? COMMA? (RIGHT EQUAL rightAlias = qualifiedName)? + ; + +joinCriteria + : ON logicalExpression + ; + +joinHintList + : hintPair (COMMA? hintPair)* + ; + +hintPair + : leftHintKey = LEFT_HINT DOT ID EQUAL leftHintValue = ident #leftHint + | rightHintKey = RIGHT_HINT DOT ID EQUAL rightHintValue = ident #rightHint ; renameClasue diff --git a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index a99c5dda324..ffae07bf63f 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -30,6 +30,7 @@ import com.google.common.collect.ImmutableMap; import java.util.ArrayList; import java.util.Collections; +import java.util.LinkedHashMap; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; @@ -38,6 +39,7 @@ import org.antlr.v4.runtime.tree.ParseTree; import org.opensearch.sql.ast.dsl.AstDSL; import org.opensearch.sql.ast.expression.Alias; +import org.opensearch.sql.ast.expression.EqualTo; import org.opensearch.sql.ast.expression.Field; import org.opensearch.sql.ast.expression.Let; import org.opensearch.sql.ast.expression.Literal; @@ -54,6 +56,7 @@ import org.opensearch.sql.ast.tree.FillNull; import org.opensearch.sql.ast.tree.Filter; import org.opensearch.sql.ast.tree.Head; +import org.opensearch.sql.ast.tree.Join; import org.opensearch.sql.ast.tree.Kmeans; import org.opensearch.sql.ast.tree.ML; import org.opensearch.sql.ast.tree.Parse; @@ -63,6 +66,7 @@ import org.opensearch.sql.ast.tree.Relation; import org.opensearch.sql.ast.tree.Rename; import org.opensearch.sql.ast.tree.Sort; +import org.opensearch.sql.ast.tree.SubqueryAlias; import org.opensearch.sql.ast.tree.TableFunction; import org.opensearch.sql.ast.tree.Trendline; import org.opensearch.sql.ast.tree.UnresolvedPlan; @@ -144,6 +148,89 @@ public UnresolvedPlan visitWhereCommand(WhereCommandContext ctx) { return new Filter(internalVisitExpression(ctx.logicalExpression())); } + @Override + public UnresolvedPlan visitJoinCommand(OpenSearchPPLParser.JoinCommandContext ctx) { + Join.JoinType joinType = getJoinType(ctx.joinType()); + if (ctx.joinCriteria() == null) { + joinType = Join.JoinType.CROSS; + } + Join.JoinHint joinHint = getJoinHint(ctx.joinHintList()); + Optional leftAlias = + ctx.sideAlias().leftAlias != null + ? Optional.of(internalVisitExpression(ctx.sideAlias().leftAlias).toString()) + : Optional.empty(); + Optional rightAlias = Optional.empty(); + if (ctx.tableSourceClause().alias != null) { + rightAlias = Optional.of(internalVisitExpression(ctx.tableSourceClause().alias).toString()); + } + if (ctx.sideAlias().rightAlias != null) { + rightAlias = Optional.of(internalVisitExpression(ctx.sideAlias().rightAlias).toString()); + } + + UnresolvedPlan rightRelation = visit(ctx.tableSourceClause()); + // Add a SubqueryAlias to the right plan when the right alias is present and no duplicated alias + // existing in right. + UnresolvedPlan right; + if (rightAlias.isEmpty() + || (rightRelation instanceof SubqueryAlias + && rightAlias.get().equals(((SubqueryAlias) rightRelation).getAlias()))) { + right = rightRelation; + } else { + right = new SubqueryAlias(rightAlias.get(), rightRelation); + } + Optional joinCondition = + ctx.joinCriteria() == null + ? Optional.empty() + : Optional.of(expressionBuilder.visitJoinCriteria(ctx.joinCriteria())); + + return new Join(right, leftAlias, rightAlias, joinType, joinCondition, joinHint); + } + + private Join.JoinHint getJoinHint(OpenSearchPPLParser.JoinHintListContext ctx) { + Join.JoinHint joinHint; + if (ctx == null) { + joinHint = new Join.JoinHint(); + } else { + joinHint = + new Join.JoinHint( + ctx.hintPair().stream() + .map(expressionBuilder::visit) + .filter(e -> e instanceof EqualTo) + .map(e -> (EqualTo) e) + .collect( + Collectors.toMap( + k -> k.getLeft().toString(), // always literal + v -> v.getRight().toString(), // always literal + (v1, v2) -> v2, + LinkedHashMap::new))); + } + return joinHint; + } + + private Join.JoinType getJoinType(OpenSearchPPLParser.JoinTypeContext ctx) { + Join.JoinType joinType; + if (ctx == null) { + joinType = Join.JoinType.INNER; + } else if (ctx.INNER() != null) { + joinType = Join.JoinType.INNER; + } else if (ctx.SEMI() != null) { + joinType = Join.JoinType.SEMI; + } else if (ctx.ANTI() != null) { + joinType = Join.JoinType.ANTI; + } else if (ctx.LEFT() != null) { + joinType = Join.JoinType.LEFT; + } else if (ctx.RIGHT() != null) { + joinType = Join.JoinType.RIGHT; + } else if (ctx.CROSS() != null) { + joinType = Join.JoinType.CROSS; + } else if (ctx.FULL() != null) { + joinType = Join.JoinType.FULL; + } else { + joinType = Join.JoinType.INNER; + } + return joinType; + } + /** Fields command. */ @Override public UnresolvedPlan visitFieldsCommand(FieldsCommandContext ctx) { @@ -325,8 +412,14 @@ public UnresolvedPlan visitFromClause(FromClauseContext ctx) { @Override public UnresolvedPlan visitTableSourceClause(TableSourceClauseContext ctx) { - return new Relation( - ctx.tableSource().stream().map(this::internalVisitExpression).collect(Collectors.toList())); + Relation relation = + new Relation( + ctx.tableSource().stream() + .map(this::internalVisitExpression) + .collect(Collectors.toList())); + return ctx.alias != null + ? new SubqueryAlias(internalVisitExpression(ctx.alias).toString(), relation) + : relation; } @Override diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java index 80221c76877..feb8ff1607d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java @@ -7,10 +7,8 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert; -import org.junit.Ignore; import org.junit.Test; -@Ignore public class CalcitePPLJoinTest extends CalcitePPLAbstractTest { public CalcitePPLJoinTest() { @@ -97,6 +95,28 @@ public void testJoinWithSpecificAliases() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testJoinWithMultiplePredicates() { + String ppl = + "source=EMP | join left = l right = r on l.DEPTNO = r.DEPTNO AND l.DEPTNO > 10 AND EMP.SAL" + + " < 3000 DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalJoin(condition=[AND(=($7, $8), >($7, 10), <($5, 3000))], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 9); + + String expectedSparkSql = + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO` AND `EMP`.`DEPTNO` >" + + " 10 AND `EMP`.`SAL` < 3000"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testLeftJoin() { String ppl = "source=EMP as e | left join on e.DEPTNO = d.DEPTNO DEPT as d"; @@ -117,6 +137,90 @@ public void testLeftJoin() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testRightJoin() { + String ppl = "source=EMP as e | right join on e.DEPTNO = d.DEPTNO DEPT as d"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalJoin(condition=[=($7, $8)], joinType=[right])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 15); + + String expectedSparkSql = + "" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "RIGHT JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testLeftSemi() { + String ppl = "source=EMP as e | left semi join on e.DEPTNO = d.DEPTNO DEPT as d"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalJoin(condition=[=($7, $8)], joinType=[semi])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE EXISTS (SELECT 1\n" + + "FROM `scott`.`DEPT`\n" + + "WHERE `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`)"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testLeftAnti() { + String ppl = "source=EMP as e | left anti join on e.DEPTNO = d.DEPTNO DEPT as d"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalJoin(condition=[=($7, $8)], joinType=[anti])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 0); + + String expectedSparkSql = + "" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "WHERE NOT EXISTS (SELECT 1\n" + + "FROM `scott`.`DEPT`\n" + + "WHERE `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`)"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testFullOuter() { + String ppl = "source=EMP as e | full outer join on e.DEPTNO = d.DEPTNO DEPT as d"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalJoin(condition=[=($7, $8)], joinType=[full])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 15); + + String expectedSparkSql = + "" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "FULL JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testCrossJoin() { String ppl = "source=EMP as e | cross join DEPT as d"; @@ -134,6 +238,26 @@ public void testCrossJoin() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testCrossJoinWithJoinConditions() { + String ppl = "source=EMP as e | cross join on e.DEPTNO = d.DEPTNO DEPT as d"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testNonEquiJoin() { String ppl = "source=EMP as e | join on e.DEPTNO > d.DEPTNO DEPT as d"; @@ -153,4 +277,134 @@ public void testNonEquiJoin() { + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` > `DEPT`.`DEPTNO`"; verifyPPLToSparkSQL(root, expectedSparkSql); } + + @Test + public void testMultipleTablesJoin() { + String ppl = + "source=EMP | join left = l right = r ON l.DEPTNO = r.DEPTNO DEPT | left join left = l" + + " right = r ON l.SAL = r.HISAL SALGRADE"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalJoin(condition=[=($5, $13)], joinType=[left])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n" + + " LogicalTableScan(table=[[scott, SALGRADE]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`\n" + + "LEFT JOIN `scott`.`SALGRADE` ON `EMP`.`SAL` = `SALGRADE`.`HISAL`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultipleTablesJoinWithTableAliases() { + String ppl = + "source=EMP as t1 | join ON t1.DEPTNO = t2.DEPTNO DEPT as t2 | left join ON t1.SAL =" + + " t3.HISAL SALGRADE as t3"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalJoin(condition=[=($5, $13)], joinType=[left])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n" + + " LogicalTableScan(table=[[scott, SALGRADE]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`\n" + + "LEFT JOIN `scott`.`SALGRADE` ON `EMP`.`SAL` = `SALGRADE`.`HISAL`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultipleTablesJoinWithTableNames() { + String ppl = + "source=EMP | join ON EMP.DEPTNO = DEPT.DEPTNO DEPT | left join ON EMP.SAL = SALGRADE.HISAL" + + " SALGRADE"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalJoin(condition=[=($5, $13)], joinType=[left])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n" + + " LogicalTableScan(table=[[scott, SALGRADE]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`\n" + + "LEFT JOIN `scott`.`SALGRADE` ON `EMP`.`SAL` = `SALGRADE`.`HISAL`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultipleJoinWithPartSideAliases() { + String ppl = + "source=EMP | join left = t1 right = t2 ON t1.DEPTNO = t2.DEPTNO DEPT | left join right =" + + " t3 ON t1.SAL = t3.HISAL SALGRADE"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalJoin(condition=[=($5, $13)], joinType=[left])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n" + + " LogicalTableScan(table=[[scott, SALGRADE]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "" + + "SELECT *\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`\n" + + "LEFT JOIN `scott`.`SALGRADE` ON `EMP`.`SAL` = `SALGRADE`.`HISAL`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testMultipleJoinWithSelfJoin() { + String ppl = + "source=EMP | join left = t1 right = t2 ON t1.DEPTNO = t2.DEPTNO DEPT | left join right =" + + " t3 ON t1.SAL = t3.HISAL SALGRADE | join right = t4 ON t1.DEPTNO = t4.DEPTNO EMP |" + + " fields t1.ENAME, t2.DNAME, t3.GRADE, t4.EMPNO"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "" + + "LogicalProject(ENAME=[$1], DNAME=[$9], GRADE=[$11], EMPNO=[$14])\n" + + " LogicalJoin(condition=[=($7, $21)], joinType=[inner])\n" + + " LogicalJoin(condition=[=($5, $13)], joinType=[left])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[inner])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n" + + " LogicalTableScan(table=[[scott, SALGRADE]])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 70); + + String expectedSparkSql = + "" + + "SELECT `EMP`.`ENAME`, `DEPT`.`DNAME`, `SALGRADE`.`GRADE`, `EMP0`.`EMPNO`\n" + + "FROM `scott`.`EMP`\n" + + "INNER JOIN `scott`.`DEPT` ON `EMP`.`DEPTNO` = `DEPT`.`DEPTNO`\n" + + "LEFT JOIN `scott`.`SALGRADE` ON `EMP`.`SAL` = `SALGRADE`.`HISAL`\n" + + "INNER JOIN `scott`.`EMP` `EMP0` ON `EMP`.`DEPTNO` = `EMP0`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } }