diff --git a/DEVELOPER_GUIDE.rst b/DEVELOPER_GUIDE.rst index 54099a0d155..7a1fac66d4a 100644 --- a/DEVELOPER_GUIDE.rst +++ b/DEVELOPER_GUIDE.rst @@ -173,7 +173,6 @@ Here are other files and sub-folders that you are likely to touch: - ``build.gradle``: Gradle build script. - ``docs``: documentation for developers and reference manual for users. - ``doc-test``: code that run .rst docs in ``docs`` folder by Python doctest library. -- ``language-grammar``: centralized package for ANTLR grammar files. See `Language Grammar Package`_ for details. Note that other related project code has already merged into this single repository together: @@ -444,29 +443,3 @@ with an appropriate label `backport ` is merged to main wi PR. For example, if a PR on main needs to be backported to `1.x` branch, add a label `backport 1.x` to the PR and make sure the backport workflow runs on the PR along with other checks. Once this PR is merged to main, the workflow will create a backport PR to the `1.x` branch. - -Language Grammar Package -======================== - -The ``language-grammar`` package serves as a centralized repository for all ANTLR grammar files used throughout the OpenSearch SQL project. This package contains the definitive versions of grammar files for: - -- SQL parsing (``OpenSearchSQLParser.g4``, ``OpenSearchSQLLexer.g4``) -- PPL parsing (``OpenSearchPPLParser.g4``, ``OpenSearchPPLLexer.g4``) -- Legacy SQL parsing (``OpenSearchLegacySqlParser.g4``, ``OpenSearchLegacySqlLexer.g4``) -- Spark SQL extensions (``SparkSqlBase.g4``, ``FlintSparkSqlExtensions.g4``, ``SqlBaseParser.g4``, ``SqlBaseLexer.g4``) - -Purpose -------- - -The language-grammar package enables sharing of grammar files between the main SQL repository and the Spark repository, ensuring consistency and reducing duplication. Once updated, the package automatically triggers CI to upload the new version to Maven Central for consumption by other projects. 
- -Updating Grammar Files ----------------------- - -When grammar files are modified in their respective modules (``sql/``, ``ppl/``, ``legacy/``, ``async-query-core/``), they must be manually copied to the ``language-grammar/src/main/antlr4/`` directory. - -**Workflow:** - -1. Modify grammar files in their source locations (e.g., ``sql/src/main/antlr/``) -2. Copy updated files to ``language-grammar/src/main/antlr4/`` -3. Commit changes to trigger automatic Maven publication via CI diff --git a/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 b/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 index 4c37be2f318..b7dc4b7286d 100644 --- a/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -13,12 +13,9 @@ options { caseInsensitive = true; } SEARCH: 'SEARCH'; DESCRIBE: 'DESCRIBE'; SHOW: 'SHOW'; -EXPLAIN: 'EXPLAIN'; FROM: 'FROM'; WHERE: 'WHERE'; FIELDS: 'FIELDS'; -FIELD: 'FIELD'; -TABLE: 'TABLE'; // Alias for FIELDS command RENAME: 'RENAME'; STATS: 'STATS'; EVENTSTATS: 'EVENTSTATS'; @@ -26,14 +23,13 @@ DEDUP: 'DEDUP'; SORT: 'SORT'; EVAL: 'EVAL'; HEAD: 'HEAD'; -BIN: 'BIN'; +TOP_APPROX: 'TOP_APPROX'; TOP: 'TOP'; +RARE_APPROX: 'RARE_APPROX'; RARE: 'RARE'; PARSE: 'PARSE'; -SPATH: 'SPATH'; +METHOD: 'METHOD'; REGEX: 'REGEX'; -REX: 'REX'; -SED: 'SED'; PUNCT: 'PUNCT'; GROK: 'GROK'; PATTERN: 'PATTERN'; @@ -43,22 +39,10 @@ KMEANS: 'KMEANS'; AD: 'AD'; ML: 'ML'; FILLNULL: 'FILLNULL'; +EXPAND: 'EXPAND'; FLATTEN: 'FLATTEN'; TRENDLINE: 'TRENDLINE'; -TIMECHART: 'TIMECHART'; APPENDCOL: 'APPENDCOL'; -EXPAND: 'EXPAND'; -SIMPLE_PATTERN: 'SIMPLE_PATTERN'; -BRAIN: 'BRAIN'; -VARIABLE_COUNT_THRESHOLD: 'VARIABLE_COUNT_THRESHOLD'; -FREQUENCY_THRESHOLD_PERCENTAGE: 'FREQUENCY_THRESHOLD_PERCENTAGE'; -METHOD: 'METHOD'; -MAX_SAMPLE_COUNT: 'MAX_SAMPLE_COUNT'; -MAX_MATCH: 'MAX_MATCH'; -OFFSET_FIELD: 'OFFSET_FIELD'; -BUFFER_LIMIT: 'BUFFER_LIMIT'; -LABEL: 'LABEL'; -AGGREGATION: 'AGGREGATION'; //Native JOIN KEYWORDS JOIN: 
'JOIN'; @@ -72,35 +56,52 @@ CROSS: 'CROSS'; LEFT_HINT: 'HINT.LEFT'; RIGHT_HINT: 'HINT.RIGHT'; +//CORRELATION KEYWORDS +CORRELATE: 'CORRELATE'; +SELF: 'SELF'; +EXACT: 'EXACT'; +APPROXIMATE: 'APPROXIMATE'; +SCOPE: 'SCOPE'; +MAPPING: 'MAPPING'; + +//EXPLAIN KEYWORDS +EXPLAIN: 'EXPLAIN'; +FORMATTED: 'FORMATTED'; +COST: 'COST'; +CODEGEN: 'CODEGEN'; +EXTENDED: 'EXTENDED'; +SIMPLE: 'SIMPLE'; + // COMMAND ASSIST KEYWORDS AS: 'AS'; BY: 'BY'; SOURCE: 'SOURCE'; INDEX: 'INDEX'; -A: 'A'; -ASC: 'ASC'; D: 'D'; DESC: 'DESC'; DATASOURCES: 'DATASOURCES'; USING: 'USING'; WITH: 'WITH'; -SIMPLE: 'SIMPLE'; -STANDARD: 'STANDARD'; -COST: 'COST'; -EXTENDED: 'EXTENDED'; -OVERRIDE: 'OVERRIDE'; -OVERWRITE: 'OVERWRITE'; // SORT FIELD KEYWORDS -// TODO #3180: Fix broken sort functionality +// TODO #963: Implement 'num', 'str', and 'ip' sort syntax AUTO: 'AUTO'; STR: 'STR'; +IP: 'IP'; NUM: 'NUM'; -// TRENDLINE KEYWORDS +// FIELDSUMMARY keywords +FIELDSUMMARY: 'FIELDSUMMARY'; +INCLUDEFIELDS: 'INCLUDEFIELDS'; +NULLS: 'NULLS'; + +//TRENDLINE KEYWORDS SMA: 'SMA'; WMA: 'WMA'; +// APPENDCOL options +OVERRIDE: 'OVERRIDE'; + // ARGUMENT KEYWORDS KEEPEMPTY: 'KEEPEMPTY'; CONSECUTIVE: 'CONSECUTIVE'; @@ -108,7 +109,6 @@ DEDUP_SPLITVALUES: 'DEDUP_SPLITVALUES'; PARTITIONS: 'PARTITIONS'; ALLNUM: 'ALLNUM'; DELIM: 'DELIM'; -BUCKET_NULLABLE: 'BUCKET_NULLABLE'; CENTROIDS: 'CENTROIDS'; ITERATIONS: 'ITERATIONS'; DISTANCE_TYPE: 'DISTANCE_TYPE'; @@ -124,13 +124,6 @@ TIME_ZONE: 'TIME_ZONE'; TRAINING_DATA_SIZE: 'TRAINING_DATA_SIZE'; ANOMALY_SCORE_THRESHOLD: 'ANOMALY_SCORE_THRESHOLD'; APPEND: 'APPEND'; -COUNTFIELD: 'COUNTFIELD'; -SHOWCOUNT: 'SHOWCOUNT'; -LIMIT: 'LIMIT'; -USEOTHER: 'USEOTHER'; -INPUT: 'INPUT'; -OUTPUT: 'OUTPUT'; -PATH: 'PATH'; // COMPARISON FUNCTION KEYWORDS CASE: 'CASE'; @@ -138,9 +131,6 @@ ELSE: 'ELSE'; IN: 'IN'; EXISTS: 'EXISTS'; -// Geo IP eval function -GEOIP: 'GEOIP'; - // LOGICAL KEYWORDS NOT: 'NOT'; OR: 'OR'; @@ -149,7 +139,6 @@ XOR: 'XOR'; TRUE: 'TRUE'; FALSE: 'FALSE'; REGEXP: 'REGEXP'; 
-REGEX_MATCH: 'REGEX_MATCH'; // DATETIME, INTERVAL AND UNIT KEYWORDS CONVERT_TZ: 'CONVERT_TZ'; @@ -197,14 +186,12 @@ LONG: 'LONG'; FLOAT: 'FLOAT'; STRING: 'STRING'; BOOLEAN: 'BOOLEAN'; -IP: 'IP'; // SPECIAL CHARACTERS AND OPERATORS PIPE: '|'; COMMA: ','; DOT: '.'; EQUAL: '='; -DOUBLE_EQUAL: '=='; GREATER: '>'; LESS: '<'; NOT_GREATER: '<' '='; @@ -221,8 +208,6 @@ LT_PRTHS: '('; RT_PRTHS: ')'; LT_SQR_PRTHS: '['; RT_SQR_PRTHS: ']'; -LT_CURLY: '{'; -RT_CURLY: '}'; SINGLE_QUOTE: '\''; DOUBLE_QUOTE: '"'; BACKTICK: '`'; @@ -255,12 +240,11 @@ VAR_SAMP: 'VAR_SAMP'; VAR_POP: 'VAR_POP'; STDDEV_SAMP: 'STDDEV_SAMP'; STDDEV_POP: 'STDDEV_POP'; -PERC: 'PERC'; PERCENTILE: 'PERCENTILE'; PERCENTILE_APPROX: 'PERCENTILE_APPROX'; -EARLIEST: 'EARLIEST'; -LATEST: 'LATEST'; TAKE: 'TAKE'; +FIRST: 'FIRST'; +LAST: 'LAST'; LIST: 'LIST'; VALUES: 'VALUES'; PER_DAY: 'PER_DAY'; @@ -272,22 +256,7 @@ SPARKLINE: 'SPARKLINE'; C: 'C'; DC: 'DC'; -// SCALAR WINDOW FUNCTIONS -ROW_NUMBER: 'ROW_NUMBER'; -RANK: 'RANK'; -DENSE_RANK: 'DENSE_RANK'; -PERCENT_RANK: 'PERCENT_RANK'; -CUME_DIST: 'CUME_DIST'; -FIRST: 'FIRST'; -LAST: 'LAST'; -NTH: 'NTH'; -NTILE: 'NTILE'; - // BASIC FUNCTIONS -PLUS_FUCTION: 'ADD'; -MINUS_FUCTION: 'SUBTRACT'; -STAR_FUNCTION: 'MULTIPLY'; -DIVIDE_FUNCTION: 'DIVIDE'; ABS: 'ABS'; CBRT: 'CBRT'; CEIL: 'CEIL'; @@ -296,13 +265,12 @@ CONV: 'CONV'; CRC32: 'CRC32'; E: 'E'; EXP: 'EXP'; -EXPM1: 'EXPM1'; FLOOR: 'FLOOR'; LN: 'LN'; LOG: 'LOG'; -LOG_WITH_BASE: ([0-9]+ ('.' [0-9]+)?)? ('LOG' | 'log') [0-9]+ ('.' 
[0-9]+)?; +LOG10: 'LOG10'; +LOG2: 'LOG2'; MOD: 'MOD'; -MODULUS: 'MODULUS'; PI: 'PI'; POSITION: 'POSITION'; POW: 'POW'; @@ -310,10 +278,9 @@ POWER: 'POWER'; RAND: 'RAND'; ROUND: 'ROUND'; SIGN: 'SIGN'; +SIGNUM: 'SIGNUM'; SQRT: 'SQRT'; TRUNCATE: 'TRUNCATE'; -RINT: 'RINT'; -SIGNUM: 'SIGNUM'; // TRIGONOMETRIC FUNCTIONS ACOS: 'ACOS'; @@ -321,12 +288,10 @@ ASIN: 'ASIN'; ATAN: 'ATAN'; ATAN2: 'ATAN2'; COS: 'COS'; -COSH: 'COSH'; COT: 'COT'; DEGREES: 'DEGREES'; RADIANS: 'RADIANS'; SIN: 'SIN'; -SINH: 'SINH'; TAN: 'TAN'; // CRYPTOGRAPHIC FUNCTIONS @@ -341,6 +306,7 @@ CURDATE: 'CURDATE'; CURRENT_DATE: 'CURRENT_DATE'; CURRENT_TIME: 'CURRENT_TIME'; CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; +CURRENT_TIMEZONE: 'CURRENT_TIMEZONE'; CURTIME: 'CURTIME'; DATE: 'DATE'; DATEDIFF: 'DATEDIFF'; @@ -353,6 +319,7 @@ DAYOFWEEK: 'DAYOFWEEK'; DAYOFYEAR: 'DAYOFYEAR'; DAY_OF_MONTH: 'DAY_OF_MONTH'; DAY_OF_WEEK: 'DAY_OF_WEEK'; +DURATION: 'DURATION'; EXTRACT: 'EXTRACT'; FROM_DAYS: 'FROM_DAYS'; FROM_UNIXTIME: 'FROM_UNIXTIME'; @@ -361,6 +328,7 @@ LAST_DAY: 'LAST_DAY'; LOCALTIME: 'LOCALTIME'; LOCALTIMESTAMP: 'LOCALTIMESTAMP'; MAKEDATE: 'MAKEDATE'; +MAKE_DATE: 'MAKE_DATE'; MAKETIME: 'MAKETIME'; MONTHNAME: 'MONTHNAME'; NOW: 'NOW'; @@ -387,6 +355,11 @@ UTC_TIMESTAMP: 'UTC_TIMESTAMP'; WEEKDAY: 'WEEKDAY'; YEARWEEK: 'YEARWEEK'; +// RELATIVE TIME FUNCTIONS +RELATIVE_TIMESTAMP: 'RELATIVE_TIMESTAMP'; +EARLIEST: 'EARLIEST'; +LATEST: 'LATEST'; + // TEXT FUNCTIONS SUBSTR: 'SUBSTR'; SUBSTRING: 'SUBSTRING'; @@ -408,45 +381,67 @@ REPLACE: 'REPLACE'; REVERSE: 'REVERSE'; CAST: 'CAST'; -// BOOL FUNCTIONS -LIKE: 'LIKE'; -ISNULL: 'ISNULL'; -ISNOTNULL: 'ISNOTNULL'; -CIDRMATCH: 'CIDRMATCH'; -BETWEEN: 'BETWEEN'; -ISPRESENT: 'ISPRESENT'; -ISEMPTY: 'ISEMPTY'; -ISBLANK: 'ISBLANK'; +// JSON TEXT FUNCTIONS +JSON: 'JSON'; +JSON_OBJECT: 'JSON_OBJECT'; +JSON_ARRAY: 'JSON_ARRAY'; +JSON_ARRAY_LENGTH: 'JSON_ARRAY_LENGTH'; +TO_JSON_STRING: 'TO_JSON_STRING'; +JSON_EXTRACT: 'JSON_EXTRACT'; +JSON_DELETE : 'JSON_DELETE'; +JSON_KEYS: 'JSON_KEYS'; 
+JSON_VALID: 'JSON_VALID'; +JSON_APPEND: 'JSON_APPEND'; +JSON_EXTEND : 'JSON_EXTEND'; +JSON_SET: 'JSON_SET'; +//JSON_ARRAY_ALL_MATCH: 'JSON_ARRAY_ALL_MATCH'; +//JSON_ARRAY_ANY_MATCH: 'JSON_ARRAY_ANY_MATCH'; +//JSON_ARRAY_FILTER: 'JSON_ARRAY_FILTER'; +//JSON_ARRAY_MAP: 'JSON_ARRAY_MAP'; +//JSON_ARRAY_REDUCE: 'JSON_ARRAY_REDUCE'; // COLLECTION FUNCTIONS ARRAY: 'ARRAY'; ARRAY_LENGTH: 'ARRAY_LENGTH'; -MVJOIN: 'MVJOIN'; + +// LAMBDA FUNCTIONS +//EXISTS: 'EXISTS'; FORALL: 'FORALL'; FILTER: 'FILTER'; TRANSFORM: 'TRANSFORM'; REDUCE: 'REDUCE'; -// JSON FUNCTIONS -JSON_VALID: 'JSON_VALID'; -JSON: 'JSON'; -JSON_OBJECT: 'JSON_OBJECT'; -JSON_ARRAY: 'JSON_ARRAY'; -JSON_ARRAY_LENGTH: 'JSON_ARRAY_LENGTH'; -JSON_EXTRACT: 'JSON_EXTRACT'; -JSON_KEYS: 'JSON_KEYS'; -JSON_SET: 'JSON_SET'; -JSON_DELETE: 'JSON_DELETE'; -JSON_APPEND: 'JSON_APPEND'; -JSON_EXTEND: 'JSON_EXTEND'; +// BOOL FUNCTIONS +LIKE: 'LIKE'; +ISNULL: 'ISNULL'; +ISNOTNULL: 'ISNOTNULL'; +BETWEEN: 'BETWEEN'; +CIDRMATCH: 'CIDRMATCH'; +ISPRESENT: 'ISPRESENT'; +ISEMPTY: 'ISEMPTY'; +ISBLANK: 'ISBLANK'; // FLOWCONTROL FUNCTIONS IFNULL: 'IFNULL'; NULLIF: 'NULLIF'; IF: 'IF'; TYPEOF: 'TYPEOF'; + +//OTHER CONDITIONAL EXPRESSIONS COALESCE: 'COALESCE'; +//GEOLOCATION FUNCTIONS +GEOIP: 'GEOIP'; + +//GEOLOCATION PROPERTIES +COUNTRY_ISO_CODE: 'COUNTRY_ISO_CODE'; +COUNTRY_NAME: 'COUNTRY_NAME'; +CONTINENT_NAME: 'CONTINENT_NAME'; +REGION_ISO_CODE: 'REGION_ISO_CODE'; +REGION_NAME: 'REGION_NAME'; +CITY_NAME: 'CITY_NAME'; +LOCATION: 'LOCATION'; + // RELEVANCE FUNCTIONS AND PARAMETERS MATCH: 'MATCH'; MATCH_PHRASE: 'MATCH_PHRASE'; @@ -490,11 +485,6 @@ ZERO_TERMS_QUERY: 'ZERO_TERMS_QUERY'; // SPAN KEYWORDS SPAN: 'SPAN'; -BINS: 'BINS'; -MINSPAN: 'MINSPAN'; -START: 'START'; -END: 'END'; -ALIGNTIME: 'ALIGNTIME'; MS: 'MS'; S: 'S'; M: 'M'; @@ -503,26 +493,6 @@ W: 'W'; Q: 'Q'; Y: 'Y'; -// Extended timescale units -SEC: 'SEC'; -SECS: 'SECS'; -SECONDS: 'SECONDS'; -MINS: 'MINS'; -MINUTES: 'MINUTES'; -HR: 'HR'; -HRS: 'HRS'; -HOURS: 'HOURS'; -DAYS: 'DAYS'; 
-MON: 'MON'; -MONTHS: 'MONTHS'; -US: 'US'; -CS: 'CS'; -DS: 'DS'; - - -// PERCENTILE SHORTCUT FUNCTIONS -// Must precede ID to avoid conflicts with identifier matching -PERCENTILE_SHORTCUT: PERC(INTEGER_LITERAL | DECIMAL_LITERAL) | 'P'(INTEGER_LITERAL | DECIMAL_LITERAL); // LITERALS AND VALUES //STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING; @@ -530,10 +500,9 @@ ID: ID_LITERAL; CLUSTER: CLUSTER_PREFIX_LITERAL; INTEGER_LITERAL: DEC_DIGIT+; DECIMAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+; -FLOAT_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+ 'F'; -DOUBLE_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+ 'D'; fragment DATE_SUFFIX: ([\-.][*0-9]+)+; +fragment ID_LITERAL: [@*A-Z]+?[*A-Z_\-0-9]*; fragment CLUSTER_PREFIX_LITERAL: [*A-Z]+?[*A-Z_\-0-9]* COLON; ID_DATE_SUFFIX: CLUSTER_PREFIX_LITERAL? ID_LITERAL DATE_SUFFIX; DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; @@ -541,10 +510,6 @@ SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\'' BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; fragment DEC_DIGIT: [0-9]; -// Identifiers cannot start with a single '_' since this an OpenSearch reserved -// metadata field. Two underscores (or more) is acceptable, such as '__field'. -fragment ID_LITERAL: ([@*A-Z_])+?[*A-Z_\-0-9]*; - LINE_COMMENT: '//' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN); BLOCK_COMMENT: '/*' .*? '*/' -> channel(HIDDEN); diff --git a/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 b/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 index d5cb4e3452b..cae57b53181 100644 --- a/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 @@ -3,233 +3,168 @@ * SPDX-License-Identifier: Apache-2.0 */ - parser grammar OpenSearchPPLParser; options { tokenVocab = OpenSearchPPLLexer; } - root : pplStatement? EOF ; // statement pplStatement - : explainStatement - | queryStatement + : dmlStatement + ; + +dmlStatement + : (explainCommand PIPE)? 
queryStatement ; queryStatement : pplCommands (PIPE commands)* ; -explainStatement - : EXPLAIN (explainMode)? queryStatement - ; - -explainMode - : SIMPLE - | STANDARD - | COST - | EXTENDED - ; - subSearch : searchCommand (PIPE commands)* ; // commands pplCommands - : describeCommand - | showDataSourcesCommand - | searchCommand + : searchCommand + | describeCommand ; commands : whereCommand - | fieldsCommand - | tableCommand + | correlateCommand | joinCommand - | renameCommand + | fieldsCommand | statsCommand - | eventstatsCommand | dedupCommand | sortCommand - | evalCommand | headCommand - | binCommand | topCommand | rareCommand + | evalCommand | grokCommand | parseCommand - | spathCommand | patternsCommand | lookupCommand - | kmeansCommand - | adCommand - | mlCommand + | renameCommand | fillnullCommand + | fieldsummaryCommand + | flattenCommand + | expandCommand | trendlineCommand | appendcolCommand - | appendCommand - | expandCommand - | flattenCommand - | reverseCommand - | regexCommand - | timechartCommand - | rexCommand ; commandName : SEARCH | DESCRIBE | SHOW + | AD + | ML + | KMEANS | WHERE - | FIELDS - | TABLE + | CORRELATE | JOIN - | RENAME + | FIELDS | STATS | EVENTSTATS | DEDUP + | EXPLAIN | SORT - | EVAL | HEAD - | BIN | TOP + | TOP_APPROX | RARE + | RARE_APPROX + | EVAL | GROK | PARSE | PATTERNS | LOOKUP - | KMEANS - | AD - | ML - | FILLNULL + | RENAME | EXPAND + | FILLNULL + | FIELDSUMMARY | FLATTEN | TRENDLINE - | TIMECHART - | EXPLAIN - | REVERSE - | REGEX - | APPEND - | REX + | APPENDCOL ; searchCommand - : (SEARCH)? 
(searchExpression)* fromClause (searchExpression)* # searchFrom - ; - -searchExpression - : LT_PRTHS searchExpression RT_PRTHS # groupedExpression - | NOT searchExpression # notExpression - | searchExpression OR searchExpression # orExpression - | searchExpression AND searchExpression # andExpression - | searchTerm # termExpression - ; - -searchTerm - : searchFieldComparison # searchComparisonTerm - | searchFieldInList # searchInListTerm - | searchLiteral # searchLiteralTerm - ; - -// Unified search literal for both free text and field comparisons -searchLiteral - : numericLiteral - | booleanLiteral - | ID - | stringLiteral - | searchableKeyWord + : (SEARCH)? fromClause # searchFrom + | (SEARCH)? fromClause logicalExpression # searchFromFilter + | (SEARCH)? logicalExpression fromClause # searchFilterFrom ; -searchFieldComparison - : fieldExpression searchComparisonOperator searchLiteral # searchFieldCompare - ; - -searchFieldInList - : fieldExpression IN LT_PRTHS searchLiteralList RT_PRTHS # searchFieldInValues - ; - -searchLiteralList - : searchLiteral (COMMA searchLiteral)* # searchLiterals - ; - -searchComparisonOperator - : EQUAL # equals - | NOT_EQUAL # notEquals - | LESS # lessThan - | NOT_GREATER # lessOrEqual - | GREATER # greaterThan - | NOT_LESS # greaterOrEqual - ; - - -describeCommand - : DESCRIBE tableSourceClause +fieldsummaryCommand + : FIELDSUMMARY (fieldsummaryParameter)* ; -showDataSourcesCommand - : SHOW DATASOURCES +fieldsummaryParameter + : INCLUDEFIELDS EQUAL fieldList # fieldsummaryIncludeFields + | NULLS EQUAL booleanLiteral # fieldsummaryNulls ; -whereCommand - : WHERE logicalExpression - ; - -fieldsCommand - : FIELDS fieldsCommandBody +describeCommand + : DESCRIBE tableSourceClause ; -// Table command - alias for fields command -tableCommand - : TABLE fieldsCommandBody - ; +explainCommand + : EXPLAIN explainMode + ; -fieldsCommandBody - : (PLUS | MINUS)? 
wcFieldList - ; +explainMode + : FORMATTED + | COST + | CODEGEN + | EXTENDED + | SIMPLE + ; -// Wildcard field list supporting both comma-separated and space-separated fields -wcFieldList - : selectFieldExpression (COMMA? selectFieldExpression)* - ; +showDataSourcesCommand + : SHOW DATASOURCES + ; -renameCommand - : RENAME renameClasue (COMMA? renameClasue)* - ; +whereCommand + : WHERE logicalExpression + ; -statsCommand - : STATS statsArgs statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (dedupSplitArg)? - ; +correlateCommand + : CORRELATE correlationType FIELDS LT_PRTHS fieldList RT_PRTHS (scopeClause)? mappingList + ; -statsArgs - : (partitionsArg | allnumArg | delimArg | bucketNullableArg)* - ; +correlationType + : SELF + | EXACT + | APPROXIMATE + ; -partitionsArg - : PARTITIONS EQUAL partitions = integerLiteral - ; +scopeClause + : SCOPE LT_PRTHS fieldExpression COMMA value = literalValue (unit = timespanUnit)? RT_PRTHS + ; -allnumArg - : ALLNUM EQUAL allnum = booleanLiteral - ; +mappingList + : MAPPING LT_PRTHS ( mappingClause (COMMA mappingClause)* ) RT_PRTHS + ; -delimArg - : DELIM EQUAL delim = stringLiteral - ; +mappingClause + : left = qualifiedName comparisonOperator right = qualifiedName # mappingCompareExpr + ; -bucketNullableArg - : BUCKET_NULLABLE EQUAL bucket_nullable = booleanLiteral +fieldsCommand + : FIELDS (PLUS | MINUS)? fieldList ; -dedupSplitArg - : DEDUP_SPLITVALUES EQUAL dedupsplit = booleanLiteral +renameCommand + : RENAME renameClasue (COMMA renameClasue)* ; -eventstatsCommand - : EVENTSTATS eventstatsAggTerm (COMMA eventstatsAggTerm)* (statsByClause)? +statsCommand + : (STATS | EVENTSTATS) (PARTITIONS EQUAL partitions = integerLiteral)? (ALLNUM EQUAL allnum = booleanLiteral)? (DELIM EQUAL delim = stringLiteral)? statsAggTerm (COMMA statsAggTerm)* (statsByClause)? (DEDUP_SPLITVALUES EQUAL dedupsplit = booleanLiteral)? ; dedupCommand @@ -237,30 +172,7 @@ dedupCommand ; sortCommand - : SORT (count = integerLiteral)? 
sortbyClause (ASC | A | DESC | D)? - ; - -reverseCommand - : REVERSE - ; - -timechartCommand - : TIMECHART timechartParameter* statsFunction (BY fieldExpression)? - ; - -timechartParameter - : (spanClause | SPAN EQUAL spanLiteral) - | timechartArg - ; - -timechartArg - : LIMIT EQUAL integerLiteral - | USEOTHER EQUAL (booleanLiteral | ident) - ; - -spanLiteral - : integerLiteral timespanUnit - | stringLiteral + : SORT sortbyClause ; evalCommand @@ -271,42 +183,12 @@ headCommand : HEAD (number = integerLiteral)? (FROM from = integerLiteral)? ; -binCommand - : BIN fieldExpression binOption* (AS alias = qualifiedName)? - ; - -binOption - : SPAN EQUAL span = spanValue - | BINS EQUAL bins = integerLiteral - | MINSPAN EQUAL minspan = literalValue (minspanUnit = timespanUnit)? - | ALIGNTIME EQUAL aligntime = aligntimeValue - | START EQUAL start = numericLiteral - | END EQUAL end = numericLiteral - ; - -aligntimeValue - : EARLIEST - | LATEST - | literalValue - ; - -spanValue - : literalValue (timespanUnit)? # numericSpanValue - | logSpanValue # logBasedSpanValue - | ident timespanUnit # extendedTimeSpanValue - | ident # identifierSpanValue - ; - -logSpanValue - : LOG_WITH_BASE # logWithBaseSpan - ; - topCommand - : TOP (number = integerLiteral)? (COUNTFIELD EQUAL countfield = stringLiteral)? (SHOWCOUNT EQUAL showcount = booleanLiteral)? fieldList (byClause)? + : (TOP | TOP_APPROX) (number = integerLiteral)? fieldList (byClause)? ; rareCommand - : RARE (number = integerLiteral)? (COUNTFIELD EQUAL countfield = stringLiteral)? (SHOWCOUNT EQUAL showcount = booleanLiteral)? fieldList (byClause)? + : (RARE | RARE_APPROX) (number = integerLiteral)? fieldList (byClause)? ; grokCommand @@ -317,73 +199,20 @@ parseCommand : PARSE (source_field = expression) (pattern = stringLiteral) ; -spathCommand - : SPATH spathParameter* - ; - -spathParameter - : (INPUT EQUAL input = expression) - | (OUTPUT EQUAL output = expression) - | ((PATH EQUAL)? 
path = indexablePath) - ; - -indexablePath - : pathElement (DOT pathElement)* - ; - -pathElement - : ident pathArrayAccess? +patternsCommand + : PATTERNS (patternsParameter)* (source_field = expression) ; -pathArrayAccess - : LT_CURLY (INTEGER_LITERAL)? RT_CURLY +patternsParameter + : (NEW_FIELD EQUAL new_field = stringLiteral) + | (PATTERN EQUAL pattern = stringLiteral) ; -regexCommand - : REGEX regexExpr - ; - -regexExpr - : field=qualifiedName operator=(EQUAL | NOT_EQUAL) pattern=stringLiteral - ; - -rexCommand - : REX rexExpr - ; -rexExpr - : FIELD EQUAL field=qualifiedName (rexOption)* pattern=stringLiteral (rexOption)* - ; - -rexOption - : MAX_MATCH EQUAL maxMatch=integerLiteral - | MODE EQUAL (EXTRACT | SED) - | OFFSET_FIELD EQUAL offsetField=qualifiedName - ; patternsMethod : PUNCT | REGEX ; -patternsCommand - : PATTERNS (source_field = expression) (statsByClause)? (METHOD EQUAL method = patternMethod)? (MODE EQUAL pattern_mode = patternMode)? (MAX_SAMPLE_COUNT EQUAL max_sample_count = integerLiteral)? (BUFFER_LIMIT EQUAL buffer_limit = integerLiteral)? (NEW_FIELD EQUAL new_field = stringLiteral)? (patternsParameter)* - ; - -patternsParameter - : (PATTERN EQUAL pattern = stringLiteral) - | (VARIABLE_COUNT_THRESHOLD EQUAL variable_count_threshold = integerLiteral) - | (FREQUENCY_THRESHOLD_PERCENTAGE EQUAL frequency_threshold_percentage = decimalLiteral) - ; - -patternMethod - : SIMPLE_PATTERN - | BRAIN - ; - -patternMode - : LABEL - | AGGREGATION - ; - // lookup lookupCommand : LOOKUP tableSource lookupMappingList ((APPEND | REPLACE) outputCandidateList)? @@ -406,28 +235,36 @@ lookupPair ; fillnullCommand - : FILLNULL fillNullWith - | FILLNULL fillNullUsing + : FILLNULL (fillNullWithTheSameValue + | fillNullWithFieldVariousValues) ; -fillNullWith - : WITH replacement = valueExpression (IN fieldList)? 
+fillNullWithTheSameValue + : WITH nullReplacement = valueExpression IN nullableFieldList = fieldList ; -fillNullUsing - : USING replacementPair (COMMA replacementPair)* +fillNullWithFieldVariousValues + : USING nullableReplacementExpression (COMMA nullableReplacementExpression)* ; -replacementPair - : fieldExpression EQUAL replacement = valueExpression +nullableReplacementExpression + : nullableField = fieldExpression EQUAL nullableReplacement = valueExpression ; +expandCommand + : EXPAND fieldExpression (AS alias = qualifiedName)? + ; + +flattenCommand + : FLATTEN fieldExpression (AS alias = identifierSeq)? + ; + trendlineCommand : TRENDLINE (SORT sortField)? trendlineClause (trendlineClause)* ; trendlineClause - : trendlineType LT_PRTHS numberOfDataPoints = integerLiteral COMMA field = fieldExpression RT_PRTHS (AS alias = qualifiedName)? + : trendlineType LT_PRTHS numberOfDataPoints = INTEGER_LITERAL COMMA field = fieldExpression RT_PRTHS (AS alias = qualifiedName)? ; trendlineType @@ -435,22 +272,10 @@ trendlineType | WMA ; -expandCommand - : EXPAND fieldExpression (AS alias = qualifiedName)? - ; - -flattenCommand - : FLATTEN fieldExpression (AS aliases = identifierSeq)? - ; - appendcolCommand : APPENDCOL (OVERRIDE EQUAL override = booleanLiteral)? LT_SQR_PRTHS commands (PIPE commands)* RT_SQR_PRTHS ; -appendCommand - : APPEND LT_SQR_PRTHS searchCommand? (PIPE commands)* RT_SQR_PRTHS - ; - kmeansCommand : KMEANS (kmeansParameter)* ; @@ -492,10 +317,6 @@ mlArg fromClause : SOURCE EQUAL tableOrSubqueryClause | INDEX EQUAL tableOrSubqueryClause - | SOURCE EQUAL tableFunction - | INDEX EQUAL tableFunction - | SOURCE EQUAL dynamicSourceClause - | INDEX EQUAL dynamicSourceClause ; tableOrSubqueryClause @@ -503,64 +324,36 @@ tableOrSubqueryClause | tableSourceClause ; +// One tableSourceClause will generate one Relation node with/without one alias +// even if the relation contains more than one table source.
+// These table sources in one relation will be read one by one in OpenSearch. +// But it may have different behaviours in different execution backends. +// For example, a Spark UnresolvedRelation node only accepts one data source. tableSourceClause : tableSource (COMMA tableSource)* (AS alias = qualifiedName)? ; -dynamicSourceClause - : LT_SQR_PRTHS sourceReferences (COMMA sourceFilterArgs)? RT_SQR_PRTHS - ; - -sourceReferences - : sourceReference (COMMA sourceReference)* - ; - -sourceReference - : (CLUSTER)? wcQualifiedName - ; - -sourceFilterArgs - : sourceFilterArg (COMMA sourceFilterArg)* - ; - -sourceFilterArg - : ident EQUAL literalValue - | ident IN valueList - ; - // join joinCommand - : JOIN (joinOption)* (fieldList)? right = tableOrSubqueryClause - | sqlLikeJoinType? JOIN (joinOption)* sideAlias joinHintList? joinCriteria right = tableOrSubqueryClause + : (joinType) JOIN sideAlias joinHintList? joinCriteria? right = tableOrSubqueryClause ; -sqlLikeJoinType - : INNER +joinType + : INNER? | CROSS - | (LEFT OUTER? | OUTER) + | LEFT OUTER? | RIGHT OUTER? | FULL OUTER? | LEFT? SEMI | LEFT? ANTI ; -joinType - : INNER - | CROSS - | OUTER - | LEFT - | RIGHT - | FULL - | SEMI - | ANTI - ; - sideAlias : (LEFT EQUAL leftAlias = qualifiedName)? COMMA? (RIGHT EQUAL rightAlias = qualifiedName)?
; joinCriteria - : (ON | WHERE) logicalExpression + : ON logicalExpression ; joinHintList @@ -572,14 +365,8 @@ hintPair | rightHintKey = RIGHT_HINT DOT ID EQUAL rightHintValue = ident #rightHint ; -joinOption - : OVERWRITE EQUAL booleanLiteral # overwriteOption - | TYPE EQUAL joinType # typeOption - | MAX EQUAL integerLiteral # maxOption - ; - renameClasue - : orignalField = renameFieldExpression AS renamedField = renameFieldExpression + : orignalField = wcFieldExpression AS renamedField = wcFieldExpression ; byClause @@ -590,7 +377,6 @@ statsByClause : BY fieldList | BY bySpanClause | BY bySpanClause COMMA fieldList - | BY fieldList COMMA bySpanClause ; bySpanClause @@ -606,34 +392,12 @@ sortbyClause ; evalClause - : fieldExpression EQUAL logicalExpression + : fieldExpression EQUAL expression + | geoipCommand ; -eventstatsAggTerm - : windowFunction (AS alias = wcFieldExpression)? - ; - -windowFunction - : windowFunctionName LT_PRTHS functionArgs RT_PRTHS - ; - -windowFunctionName - : statsFunctionName - | scalarWindowFunctionName - ; - -scalarWindowFunctionName - : ROW_NUMBER - | RANK - | DENSE_RANK - | PERCENT_RANK - | CUME_DIST - | FIRST - | LAST - | NTH - | NTILE - | DISTINCT_COUNT - | DC +geoipCommand + : fieldExpression EQUAL GEOIP LT_PRTHS ipAddress = functionArg (COMMA properties = geoIpPropertyList)? RT_PRTHS ; // aggregation terms @@ -643,13 +407,10 @@ statsAggTerm // aggregation functions statsFunction - : (COUNT | C) LT_PRTHS evalExpression RT_PRTHS # countEvalFunctionCall - | (COUNT | C) (LT_PRTHS RT_PRTHS)? 
# countAllFunctionCall - | PERCENTILE_SHORTCUT LT_PRTHS valueExpression RT_PRTHS # percentileShortcutFunctionCall - | (DISTINCT_COUNT | DC | DISTINCT_COUNT_APPROX) LT_PRTHS valueExpression RT_PRTHS # distinctCountFunctionCall - | takeAggFunction # takeAggFunctionCall - | percentileApproxFunction # percentileApproxFunctionCall - | statsFunctionName LT_PRTHS functionArgs RT_PRTHS # statsFunctionCall + : statsFunctionName LT_PRTHS valueExpression RT_PRTHS # statsFunctionCall + | COUNT LT_PRTHS RT_PRTHS # countAllFunctionCall + | (DISTINCT_COUNT | DC | DISTINCT_COUNT_APPROX) LT_PRTHS valueExpression RT_PRTHS # distinctCountFunctionCall + | percentileFunctionName = (PERCENTILE | PERCENTILE_APPROX) LT_PRTHS valueExpression COMMA percent = integerLiteral RT_PRTHS # percentileFunctionCall ; statsFunctionName @@ -658,89 +419,72 @@ statsFunctionName | SUM | MIN | MAX - | VAR_SAMP - | VAR_POP | STDDEV_SAMP | STDDEV_POP - | PERCENTILE - | PERCENTILE_APPROX - | MEDIAN - | LIST - | FIRST - | EARLIEST - | LATEST - | LAST ; -takeAggFunction - : TAKE LT_PRTHS fieldExpression (COMMA size = integerLiteral)? RT_PRTHS - ; - -percentileApproxFunction - : (PERCENTILE | PERCENTILE_APPROX) LT_PRTHS aggField = valueExpression - COMMA percent = numericLiteral (COMMA compression = numericLiteral)? RT_PRTHS +// expressions +expression + : logicalExpression + | valueExpression ; -numericLiteral - : integerLiteral - | decimalLiteral - | doubleLiteral - | floatLiteral - ; - -// predicates logicalExpression : NOT logicalExpression # logicalNot - | left = logicalExpression AND right = logicalExpression # logicalAnd - | left = logicalExpression XOR right = logicalExpression # logicalXor + | LT_PRTHS logicalExpression RT_PRTHS # parentheticLogicalExpr + | comparisonExpression # comparsion + | left = logicalExpression (AND)? 
right = logicalExpression # logicalAnd | left = logicalExpression OR right = logicalExpression # logicalOr - | expression # logicalExpr + | left = logicalExpression XOR right = logicalExpression # logicalXor + | booleanExpression # booleanExpr ; -expression - : valueExpression # valueExpr - | relevanceExpression # relevanceExpr - | left = expression comparisonOperator right = expression # compareExpr - | expression NOT? IN valueList # inExpr - | expression NOT? BETWEEN expression AND expression # between +comparisonExpression + : left = valueExpression comparisonOperator right = valueExpression # compareExpr + | valueExpression NOT? IN valueList # inExpr + | expr1 = functionArg NOT? BETWEEN expr2 = functionArg AND expr3 = functionArg # between ; -valueExpression - : left = valueExpression binaryOperator = (STAR | DIVIDE | MODULE) right = valueExpression # binaryArithmetic - | left = valueExpression binaryOperator = (PLUS | MINUS) right = valueExpression # binaryArithmetic - | literalValue # literalValueExpr - | functionCall # functionCallExpr - | lambda # lambdaExpr - | LT_SQR_PRTHS subSearch RT_SQR_PRTHS # scalarSubqueryExpr - | valueExpression NOT? IN LT_SQR_PRTHS subSearch RT_SQR_PRTHS # inSubqueryExpr - | LT_PRTHS valueExpression (COMMA valueExpression)* RT_PRTHS NOT? 
IN LT_SQR_PRTHS subSearch RT_SQR_PRTHS # inSubqueryExpr - | EXISTS LT_SQR_PRTHS subSearch RT_SQR_PRTHS # existsSubqueryExpr - | fieldExpression # fieldExpr - | LT_PRTHS logicalExpression RT_PRTHS # nestedValueExpr - ; - -evalExpression - : EVAL LT_PRTHS logicalExpression RT_PRTHS - ; +valueExpressionList + : valueExpression + | LT_PRTHS valueExpression (COMMA valueExpression)* RT_PRTHS + ; -functionCall +valueExpression + : left = valueExpression binaryOperator = (STAR | DIVIDE | MODULE) right = valueExpression # binaryArithmetic + | left = valueExpression binaryOperator = (PLUS | MINUS) right = valueExpression # binaryArithmetic + | primaryExpression # valueExpressionDefault + | positionFunction # positionFunctionCall + | caseFunction # caseExpr + | timestampFunction # timestampFunctionCall + | LT_PRTHS valueExpression RT_PRTHS # parentheticValueExpr + | LT_SQR_PRTHS subSearch RT_SQR_PRTHS # scalarSubqueryExpr + | ident ARROW expression # lambda + | LT_PRTHS ident (COMMA ident)+ RT_PRTHS ARROW expression # lambda + ; + +primaryExpression : evalFunctionCall + | fieldExpression + | literalValue | dataTypeFunctionCall - | positionFunctionCall - | caseFunctionCall - | timestampFunctionCall - | extractFunctionCall - | getFormatFunctionCall ; -positionFunctionCall +positionFunction : positionFunctionName LT_PRTHS functionArg IN functionArg RT_PRTHS ; -caseFunctionCall - : CASE LT_PRTHS logicalExpression COMMA valueExpression (COMMA logicalExpression COMMA valueExpression)* (ELSE valueExpression)? RT_PRTHS +booleanExpression + : booleanFunctionCall # booleanFunctionCallExpr + | valueExpressionList NOT? IN LT_SQR_PRTHS subSearch RT_SQR_PRTHS # inSubqueryExpr + | EXISTS LT_SQR_PRTHS subSearch RT_SQR_PRTHS # existsSubqueryExpr + | cidrMatchFunctionCall # cidrFunctionCallExpr ; + caseFunction + : CASE LT_PRTHS logicalExpression COMMA valueExpression (COMMA logicalExpression COMMA valueExpression)* (ELSE valueExpression)? 
RT_PRTHS + ; + relevanceExpression : singleFieldRelevanceFunction | multiFieldRelevanceFunction @@ -753,7 +497,7 @@ singleFieldRelevanceFunction // Field is a list of columns multiFieldRelevanceFunction - : multiFieldRelevanceFunctionName LT_PRTHS (LT_SQR_PRTHS field = relevanceFieldAndWeight (COMMA field = relevanceFieldAndWeight)* RT_SQR_PRTHS COMMA)? query = relevanceQuery (COMMA relevanceArg)* RT_PRTHS + : multiFieldRelevanceFunctionName LT_PRTHS LT_SQR_PRTHS field = relevanceFieldAndWeight (COMMA field = relevanceFieldAndWeight)* RT_SQR_PRTHS COMMA query = relevanceQuery (COMMA relevanceArg)* RT_PRTHS ; // tables @@ -763,12 +507,16 @@ tableSource ; tableFunction - : qualifiedName LT_PRTHS namedFunctionArgs RT_PRTHS + : qualifiedName LT_PRTHS functionArgs RT_PRTHS ; // fields fieldList - : fieldExpression ((COMMA)? fieldExpression)* + : fieldExpression (COMMA fieldExpression)* + ; + +wcFieldList + : wcFieldExpression (COMMA wcFieldExpression)* ; sortField @@ -777,6 +525,8 @@ sortField sortFieldExpression : fieldExpression + + // TODO #963: Implement 'num', 'str', and 'ip' sort syntax | AUTO LT_PRTHS fieldExpression RT_PRTHS | STR LT_PRTHS fieldExpression RT_PRTHS | IP LT_PRTHS fieldExpression RT_PRTHS @@ -791,16 +541,6 @@ wcFieldExpression : wcQualifiedName ; -selectFieldExpression - : wcQualifiedName - | STAR - ; - -renameFieldExpression - : wcQualifiedName - | STAR - ; - // functions evalFunctionCall : evalFunctionName LT_PRTHS functionArgs RT_PRTHS @@ -808,7 +548,16 @@ evalFunctionCall // cast function dataTypeFunctionCall - : CAST LT_PRTHS logicalExpression AS convertedDataType RT_PRTHS + : CAST LT_PRTHS expression AS convertedDataType RT_PRTHS + ; + +// boolean functions +booleanFunctionCall + : conditionFunctionBase LT_PRTHS functionArgs RT_PRTHS + ; + +cidrMatchFunctionCall + : CIDRMATCH LT_PRTHS ipAddress = functionArg COMMA cidrBlock = functionArg RT_PRTHS ; convertedDataType @@ -822,48 +571,28 @@ convertedDataType | typeName = FLOAT | typeName = 
STRING | typeName = BOOLEAN - | typeName = IP - | typeName = JSON ; evalFunctionName : mathematicalFunctionName | dateTimeFunctionName | textFunctionName - | conditionFunctionName - | flowControlFunctionName + | conditionFunctionBase | systemFunctionName | positionFunctionName + | coalesceFunctionName | cryptographicFunctionName | jsonFunctionName - | geoipFunctionName | collectionFunctionName + | lambdaFunctionName ; functionArgs : (functionArg (COMMA functionArg)*)? ; -namedFunctionArgs - : (namedFunctionArg (COMMA namedFunctionArg)*)? - ; - functionArg - : functionArgExpression - ; - -namedFunctionArg - : (ident EQUAL)? functionArgExpression - ; - -functionArgExpression - : lambda - | logicalExpression - ; - -lambda - : ident ARROW logicalExpression - | LT_PRTHS ident (COMMA ident)+ RT_PRTHS ARROW logicalExpression + : (ident EQUAL)? valueExpression ; relevanceArg @@ -915,8 +644,6 @@ relevanceFieldAndWeight relevanceFieldWeight : integerLiteral | decimalLiteral - | doubleLiteral - | floatLiteral ; relevanceField @@ -935,10 +662,6 @@ relevanceArgValue mathematicalFunctionName : ABS - | PLUS_FUCTION - | MINUS_FUCTION - | STAR_FUNCTION - | DIVIDE_FUNCTION | CBRT | CEIL | CEILING @@ -946,72 +669,37 @@ mathematicalFunctionName | CRC32 | E | EXP - | EXPM1 | FLOOR | LN | LOG - | LOG_WITH_BASE + | LOG10 + | LOG2 | MOD - | MODULUS | PI | POW | POWER | RAND | ROUND | SIGN + | SIGNUM | SQRT | TRUNCATE - | RINT - | SIGNUM - | SUM - | AVG | trigonometricFunctionName ; -geoipFunctionName - : GEOIP - ; - -collectionFunctionName - : ARRAY - | ARRAY_LENGTH - | MVJOIN - | FORALL - | EXISTS - | FILTER - | TRANSFORM - | REDUCE - ; - - trigonometricFunctionName : ACOS | ASIN | ATAN | ATAN2 | COS - | COSH | COT | DEGREES | RADIANS | SIN - | SINH | TAN ; -jsonFunctionName - : JSON - | JSON_OBJECT - | JSON_ARRAY - | JSON_ARRAY_LENGTH - | JSON_EXTRACT - | JSON_KEYS - | JSON_SET - | JSON_DELETE - | JSON_APPEND - | JSON_EXTEND - ; - cryptographicFunctionName : MD5 | SHA1 @@ -1026,6 +714,7 
@@ dateTimeFunctionName | CURRENT_DATE | CURRENT_TIME | CURRENT_TIMESTAMP + | CURRENT_TIMEZONE | CURTIME | DATE | DATEDIFF @@ -1049,6 +738,7 @@ dateTimeFunctionName | LOCALTIME | LOCALTIMESTAMP | MAKEDATE + | MAKE_DATE | MAKETIME | MICROSECOND | MINUTE @@ -1084,9 +774,16 @@ dateTimeFunctionName | WEEK_OF_YEAR | YEAR | YEARWEEK + | relativeTimeFunctionName ; -getFormatFunctionCall +relativeTimeFunctionName + : RELATIVE_TIMESTAMP + | EARLIEST + | LATEST + ; + +getFormatFunction : GET_FORMAT LT_PRTHS getFormatType COMMA functionArg RT_PRTHS ; @@ -1097,7 +794,7 @@ getFormatType | TIMESTAMP ; -extractFunctionCall +extractFunction : EXTRACT LT_PRTHS datetimePart FROM functionArg RT_PRTHS ; @@ -1132,7 +829,7 @@ datetimePart | complexDateTimePart ; -timestampFunctionCall +timestampFunction : timestampFunctionName LT_PRTHS simpleDateTimePart COMMA firstArg = functionArg COMMA secondArg = functionArg RT_PRTHS ; @@ -1142,26 +839,19 @@ timestampFunctionName ; // condition function return boolean value -conditionFunctionName +conditionFunctionBase : LIKE + | IF | ISNULL | ISNOTNULL - | CIDRMATCH - | REGEX_MATCH - | JSON_VALID + | IFNULL + | NULLIF | ISPRESENT - | ISEMPTY - | ISBLANK + | JSON_VALID | EARLIEST | LATEST - ; - -// flow control function return non-boolean value -flowControlFunctionName - : IF - | IFNULL - | NULLIF - | COALESCE + | ISEMPTY + | ISBLANK ; systemFunctionName @@ -1186,23 +876,75 @@ textFunctionName | LOCATE | REPLACE | REVERSE + | ISEMPTY + | ISBLANK + ; + +jsonFunctionName + : JSON + | JSON_OBJECT + | JSON_ARRAY + | JSON_ARRAY_LENGTH + | TO_JSON_STRING + | JSON_EXTRACT + | JSON_DELETE + | JSON_APPEND + | JSON_KEYS + | JSON_VALID + | JSON_EXTEND + | JSON_SET +// | JSON_ARRAY_ALL_MATCH +// | JSON_ARRAY_ANY_MATCH +// | JSON_ARRAY_FILTER +// | JSON_ARRAY_MAP +// | JSON_ARRAY_REDUCE ; +collectionFunctionName + : ARRAY + | ARRAY_LENGTH + ; + +lambdaFunctionName + : FORALL + | EXISTS + | FILTER + | TRANSFORM + | REDUCE + ; + positionFunctionName : POSITION ; 
+coalesceFunctionName + : COALESCE + ; + +geoIpPropertyList + : geoIpProperty (COMMA geoIpProperty)* + ; + +geoIpProperty + : COUNTRY_ISO_CODE + | COUNTRY_NAME + | CONTINENT_NAME + | REGION_ISO_CODE + | REGION_NAME + | CITY_NAME + | TIME_ZONE + | LOCATION + ; + // operators comparisonOperator : EQUAL - | DOUBLE_EQUAL | NOT_EQUAL | LESS | NOT_LESS | GREATER | NOT_GREATER | REGEXP - | LIKE ; singleFieldRelevanceFunctionName @@ -1220,14 +962,12 @@ multiFieldRelevanceFunctionName // literals and values literalValue - : intervalLiteral - | stringLiteral + : stringLiteral | integerLiteral | decimalLiteral - | doubleLiteral - | floatLiteral | booleanLiteral | datetimeLiteral //#datetime + | intervalLiteral ; intervalLiteral @@ -1247,14 +987,6 @@ decimalLiteral : (PLUS | MINUS)? DECIMAL_LITERAL ; -doubleLiteral - : (PLUS | MINUS)? DOUBLE_LITERAL - ; - -floatLiteral - : (PLUS | MINUS)? FLOAT_LITERAL - ; - booleanLiteral : TRUE | FALSE @@ -1320,20 +1052,6 @@ timespanUnit | MONTH | QUARTER | YEAR - | SEC - | SECS - | SECONDS - | MINS - | MINUTES - | HR - | HRS - | HOURS - | DAYS - | MON - | MONTHS - | US - | CS - | DS ; valueList @@ -1344,6 +1062,11 @@ qualifiedName : ident (DOT ident)* # identsAsQualifiedName ; +identifierSeq + : qualifiedName (COMMA qualifiedName)* # identsAsQualifiedNameSeq + | LT_PRTHS qualifiedName (COMMA qualifiedName)* RT_PRTHS # identsAsQualifiedNameSeq + ; + tableQualifiedName : tableIdent (DOT ident)* # identsAsTableQualifiedName ; @@ -1352,11 +1075,6 @@ wcQualifiedName : wildcard (DOT wildcard)* # identsAsWildcardQualifiedName ; -identifierSeq - : qualifiedName (COMMA qualifiedName)* # identsAsQualifiedNameSeq - | LT_PRTHS qualifiedName (COMMA qualifiedName)* RT_PRTHS # identsAsQualifiedNameSeq - ; - ident : (DOT)? 
ID | BACKTICK ident BACKTICK @@ -1376,49 +1094,40 @@ wildcard ; keywordsCanBeId - : searchableKeyWord - | IN - ; - -searchableKeyWord : D // OD SQL and ODBC special | timespanUnit | SPAN | evalFunctionName - | jsonFunctionName | relevanceArgName | intervalUnit - | trendlineType + | dateTimeFunctionName + | textFunctionName + | jsonFunctionName + | mathematicalFunctionName + | positionFunctionName + | cryptographicFunctionName | singleFieldRelevanceFunctionName | multiFieldRelevanceFunctionName | commandName - | collectionFunctionName - | REGEX + | comparisonOperator | explainMode - | REGEXP + | correlationType + | geoIpProperty // commands assist keywords - | CASE - | ELSE + | GEOIP + | OVERRIDE | ARROW - | BETWEEN - | EXISTS + | IN | SOURCE | INDEX - | A - | ASC | DESC | DATASOURCES | FROM | PATTERN | NEW_FIELD - | METHOD - | VARIABLE_COUNT_THRESHOLD - | FREQUENCY_THRESHOLD_PERCENTAGE - | MAX_SAMPLE_COUNT - | BUFFER_LIMIT + | SCOPE + | MAPPING | WITH - | REGEX - | PUNCT | USING | CAST | GET_FORMAT @@ -1426,12 +1135,8 @@ searchableKeyWord | INTERVAL | PLUS | MINUS - | OVERRIDE - // SORT FIELD KEYWORDS - | AUTO - | STR - | IP - | NUM + | INCLUDEFIELDS + | NULLS // ARGUMENT KEYWORDS | KEEPEMPTY | CONSECUTIVE @@ -1439,7 +1144,6 @@ searchableKeyWord | PARTITIONS | ALLNUM | DELIM - | BUCKET_NULLABLE | CENTROIDS | ITERATIONS | DISTANCE_TYPE @@ -1454,17 +1158,12 @@ searchableKeyWord | TIME_ZONE | TRAINING_DATA_SIZE | ANOMALY_SCORE_THRESHOLD - | COUNTFIELD - | SHOWCOUNT - | PATH - | INPUT - | OUTPUT - - // AGGREGATIONS AND WINDOW + // AGGREGATIONS | statsFunctionName - | windowFunctionName | DISTINCT_COUNT | DISTINCT_COUNT_APPROX + | PERCENTILE + | PERCENTILE_APPROX | ESTDC | ESTDC_ERROR | MEAN @@ -1477,6 +1176,8 @@ searchableKeyWord | VAR_SAMP | VAR_POP | TAKE + | FIRST + | LAST | LIST | VALUES | PER_DAY @@ -1496,7 +1197,12 @@ searchableKeyWord | FULL | SEMI | ANTI - | LEFT_HINT - | RIGHT_HINT - | PERCENTILE_SHORTCUT + | BETWEEN + | CIDRMATCH + | trendlineType + // SORT 
FIELD KEYWORDS + | AUTO + | STR + | IP + | NUM ;