Skip to content

Commit c539f07

Browse files
acarbonetto and github-actions[bot]
authored and committed
#639: Support OpenSearch metadata fields and the score OpenSearch function (#228) (#1456)
Allow metadata fields and score OpenSearch function. Signed-off-by: Andrew Carbonetto <[email protected]> (cherry picked from commit e805151)
1 parent a796433 commit c539f07

File tree

42 files changed

+1537
-62
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+1537
-62
lines changed

core/src/main/java/org/opensearch/sql/analysis/Analyzer.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
import org.opensearch.sql.ast.tree.Values;
6363
import org.opensearch.sql.data.model.ExprMissingValue;
6464
import org.opensearch.sql.data.type.ExprCoreType;
65+
import org.opensearch.sql.data.type.ExprType;
6566
import org.opensearch.sql.datasource.DataSourceService;
6667
import org.opensearch.sql.exception.SemanticCheckException;
6768
import org.opensearch.sql.expression.DSL;
@@ -150,6 +151,9 @@ public LogicalPlan visitRelation(Relation node, AnalysisContext context) {
150151
dataSourceSchemaIdentifierNameResolver.getIdentifierName());
151152
}
152153
table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v));
154+
table.getReservedFieldTypes().forEach(
155+
(k, v) -> curEnv.addReservedWord(new Symbol(Namespace.FIELD_NAME, k), v)
156+
);
153157

154158
// Put index name or its alias in index namespace on type environment so qualifier
155159
// can be removed when analyzing qualified name. The value (expr type) here doesn't matter.
@@ -193,6 +197,9 @@ public LogicalPlan visitTableFunction(TableFunction node, AnalysisContext contex
193197
TypeEnvironment curEnv = context.peek();
194198
Table table = tableFunctionImplementation.applyArguments();
195199
table.getFieldTypes().forEach((k, v) -> curEnv.define(new Symbol(Namespace.FIELD_NAME, k), v));
200+
table.getReservedFieldTypes().forEach(
201+
(k, v) -> curEnv.addReservedWord(new Symbol(Namespace.FIELD_NAME, k), v)
202+
);
196203
curEnv.define(new Symbol(Namespace.INDEX_NAME,
197204
dataSourceSchemaIdentifierNameResolver.getIdentifierName()), STRUCT);
198205
return new LogicalRelation(dataSourceSchemaIdentifierNameResolver.getIdentifierName(),

core/src/main/java/org/opensearch/sql/analysis/ExpressionAnalyzer.java

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88

99
import static org.opensearch.sql.ast.dsl.AstDSL.and;
1010
import static org.opensearch.sql.ast.dsl.AstDSL.compare;
11-
import static org.opensearch.sql.expression.function.BuiltinFunctionName.GTE;
12-
import static org.opensearch.sql.expression.function.BuiltinFunctionName.LTE;
1311

1412
import com.google.common.collect.ImmutableList;
1513
import com.google.common.collect.ImmutableMap;
@@ -31,6 +29,7 @@
3129
import org.opensearch.sql.ast.expression.Case;
3230
import org.opensearch.sql.ast.expression.Cast;
3331
import org.opensearch.sql.ast.expression.Compare;
32+
import org.opensearch.sql.ast.expression.DataType;
3433
import org.opensearch.sql.ast.expression.EqualTo;
3534
import org.opensearch.sql.ast.expression.Field;
3635
import org.opensearch.sql.ast.expression.Function;
@@ -42,6 +41,7 @@
4241
import org.opensearch.sql.ast.expression.Or;
4342
import org.opensearch.sql.ast.expression.QualifiedName;
4443
import org.opensearch.sql.ast.expression.RelevanceFieldList;
44+
import org.opensearch.sql.ast.expression.ScoreFunction;
4545
import org.opensearch.sql.ast.expression.Span;
4646
import org.opensearch.sql.ast.expression.UnresolvedArgument;
4747
import org.opensearch.sql.ast.expression.UnresolvedAttribute;
@@ -51,6 +51,7 @@
5151
import org.opensearch.sql.ast.expression.Xor;
5252
import org.opensearch.sql.common.antlr.SyntaxCheckException;
5353
import org.opensearch.sql.data.model.ExprValueUtils;
54+
import org.opensearch.sql.data.type.ExprCoreType;
5455
import org.opensearch.sql.data.type.ExprType;
5556
import org.opensearch.sql.exception.SemanticCheckException;
5657
import org.opensearch.sql.expression.DSL;
@@ -67,6 +68,7 @@
6768
import org.opensearch.sql.expression.function.BuiltinFunctionName;
6869
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
6970
import org.opensearch.sql.expression.function.FunctionName;
71+
import org.opensearch.sql.expression.function.OpenSearchFunctions;
7072
import org.opensearch.sql.expression.parse.ParseExpression;
7173
import org.opensearch.sql.expression.span.SpanExpression;
7274
import org.opensearch.sql.expression.window.aggregation.AggregateWindowFunction;
@@ -207,6 +209,65 @@ public Expression visitHighlightFunction(HighlightFunction node, AnalysisContext
207209
return new HighlightExpression(expr);
208210
}
209211

212+
/**
213+
* visitScoreFunction removes the score function from the AST and replaces it with the child
214+
* relevance function node. If the optional boost variable is provided, the boost argument
215+
* of the relevance function is combined.
216+
*
217+
* @param node score function node
218+
* @param context analysis context for the query
219+
* @return resolved relevance function
220+
*/
221+
public Expression visitScoreFunction(ScoreFunction node, AnalysisContext context) {
222+
Literal boostArg = node.getRelevanceFieldWeight();
223+
if (!boostArg.getType().equals(DataType.DOUBLE)) {
224+
throw new SemanticCheckException(String.format("Expected boost type '%s' but got '%s'",
225+
DataType.DOUBLE.name(), boostArg.getType().name()));
226+
}
227+
Double thisBoostValue = ((Double) boostArg.getValue());
228+
229+
// update the existing unresolved expression to add a boost argument if it doesn't exist
230+
// OR multiply the existing boost argument
231+
Function relevanceQueryUnresolvedExpr = (Function) node.getRelevanceQuery();
232+
List<UnresolvedExpression> relevanceFuncArgs = relevanceQueryUnresolvedExpr.getFuncArgs();
233+
234+
boolean doesFunctionContainBoostArgument = false;
235+
List<UnresolvedExpression> updatedFuncArgs = new ArrayList<>();
236+
for (UnresolvedExpression expr : relevanceFuncArgs) {
237+
String argumentName = ((UnresolvedArgument) expr).getArgName();
238+
if (argumentName.equalsIgnoreCase("boost")) {
239+
doesFunctionContainBoostArgument = true;
240+
Literal boostArgLiteral = (Literal) ((UnresolvedArgument) expr).getValue();
241+
Double boostValue =
242+
Double.parseDouble((String) boostArgLiteral.getValue()) * thisBoostValue;
243+
UnresolvedArgument newBoostArg = new UnresolvedArgument(
244+
argumentName,
245+
new Literal(boostValue.toString(), DataType.STRING)
246+
);
247+
updatedFuncArgs.add(newBoostArg);
248+
} else {
249+
updatedFuncArgs.add(expr);
250+
}
251+
}
252+
253+
// since nothing was found, add an argument
254+
if (!doesFunctionContainBoostArgument) {
255+
UnresolvedArgument newBoostArg = new UnresolvedArgument(
256+
"boost", new Literal(Double.toString(thisBoostValue), DataType.STRING));
257+
updatedFuncArgs.add(newBoostArg);
258+
}
259+
260+
// create a new function expression with boost argument and resolve it
261+
Function updatedRelevanceQueryUnresolvedExpr = new Function(
262+
relevanceQueryUnresolvedExpr.getFuncName(),
263+
updatedFuncArgs);
264+
OpenSearchFunctions.OpenSearchFunction relevanceQueryExpr =
265+
(OpenSearchFunctions.OpenSearchFunction) updatedRelevanceQueryUnresolvedExpr
266+
.accept(this, context);
267+
relevanceQueryExpr.setScoreTracked(true);
268+
return relevanceQueryExpr;
269+
}
270+
210271
@Override
211272
public Expression visitIn(In node, AnalysisContext context) {
212273
return visitIn(node.getField(), node.getValueList(), context);
@@ -297,6 +358,23 @@ public Expression visitAllFields(AllFields node, AnalysisContext context) {
297358
@Override
298359
public Expression visitQualifiedName(QualifiedName node, AnalysisContext context) {
299360
QualifierAnalyzer qualifierAnalyzer = new QualifierAnalyzer(context);
361+
362+
// check for reserved words in the identifier
363+
for (String part : node.getParts()) {
364+
for (TypeEnvironment typeEnv = context.peek();
365+
typeEnv != null;
366+
typeEnv = typeEnv.getParent()) {
367+
Optional<ExprType> exprType = typeEnv.getReservedSymbolTable().lookup(
368+
new Symbol(Namespace.FIELD_NAME, part));
369+
if (exprType.isPresent()) {
370+
return visitMetadata(
371+
qualifierAnalyzer.unqualified(node),
372+
(ExprCoreType) exprType.get(),
373+
context
374+
);
375+
}
376+
}
377+
}
300378
return visitIdentifier(qualifierAnalyzer.unqualified(node), context);
301379
}
302380

@@ -313,6 +391,19 @@ public Expression visitUnresolvedArgument(UnresolvedArgument node, AnalysisConte
313391
return new NamedArgumentExpression(node.getArgName(), node.getValue().accept(this, context));
314392
}
315393

394+
/**
395+
* If QualifiedName is actually a reserved metadata field, return the expr type associated
396+
* with the metadata field.
397+
* @param ident metadata field name
398+
* @param context analysis context
399+
* @return DSL reference
400+
*/
401+
private Expression visitMetadata(String ident,
402+
ExprCoreType exprCoreType,
403+
AnalysisContext context) {
404+
return DSL.ref(ident, exprCoreType);
405+
}
406+
316407
private Expression visitIdentifier(String ident, AnalysisContext context) {
317408
// ParseExpression will always override ReferenceExpression when ident conflicts
318409
for (NamedExpression expr : context.getNamedParseExpressions()) {

core/src/main/java/org/opensearch/sql/analysis/ExpressionReferenceOptimizer.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.opensearch.sql.expression.conditional.cases.CaseClause;
2020
import org.opensearch.sql.expression.conditional.cases.WhenClause;
2121
import org.opensearch.sql.expression.function.BuiltinFunctionRepository;
22+
import org.opensearch.sql.expression.function.OpenSearchFunctions;
2223
import org.opensearch.sql.planner.logical.LogicalAggregation;
2324
import org.opensearch.sql.planner.logical.LogicalPlan;
2425
import org.opensearch.sql.planner.logical.LogicalPlanNodeVisitor;
@@ -70,8 +71,17 @@ public Expression visitFunction(FunctionExpression node, AnalysisContext context
7071
final List<Expression> args =
7172
node.getArguments().stream().map(expr -> expr.accept(this, context))
7273
.collect(Collectors.toList());
73-
return (Expression) repository.compile(context.getFunctionProperties(),
74-
node.getFunctionName(), args);
74+
Expression optimizedFunctionExpression = (Expression) repository.compile(
75+
context.getFunctionProperties(),
76+
node.getFunctionName(),
77+
args
78+
);
79+
// Propagate scoreTracked for OpenSearch functions
80+
if (optimizedFunctionExpression instanceof OpenSearchFunctions.OpenSearchFunction) {
81+
((OpenSearchFunctions.OpenSearchFunction) optimizedFunctionExpression).setScoreTracked(
82+
((OpenSearchFunctions.OpenSearchFunction)node).isScoreTracked());
83+
}
84+
return optimizedFunctionExpression;
7585
}
7686
}
7787

core/src/main/java/org/opensearch/sql/analysis/TypeEnvironment.java

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,30 @@ public class TypeEnvironment implements Environment<Symbol, ExprType> {
2929
private final TypeEnvironment parent;
3030
private final SymbolTable symbolTable;
3131

32+
@Getter
33+
private final SymbolTable reservedSymbolTable;
34+
35+
/**
 * Creates an environment whose symbol table and reserved symbol table both start empty.
 *
 * @param parent parent environment
 */
public TypeEnvironment(TypeEnvironment parent) {
  // The two-argument constructor supplies the fresh reserved symbol table.
  this(parent, new SymbolTable());
}
3645

46+
/**
 * Creates an environment around an existing symbol table; the reserved symbol table
 * starts empty.
 *
 * @param parent parent environment
 * @param symbolTable type table
 */
public TypeEnvironment(TypeEnvironment parent, SymbolTable symbolTable) {
  this.reservedSymbolTable = new SymbolTable();
  this.symbolTable = symbolTable;
  this.parent = parent;
}
4157

4258
/**
@@ -59,6 +75,7 @@ public ExprType resolve(Symbol symbol) {
5975

6076
/**
6177
* Resolve all fields in the current environment.
78+
*
6279
* @param namespace a namespace
6380
* @return all symbols in the namespace
6481
*/
@@ -102,7 +119,11 @@ public void remove(ReferenceExpression ref) {
102119
* Clear all fields in the current environment.
103120
*/
104121
public void clearAllFields() {
105-
lookupAllFields(FIELD_NAME).keySet().stream()
106-
.forEach(v -> remove(new Symbol(Namespace.FIELD_NAME, v)));
122+
lookupAllFields(FIELD_NAME).keySet().forEach(
123+
v -> remove(new Symbol(Namespace.FIELD_NAME, v)));
124+
}
125+
126+
public void addReservedWord(Symbol symbol, ExprType type) {
127+
reservedSymbolTable.store(symbol, type);
107128
}
108129
}

core/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.opensearch.sql.ast.expression.Or;
3030
import org.opensearch.sql.ast.expression.QualifiedName;
3131
import org.opensearch.sql.ast.expression.RelevanceFieldList;
32+
import org.opensearch.sql.ast.expression.ScoreFunction;
3233
import org.opensearch.sql.ast.expression.Span;
3334
import org.opensearch.sql.ast.expression.UnresolvedArgument;
3435
import org.opensearch.sql.ast.expression.UnresolvedAttribute;
@@ -278,6 +279,10 @@ public T visitHighlightFunction(HighlightFunction node, C context) {
278279
return visitChildren(node, context);
279280
}
280281

282+
public T visitScoreFunction(ScoreFunction node, C context) {
283+
return visitChildren(node, context);
284+
}
285+
281286
public T visitStatement(Statement node, C context) {
282287
return visit(node, context);
283288
}

core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.opensearch.sql.ast.expression.Or;
3535
import org.opensearch.sql.ast.expression.ParseMethod;
3636
import org.opensearch.sql.ast.expression.QualifiedName;
37+
import org.opensearch.sql.ast.expression.ScoreFunction;
3738
import org.opensearch.sql.ast.expression.Span;
3839
import org.opensearch.sql.ast.expression.SpanUnit;
3940
import org.opensearch.sql.ast.expression.UnresolvedArgument;
@@ -60,7 +61,6 @@
6061
import org.opensearch.sql.ast.tree.TableFunction;
6162
import org.opensearch.sql.ast.tree.UnresolvedPlan;
6263
import org.opensearch.sql.ast.tree.Values;
63-
import org.opensearch.sql.expression.function.BuiltinFunctionName;
6464

6565
/**
6666
* Class of static methods to create specific node instances.
@@ -285,6 +285,11 @@ public UnresolvedExpression highlight(UnresolvedExpression fieldName,
285285
return new HighlightFunction(fieldName, arguments);
286286
}
287287

288+
public UnresolvedExpression score(UnresolvedExpression relevanceQuery,
289+
Literal relevanceFieldWeight) {
290+
return new ScoreFunction(relevanceQuery, relevanceFieldWeight);
291+
}
292+
288293
public UnresolvedExpression window(UnresolvedExpression function,
289294
List<UnresolvedExpression> partitionByList,
290295
List<Pair<SortOption, UnresolvedExpression>> sortList) {
core/src/main/java/org/opensearch/sql/ast/expression/ScoreFunction.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.ast.expression;
7+
8+
import java.util.List;
9+
import lombok.AllArgsConstructor;
10+
import lombok.EqualsAndHashCode;
11+
import lombok.Getter;
12+
import lombok.ToString;
13+
import org.opensearch.sql.ast.AbstractNodeVisitor;
14+
15+
/**
16+
* Expression node of Score function.
17+
* Score takes a relevance-search expression as an argument and returns it
18+
*/
19+
@AllArgsConstructor
20+
@EqualsAndHashCode(callSuper = false)
21+
@Getter
22+
@ToString
23+
public class ScoreFunction extends UnresolvedExpression {
24+
private final UnresolvedExpression relevanceQuery;
25+
private final Literal relevanceFieldWeight;
26+
27+
@Override
28+
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
29+
return nodeVisitor.visitScoreFunction(this, context);
30+
}
31+
32+
@Override
33+
public List<UnresolvedExpression> getChild() {
34+
return List.of(relevanceQuery);
35+
}
36+
}

core/src/main/java/org/opensearch/sql/expression/DSL.java

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -862,7 +862,19 @@ public static FunctionExpression match_bool_prefix(Expression... args) {
862862
}
863863

864864
public static FunctionExpression wildcard_query(Expression... args) {
865-
return compile(FunctionProperties.None,BuiltinFunctionName.WILDCARD_QUERY, args);
865+
return compile(FunctionProperties.None, BuiltinFunctionName.WILDCARD_QUERY, args);
866+
}
867+
868+
public static FunctionExpression score(Expression... args) {
869+
return compile(FunctionProperties.None, BuiltinFunctionName.SCORE, args);
870+
}
871+
872+
public static FunctionExpression scorequery(Expression... args) {
873+
return compile(FunctionProperties.None, BuiltinFunctionName.SCOREQUERY, args);
874+
}
875+
876+
public static FunctionExpression score_query(Expression... args) {
877+
return compile(FunctionProperties.None, BuiltinFunctionName.SCORE_QUERY, args);
866878
}
867879

868880
public static FunctionExpression now(FunctionProperties functionProperties,

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ public enum BuiltinFunctionName {
122122
WEEK_OF_YEAR(FunctionName.of("week_of_year")),
123123
YEAR(FunctionName.of("year")),
124124
YEARWEEK(FunctionName.of("yearweek")),
125+
125126
// `now`-like functions
126127
NOW(FunctionName.of("now")),
127128
CURDATE(FunctionName.of("curdate")),
@@ -132,6 +133,7 @@ public enum BuiltinFunctionName {
132133
CURRENT_TIMESTAMP(FunctionName.of("current_timestamp")),
133134
LOCALTIMESTAMP(FunctionName.of("localtimestamp")),
134135
SYSDATE(FunctionName.of("sysdate")),
136+
135137
/**
136138
* Text Functions.
137139
*/
@@ -255,6 +257,10 @@ public enum BuiltinFunctionName {
255257
MATCH_BOOL_PREFIX(FunctionName.of("match_bool_prefix")),
256258
HIGHLIGHT(FunctionName.of("highlight")),
257259
MATCH_PHRASE_PREFIX(FunctionName.of("match_phrase_prefix")),
260+
SCORE(FunctionName.of("score")),
261+
SCOREQUERY(FunctionName.of("scorequery")),
262+
SCORE_QUERY(FunctionName.of("score_query")),
263+
258264
/**
259265
* Legacy Relevance Function.
260266
*/

0 commit comments

Comments
 (0)