@@ -20,6 +20,8 @@
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import lombok.Data;
import lombok.Getter;

/** Log parser Brain algorithm implementation. See: https://ieeexplore.ieee.org/document/10109145 */
public class BrainLogParser {
@@ -56,18 +58,36 @@ public class BrainLogParser {
private static final String GROUP_TOKEN_SET_KEY_FORMAT = "%d-%s-%d";
// By default, algorithm treats more than 2 different tokens in the group per position as variable
// token
private static final int DEFAULT_VARIABLE_COUNT_THRESHOLD = 5;
public static final int DEFAULT_VARIABLE_COUNT_THRESHOLD = 5;
/*
* By default, algorithm treats the longest word combinations as the group root, no matter what its frequency is.
* Otherwise, the longest word combination will be selected when frequency >= highest frequency of log * threshold percentage
*/
private static final float DEFAULT_FREQUENCY_THRESHOLD_PERCENTAGE = 0.3f;
public static final float DEFAULT_FREQUENCY_THRESHOLD_PERCENTAGE = 0.3f;

private final Map<String, Long> tokenFreqMap;
private final Map<String, Set<String>> groupTokenSetMap;
private final Map<String, String> logIdGroupCandidateMap;
private final int variableCountThreshold;
private final float thresholdPercentage;
/**
* -- GETTER -- Get token histogram
*
* @return map of token per position key and its frequency
*/
@Getter private final Map<String, Long> tokenFreqMap;

/**
* -- GETTER -- Get group per length per position to its token set map
*
* @return map of pattern group per length per position key and its token set
*/
@Getter private final Map<String, Set<String>> groupTokenSetMap;

/**
* -- GETTER -- Get logId to its group candidate map
*
* @return map of logId and group candidate
*/
@Getter private final Map<String, String> logIdGroupCandidateMap;

@Getter private int variableCountThreshold;
@Getter private float thresholdPercentage;
private final Map<Pattern, String> filterPatternVariableMap;
private final List<String> delimiters;

Expand Down Expand Up @@ -240,8 +260,22 @@ void calculateGroupTokenFreq(List<List<String>> preprocessedLogs) {
* @return parsed log pattern that is a list of string
*/
public List<String> parseLogPattern(List<String> tokens) {
return parseLogPattern(
tokens,
this.tokenFreqMap,
this.groupTokenSetMap,
this.logIdGroupCandidateMap,
this.variableCountThreshold);
}

public static List<String> parseLogPattern(
List<String> tokens,
Map<String, Long> tokenFreqMap,
Map<String, Set<String>> groupTokenSetMap,
Map<String, String> logIdGroupCandidateMap,
int variableCountThreshold) {
String logId = tokens.get(tokens.size() - 1);
String groupCandidateStr = this.logIdGroupCandidateMap.get(logId);
String groupCandidateStr = logIdGroupCandidateMap.get(logId);
String[] groupCandidate = groupCandidateStr.split(",");
Long repFreq = Long.parseLong(groupCandidate[0]); // representative frequency of the group
return IntStream.range(0, tokens.size() - 1)
@@ -252,26 +286,26 @@ public List<String> parseLogPattern(List<String> tokens) {
String token = entry.getValue();
String tokenKey =
String.format(Locale.ROOT, POSITIONED_TOKEN_KEY_FORMAT, index, token);
assert this.tokenFreqMap.get(tokenKey) != null
assert tokenFreqMap.get(tokenKey) != null
: String.format(Locale.ROOT, "Not found token: %s on position %d", token, index);

boolean isHigherFrequency = this.tokenFreqMap.get(tokenKey) > repFreq;
boolean isLowerFrequency = this.tokenFreqMap.get(tokenKey) < repFreq;
boolean isHigherFrequency = tokenFreqMap.get(tokenKey) > repFreq;
boolean isLowerFrequency = tokenFreqMap.get(tokenKey) < repFreq;
String groupTokenKey =
String.format(
Locale.ROOT,
GROUP_TOKEN_SET_KEY_FORMAT,
tokens.size() - 1,
groupCandidateStr,
index);
assert this.groupTokenSetMap.get(groupTokenKey) != null
assert groupTokenSetMap.get(groupTokenKey) != null
: String.format(Locale.ROOT, "Not found any token in group: %s", groupTokenKey);

if (isHigherFrequency) {
// For higher frequency token that doesn't belong to word combination, it's likely
// to be constant token only if
// it's unique token on that position within the group
boolean isUniqueToken = this.groupTokenSetMap.get(groupTokenKey).size() == 1;
boolean isUniqueToken = groupTokenSetMap.get(groupTokenKey).size() == 1;
if (!isUniqueToken) {
return VARIABLE_DENOTER;
}
@@ -281,7 +315,7 @@ public List<String> parseLogPattern(List<String> tokens) {
// it doesn't exceed the preset variable count threshold. For example, some variable
// are limited number of enums,
// and sometimes they could be treated as constant tokens.
if (this.groupTokenSetMap.get(groupTokenKey).size() >= variableCountThreshold) {
if (groupTokenSetMap.get(groupTokenKey).size() >= variableCountThreshold) {
return VARIABLE_DENOTER;
}
}
@@ -309,31 +343,13 @@ public Map<String, List<String>> parseAllLogPatterns(List<String> logMessages) {
return logPatternMap;
}

/**
* Get token histogram
*
* @return map of token per position key and its frequency
*/
public Map<String, Long> getTokenFreqMap() {
return this.tokenFreqMap;
}

/**
* Get group per length per position to its token set map
*
* @return map of pattern group per length per position key and its token set
*/
public Map<String, Set<String>> getGroupTokenSetMap() {
return this.groupTokenSetMap;
}

/**
* Get logId to its group candidate map
*
* @return map of logId and group candidate
*/
public Map<String, String> getLogIdGroupCandidateMap() {
return this.logIdGroupCandidateMap;
@Data
public static final class GroupTokenAggregationInfo {
private Map<String, Long> tokenFreqMap;
private Map<String, Set<String>> groupTokenSetMap;
private Map<String, String> logIdGroupCandidateMap;
private int variableCountThreshold;
private Map<String, List<String>> messageToProcessedLogMap;
}

private Map<Long, Integer> getWordOccurrences(List<String> tokens) {
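As context for the refactor above, the following is a minimal, self-contained sketch of the per-position token grouping that the variable-count threshold governs: tokens are bucketed by position, and any position whose bucket reaches the threshold of distinct tokens is emitted as a variable placeholder. Class, method, and placeholder names here are illustrative only and are not the BrainLogParser API; the real implementation additionally weighs token frequencies against the group's representative frequency.

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/** Hypothetical, simplified sketch of per-position token grouping; not the actual BrainLogParser. */
public final class PositionalTokenSketch {

  private static final String VARIABLE_PLACEHOLDER = "<*>"; // arbitrary denoter for this sketch
  private static final int VARIABLE_COUNT_THRESHOLD = 5; // mirrors DEFAULT_VARIABLE_COUNT_THRESHOLD

  /** Collect the distinct tokens seen at each position across logs of the same length. */
  static Map<Integer, Set<String>> groupTokensByPosition(List<List<String>> tokenizedLogs) {
    Map<Integer, Set<String>> tokensPerPosition = new HashMap<>();
    for (List<String> tokens : tokenizedLogs) {
      for (int i = 0; i < tokens.size(); i++) {
        tokensPerPosition.computeIfAbsent(i, k -> new HashSet<>()).add(tokens.get(i));
      }
    }
    return tokensPerPosition;
  }

  /** Replace a position with the placeholder when too many distinct tokens appear there. */
  static List<String> toPattern(List<String> tokens, Map<Integer, Set<String>> tokensPerPosition) {
    List<String> pattern = new ArrayList<>(tokens.size());
    for (int i = 0; i < tokens.size(); i++) {
      Set<String> seen = tokensPerPosition.get(i);
      pattern.add(seen.size() >= VARIABLE_COUNT_THRESHOLD ? VARIABLE_PLACEHOLDER : tokens.get(i));
    }
    return pattern;
  }

  public static void main(String[] args) {
    List<List<String>> logs =
        List.of(
            List.of("user", "u1", "logged", "in"),
            List.of("user", "u2", "logged", "in"),
            List.of("user", "u3", "logged", "in"),
            List.of("user", "u4", "logged", "in"),
            List.of("user", "u5", "logged", "in"));
    Map<Integer, Set<String>> groups = groupTokensByPosition(logs);
    // Position 1 has 5 distinct tokens, so it becomes the variable placeholder.
    System.out.println(toPattern(logs.get(0), groups)); // [user, <*>, logged, in]
  }
}
```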
4 changes: 2 additions & 2 deletions core/src/main/java/org/opensearch/sql/analysis/Analyzer.java
@@ -58,6 +58,7 @@
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Paginate;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
import org.opensearch.sql.ast.tree.Project;
import org.opensearch.sql.ast.tree.RareTopN;
import org.opensearch.sql.ast.tree.Relation;
@@ -70,7 +71,6 @@
import org.opensearch.sql.ast.tree.Trendline;
import org.opensearch.sql.ast.tree.UnresolvedPlan;
import org.opensearch.sql.ast.tree.Values;
import org.opensearch.sql.ast.tree.Window;
import org.opensearch.sql.common.antlr.SyntaxCheckException;
import org.opensearch.sql.data.model.ExprMissingValue;
import org.opensearch.sql.data.type.ExprCoreType;
@@ -490,7 +490,7 @@ public LogicalPlan visitParse(Parse node, AnalysisContext context) {
}

@Override
public LogicalPlan visitWindow(Window node, AnalysisContext context) {
public LogicalPlan visitPatterns(Patterns node, AnalysisContext context) {
LogicalPlan child = node.getChild().get(0).accept(this, context);
WindowExpressionAnalyzer windowAnalyzer =
new WindowExpressionAnalyzer(expressionAnalyzer, child);
@@ -58,6 +58,7 @@
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Paginate;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
import org.opensearch.sql.ast.tree.Project;
import org.opensearch.sql.ast.tree.RareTopN;
import org.opensearch.sql.ast.tree.Relation;
@@ -68,7 +69,6 @@
import org.opensearch.sql.ast.tree.TableFunction;
import org.opensearch.sql.ast.tree.Trendline;
import org.opensearch.sql.ast.tree.Values;
import org.opensearch.sql.ast.tree.Window;

/** AST nodes visitor Defines the traverse path. */
public abstract class AbstractNodeVisitor<T, C> {
@@ -338,8 +338,8 @@ public T visitFillNull(FillNull fillNull, C context) {
return visitChildren(fillNull, context);
}

public T visitWindow(Window window, C context) {
return visitChildren(window, context);
public T visitPatterns(Patterns patterns, C context) {
return visitChildren(patterns, context);
}

public T visitJoin(Join node, C context) {
8 changes: 5 additions & 3 deletions core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
@@ -58,6 +58,7 @@
import org.opensearch.sql.ast.tree.Head;
import org.opensearch.sql.ast.tree.Limit;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
import org.opensearch.sql.ast.tree.Project;
import org.opensearch.sql.ast.tree.RareTopN;
import org.opensearch.sql.ast.tree.RareTopN.CommandType;
@@ -71,7 +72,6 @@
import org.opensearch.sql.ast.tree.Trendline;
import org.opensearch.sql.ast.tree.UnresolvedPlan;
import org.opensearch.sql.ast.tree.Values;
import org.opensearch.sql.ast.tree.Window;

/** Class of static methods to create specific node instances. */
@UtilityClass
@@ -504,7 +504,7 @@ public static Parse parse(
return new Parse(parseMethod, sourceField, pattern, arguments, input);
}

public static Window window(
public static Patterns patterns(
UnresolvedPlan input,
PatternMethod patternMethod,
UnresolvedExpression sourceField,
@@ -513,14 +513,16 @@ public static Window window(
List<UnresolvedExpression> funArgs = new ArrayList<>();
funArgs.add(sourceField);
funArgs.addAll(arguments);
return new Window(
return new Patterns(
new Alias(
alias,
new WindowFunction(
new Function(patternMethod.name().toLowerCase(Locale.ROOT), funArgs),
List.of(),
List.of()),
alias),
sourceField,
patternMethod,
input);
}

@@ -14,22 +14,29 @@
import lombok.Setter;
import lombok.ToString;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.PatternMethod;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

/**
* Logical plan node of the Patterns command, representing a more complex nested function call than a
* single window function.
*/
@Getter
@Setter
@ToString
@EqualsAndHashCode(callSuper = false)
@RequiredArgsConstructor
@AllArgsConstructor
public class Window extends UnresolvedPlan {
public class Patterns extends UnresolvedPlan {

private final UnresolvedExpression windowFunction;
private final UnresolvedExpression sourceField;
private final PatternMethod patternMethod;

private UnresolvedPlan child;

@Override
public Window attach(UnresolvedPlan child) {
public Patterns attach(UnresolvedPlan child) {
this.child = child;
return this;
}
@@ -41,6 +48,6 @@ public List<UnresolvedPlan> getChild() {

@Override
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
return nodeVisitor.visitWindow(this, context);
return nodeVisitor.visitPatterns(this, context);
}
}
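The Window-to-Patterns rename above touches both halves of the AST's double dispatch: the node's accept method must route to visitPatterns, and AbstractNodeVisitor must supply a visitPatterns default that falls back to visitChildren. Below is a minimal, self-contained sketch of that mechanism, with illustrative names rather than the actual OpenSearch AST types.

```java
/** Illustrative double-dispatch sketch; these are not the actual OpenSearch AST classes. */
public class VisitorSketch {

  abstract static class Node {
    abstract <T, C> T accept(NodeVisitor<T, C> visitor, C context);
  }

  static class Patterns extends Node {
    final String sourceField;

    Patterns(String sourceField) {
      this.sourceField = sourceField;
    }

    @Override
    <T, C> T accept(NodeVisitor<T, C> visitor, C context) {
      // Double dispatch: the node selects the visitor method for its own concrete type.
      return visitor.visitPatterns(this, context);
    }
  }

  abstract static class NodeVisitor<T, C> {
    T visitChildren(Node node, C context) {
      return null; // child traversal omitted for brevity
    }

    // Default mirrors AbstractNodeVisitor: unhandled node types fall back to visitChildren.
    T visitPatterns(Patterns patterns, C context) {
      return visitChildren(patterns, context);
    }
  }

  /** A concrete visitor overrides visitPatterns, as Analyzer and CalciteRelNodeVisitor do above. */
  static class Printer extends NodeVisitor<String, Void> {
    @Override
    String visitPatterns(Patterns patterns, Void context) {
      return "patterns on field: " + patterns.sourceField;
    }
  }

  public static void main(String[] args) {
    Node node = new Patterns("message");
    System.out.println(node.accept(new Printer(), null)); // prints: patterns on field: message
  }
}
```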
@@ -13,6 +13,7 @@
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.expression.AggregateFunction;
import org.opensearch.sql.ast.expression.Alias;
import org.opensearch.sql.ast.expression.Function;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
import org.opensearch.sql.calcite.utils.AggregateUtils;

@@ -41,6 +42,21 @@ public AggCall visitAggregateFunction(AggregateFunction node, CalcitePlanContext
for (UnresolvedExpression arg : node.getArgList()) {
argList.add(rexNodeVisitor.analyze(arg, context));
}
return AggregateUtils.translate(node, field, context, argList);
return AggregateUtils.translate(
node.getFuncName(), node.getDistinct(), field, context, argList);
}

// Visit special UDAFs that are derived from commands. For example, the patterns command
// generates a brain function.
@Override
public AggCall visitFunction(Function node, CalcitePlanContext context) {
List<RexNode> argList = new ArrayList<>();
assert !node.getFuncArgs().isEmpty()
: "UDAF should at least have one argument like target field";
RexNode field = rexNodeVisitor.analyze(node.getFuncArgs().get(0), context);
for (int i = 1; i < node.getFuncArgs().size(); i++) {
argList.add(rexNodeVisitor.analyze(node.getFuncArgs().get(i), context));
}
return AggregateUtils.translate(node.getFuncName(), false, field, context, argList);
}
}
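The new visitFunction override follows a simple convention: the first function argument is the aggregated field and any remaining arguments are parameters, with the distinct flag fixed to false because a plain Function node carries none. Below is a minimal, self-contained sketch of that argument split, using illustrative names rather than the Calcite or OpenSearch APIs.

```java
import java.util.List;

/** Illustrative sketch of the first-argument-is-field convention; not the actual Calcite visitor. */
public final class UdafArgSplitSketch {

  /** Minimal stand-in for a resolved aggregate call. */
  record AggCallSketch(String funcName, boolean distinct, String field, List<String> params) {}

  static AggCallSketch translate(String funcName, List<String> args) {
    if (args.isEmpty()) {
      throw new IllegalArgumentException("UDAF should have at least one argument (the target field)");
    }
    // First argument is the aggregated field; the rest are function parameters.
    return new AggCallSketch(funcName, false, args.get(0), args.subList(1, args.size()));
  }

  public static void main(String[] args) {
    // The parameter string below is purely illustrative.
    AggCallSketch call = translate("brain", List.of("message", "variable_count_threshold=5"));
    System.out.println(call);
    // AggCallSketch[funcName=brain, distinct=false, field=message, params=[variable_count_threshold=5]]
  }
}
```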
@@ -40,12 +40,14 @@
import org.checkerframework.checker.nullness.qual.Nullable;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.Node;
import org.opensearch.sql.ast.expression.Alias;
import org.opensearch.sql.ast.expression.AllFields;
import org.opensearch.sql.ast.expression.Argument;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.Let;
import org.opensearch.sql.ast.expression.Literal;
import org.opensearch.sql.ast.expression.ParseMethod;
import org.opensearch.sql.ast.expression.PatternMethod;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
import org.opensearch.sql.ast.expression.subquery.SubqueryExpression;
import org.opensearch.sql.ast.tree.AD;
@@ -64,6 +66,7 @@
import org.opensearch.sql.ast.tree.ML;
import org.opensearch.sql.ast.tree.Paginate;
import org.opensearch.sql.ast.tree.Parse;
import org.opensearch.sql.ast.tree.Patterns;
import org.opensearch.sql.ast.tree.Project;
import org.opensearch.sql.ast.tree.RareTopN;
import org.opensearch.sql.ast.tree.Relation;
@@ -77,17 +80,20 @@
import org.opensearch.sql.calcite.utils.JoinAndLookupUtils;
import org.opensearch.sql.exception.CalciteUnsupportedException;
import org.opensearch.sql.exception.SemanticCheckException;
import org.opensearch.sql.expression.function.BuiltinFunctionName;
import org.opensearch.sql.expression.function.PPLFuncImpTable;
import org.opensearch.sql.utils.ParseUtils;

public class CalciteRelNodeVisitor extends AbstractNodeVisitor<RelNode, CalcitePlanContext> {

private final CalciteRexNodeVisitor rexVisitor;
private final CalciteAggCallVisitor aggVisitor;
private final CalciteWindowVisitor windowVisitor;

public CalciteRelNodeVisitor() {
this.rexVisitor = new CalciteRexNodeVisitor(this);
this.aggVisitor = new CalciteAggCallVisitor(rexVisitor);
this.windowVisitor = new CalciteWindowVisitor(rexVisitor, aggVisitor);
}

public RelNode analyze(UnresolvedPlan unresolved, CalcitePlanContext context) {
@@ -279,6 +285,27 @@ public RelNode visitParse(Parse node, CalcitePlanContext context) {
return context.relBuilder.peek();
}

@Override
public RelNode visitPatterns(Patterns node, CalcitePlanContext context) {
visitChildren(node, context);
Alias alias = (Alias) node.getWindowFunction();
RexNode windowNode = windowVisitor.analyze(alias.getDelegated(), context);
RexNode nestedNode = windowNode;
if (PatternMethod.BRAIN.equals(node.getPatternMethod())) {
nestedNode =
PPLFuncImpTable.INSTANCE.resolve(
context.rexBuilder,
BuiltinFunctionName.BRAIN_LOG_PARSER,
rexVisitor.analyze(node.getSourceField(), context),
windowNode);
}
context.relBuilder.projectPlus(
context.relBuilder.alias(
nestedNode,
Strings.isNullOrEmpty(alias.getAlias()) ? alias.getName() : alias.getAlias()));
return context.relBuilder.peek();
}

@Override
public RelNode visitEval(Eval node, CalcitePlanContext context) {
visitChildren(node, context);