Skip to content

Commit

Permalink
Addressed comments from @chenboat
Browse files Browse the repository at this point in the history
  • Loading branch information
jackluo923 committed Jul 30, 2024
1 parent 99d40f7 commit 5b56ee5
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,7 @@ public MutableRoaringBitmap getDocIds(String searchQuery) {
Callable<MutableRoaringBitmap> searchCallable = () -> {
IndexSearcher indexSearcher = null;
try {
// Lucene Query Parser is JavaCC based. It is stateful and should
// be instantiated per query. Analyzer on the other hand is stateless
// and can be created upfront.
// Lucene query parsers are generally stateful and a new instance must be created per query.
QueryParserBase parser = _queryParserClassConstructor.newInstance(_column, _analyzer);
if (_enablePrefixSuffixMatchingInPhraseQueries) {
// Note: Lucene's built-in QueryParser has limited wildcard functionality in phrase queries. It does not use
Expand Down Expand Up @@ -212,8 +210,8 @@ private Constructor<QueryParserBase> getQueryParserWithStringAndAnalyzerTypeCons
queryParserClass.getConstructor(String.class, Analyzer.class);
} catch (NoSuchMethodException ex) {
throw new NoSuchMethodException("The specified lucene query parser class " + queryParserClassName
+ " is not assignable from does not have the required constructor method with parameter type "
+ "[String.class, Analyzer.class]"
+ " is not assignable because the class does not have the required constructor method with parameter "
+ "type [String.class, Analyzer.class]"
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,7 @@ public MutableRoaringBitmap getDocIds(String searchQuery) {
MutableRoaringBitmap docIds = new MutableRoaringBitmap();
Collector docIDCollector = new LuceneDocIdCollector(docIds, _docIdTranslator);
try {
// Lucene Query Parser is JavaCC based. It is stateful and should
// be instantiated per query. Analyzer on the other hand is stateless
// and can be created upfront.
// Lucene query parsers are generally stateful and a new instance must be created per query.
QueryParserBase parser = _queryParserClassConstructor.newInstance(_column, _analyzer);
// Phrase search with prefix/suffix matching may have leading *. E.g., `*pache pinot` which can be stripped by
// the query parser. To support the feature, we need to explicitly set the config to be true.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ private static List<String> parseEntryAsString(@Nullable Map<String, String> col
.collect(Collectors.toList());
}

/**
* Retrieves the Lucene Analyzer class instance via reflection from the fully qualified class name of the text config.
* If the class name is not specified in the config, the default StandardAnalyzer is instantiated.
*
* @param config Pinot TextIndexConfig to fetch the configuration from
* @return Lucene Analyzer class instance
* @throws ReflectiveOperationException if instantiation via reflection fails
*/
public static Analyzer getAnalyzer(TextIndexConfig config) throws ReflectiveOperationException {
String luceneAnalyzerClassName = config.getLuceneAnalyzerClass();
List<String> luceneAnalyzerClassArgs = config.getLuceneAnalyzerClassArgs();
Expand All @@ -127,37 +135,37 @@ public static Analyzer getAnalyzer(TextIndexConfig config) throws ReflectiveOper
// use existing logic to obtain an instance of StandardAnalyzer with customized stop words
return TextIndexUtils.getStandardAnalyzerWithCustomizedStopWords(
config.getStopWordsInclude(), config.getStopWordsExclude());
} else {
// Custom analyzer + custom configs via reflection
if (luceneAnalyzerClassArgs.size() != luceneAnalyzerClassArgTypes.size()) {
throw new ReflectiveOperationException("Mismatch of the number of analyzer arguments and arguments types.");
}
}

// Generate args type list
List<Class<?>> argClasses = new ArrayList<>();
for (String argType : luceneAnalyzerClassArgTypes) {
argClasses.add(parseSupportedTypes(argType));
}
// Custom analyzer + custom configs via reflection
if (luceneAnalyzerClassArgs.size() != luceneAnalyzerClassArgTypes.size()) {
throw new ReflectiveOperationException("Mismatch of the number of analyzer arguments and arguments types.");
}

// Best effort coercion to the analyzer argument type
// Note only a subset of class types is supported, unsupported ones can be added in the future
List<Object> argValues = new ArrayList<>();
for (int i = 0; i < luceneAnalyzerClassArgs.size(); i++) {
argValues.add(parseSupportedTypeValues(luceneAnalyzerClassArgs.get(i), argClasses.get(i)));
}
// Generate args type list
List<Class<?>> argClasses = new ArrayList<>();
for (String argType : luceneAnalyzerClassArgTypes) {
argClasses.add(parseSupportedTypes(argType));
}

// Initialize the custom analyzer class with custom analyzer args
Class<?> luceneAnalyzerClass = Class.forName(luceneAnalyzerClassName);
if (!Analyzer.class.isAssignableFrom(luceneAnalyzerClass)) {
String exceptionMessage = "Custom analyzer must be a child of " + Analyzer.class.getCanonicalName();
LOGGER.error(exceptionMessage);
throw new ReflectiveOperationException(exceptionMessage);
}
// Best effort coercion to the analyzer argument type
// Note only a subset of class types is supported, unsupported ones can be added in the future
List<Object> argValues = new ArrayList<>();
for (int i = 0; i < luceneAnalyzerClassArgs.size(); i++) {
argValues.add(parseSupportedTypeValues(luceneAnalyzerClassArgs.get(i), argClasses.get(i)));
}

// Return a new instance of custom lucene analyzer class
return (Analyzer) luceneAnalyzerClass.getConstructor(argClasses.toArray(new Class<?>[0]))
.newInstance(argValues.toArray(new Object[0]));
// Initialize the custom analyzer class with custom analyzer args
Class<?> luceneAnalyzerClass = Class.forName(luceneAnalyzerClassName);
if (!Analyzer.class.isAssignableFrom(luceneAnalyzerClass)) {
String exceptionMessage = "Custom analyzer must be a child of " + Analyzer.class.getCanonicalName();
LOGGER.error(exceptionMessage);
throw new ReflectiveOperationException(exceptionMessage);
}

// Return a new instance of custom lucene analyzer class
return (Analyzer) luceneAnalyzerClass.getConstructor(argClasses.toArray(new Class<?>[0]))
.newInstance(argValues.toArray(new Object[0]));
}

/**
Expand Down

0 comments on commit 5b56ee5

Please sign in to comment.