From 5b56ee5556ca41b708ba8e0a3048d37d12cb3b59 Mon Sep 17 00:00:00 2001
From: Jack Luo <jack.luo@mail.utoronto.ca>
Date: Wed, 31 Jul 2024 03:08:14 +0800
Subject: [PATCH] Addressed comments from @chenboat

---
 .../RealtimeLuceneTextIndex.java              |  8 +--
 .../readers/text/LuceneTextIndexReader.java   |  4 +-
 .../local/segment/store/TextIndexUtils.java   | 60 +++++++++++--------
 3 files changed, 38 insertions(+), 34 deletions(-)
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java
index 4507987c824e..7c97bca68f91 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/realtime/impl/invertedindex/RealtimeLuceneTextIndex.java
@@ -127,9 +127,7 @@ public MutableRoaringBitmap getDocIds(String searchQuery) {
     Callable<MutableRoaringBitmap> searchCallable = () -> {
       IndexSearcher indexSearcher = null;
       try {
-        // Lucene Query Parser is JavaCC based. It is stateful and should
-        // be instantiated per query. Analyzer on the other hand is stateless
-        // and can be created upfront.
+        // Lucene query parsers are generally stateful and a new instance must be created per query.
         QueryParserBase parser = _queryParserClassConstructor.newInstance(_column, _analyzer);
         if (_enablePrefixSuffixMatchingInPhraseQueries) {
           // Note: Lucene's built-in QueryParser has limited wildcard functionality in phrase queries. It does not use
@@ -212,8 +210,8 @@ private Constructor<QueryParserBase> getQueryParserWithStringAndAnalyzerTypeCons
       queryParserClass.getConstructor(String.class, Analyzer.class);
     } catch (NoSuchMethodException ex) {
       throw new NoSuchMethodException("The specified lucene query parser class " + queryParserClassName
-              + " is not assignable from does not have the required constructor method with parameter type "
-              + "[String.class, Analyzer.class]"
+              + " is not assignable because the class does not have the required constructor method with parameter "
+              + "type [String.class, Analyzer.class]"
       );
     }
 
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java
index 9ba2a2ab0af5..9e47f1cbd649 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/LuceneTextIndexReader.java
@@ -154,9 +154,7 @@ public MutableRoaringBitmap getDocIds(String searchQuery) {
     MutableRoaringBitmap docIds = new MutableRoaringBitmap();
     Collector docIDCollector = new LuceneDocIdCollector(docIds, _docIdTranslator);
     try {
-      // Lucene Query Parser is JavaCC based. It is stateful and should
-      // be instantiated per query. Analyzer on the other hand is stateless
-      // and can be created upfront.
+      // Lucene query parsers are generally stateful and a new instance must be created per query.
       QueryParserBase parser = _queryParserClassConstructor.newInstance(_column, _analyzer);
       // Phrase search with prefix/suffix matching may have leading *. E.g., `*pache pinot` which can be stripped by
       // the query parser. To support the feature, we need to explicitly set the config to be true.
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java
index 39d41c92b074..a5a883879037 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java
@@ -115,6 +115,14 @@ private static List<String> parseEntryAsString(@Nullable Map<String, String> col
         .collect(Collectors.toList());
   }
 
+  /**
+   * Retrieves the Lucene Analyzer class instance via reflection from the fully qualified class name of the text config.
+   * If the class name is not specified in the config, the default StandardAnalyzer is instantiated.
+   *
+   * @param config Pinot TextIndexConfig to fetch the configuration from
+   * @return Lucene Analyzer class instance
+   * @throws ReflectiveOperationException if instantiation via reflection fails
+   */
   public static Analyzer getAnalyzer(TextIndexConfig config) throws ReflectiveOperationException {
     String luceneAnalyzerClassName = config.getLuceneAnalyzerClass();
     List<String> luceneAnalyzerClassArgs = config.getLuceneAnalyzerClassArgs();
@@ -127,37 +135,37 @@ public static Analyzer getAnalyzer(TextIndexConfig config) throws ReflectiveOper
       // use existing logic to obtain an instance of StandardAnalyzer with customized stop words
       return TextIndexUtils.getStandardAnalyzerWithCustomizedStopWords(
               config.getStopWordsInclude(), config.getStopWordsExclude());
-    } else {
-      // Custom analyzer + custom configs via reflection
-      if (luceneAnalyzerClassArgs.size() != luceneAnalyzerClassArgTypes.size()) {
-        throw new ReflectiveOperationException("Mismatch of the number of analyzer arguments and arguments types.");
-      }
+    }
 
-      // Generate args type list
-      List<Class<?>> argClasses = new ArrayList<>();
-      for (String argType : luceneAnalyzerClassArgTypes) {
-        argClasses.add(parseSupportedTypes(argType));
-      }
+    // Custom analyzer + custom configs via reflection
+    if (luceneAnalyzerClassArgs.size() != luceneAnalyzerClassArgTypes.size()) {
+      throw new ReflectiveOperationException("Mismatch of the number of analyzer arguments and arguments types.");
+    }
 
-      // Best effort coercion to the analyzer argument type
-      // Note only a subset of class types is supported, unsupported ones can be added in the future
-      List<Object> argValues = new ArrayList<>();
-      for (int i = 0; i < luceneAnalyzerClassArgs.size(); i++) {
-        argValues.add(parseSupportedTypeValues(luceneAnalyzerClassArgs.get(i), argClasses.get(i)));
-      }
+    // Generate args type list
+    List<Class<?>> argClasses = new ArrayList<>();
+    for (String argType : luceneAnalyzerClassArgTypes) {
+      argClasses.add(parseSupportedTypes(argType));
+    }
 
-      // Initialize the custom analyzer class with custom analyzer args
-      Class<?> luceneAnalyzerClass = Class.forName(luceneAnalyzerClassName);
-      if (!Analyzer.class.isAssignableFrom(luceneAnalyzerClass)) {
-        String exceptionMessage = "Custom analyzer must be a child of " + Analyzer.class.getCanonicalName();
-        LOGGER.error(exceptionMessage);
-        throw new ReflectiveOperationException(exceptionMessage);
-      }
+    // Best effort coercion to the analyzer argument type
+    // Note only a subset of class types is supported, unsupported ones can be added in the future
+    List<Object> argValues = new ArrayList<>();
+    for (int i = 0; i < luceneAnalyzerClassArgs.size(); i++) {
+      argValues.add(parseSupportedTypeValues(luceneAnalyzerClassArgs.get(i), argClasses.get(i)));
+    }
 
-      // Return a new instance of custom lucene analyzer class
-      return (Analyzer) luceneAnalyzerClass.getConstructor(argClasses.toArray(new Class<?>[0]))
-              .newInstance(argValues.toArray(new Object[0]));
+    // Initialize the custom analyzer class with custom analyzer args
+    Class<?> luceneAnalyzerClass = Class.forName(luceneAnalyzerClassName);
+    if (!Analyzer.class.isAssignableFrom(luceneAnalyzerClass)) {
+      String exceptionMessage = "Custom analyzer must be a child of " + Analyzer.class.getCanonicalName();
+      LOGGER.error(exceptionMessage);
+      throw new ReflectiveOperationException(exceptionMessage);
     }
+
+    // Return a new instance of custom lucene analyzer class
+    return (Analyzer) luceneAnalyzerClass.getConstructor(argClasses.toArray(new Class<?>[0]))
+            .newInstance(argValues.toArray(new Object[0]));
   }
 
   /**