diff --git a/docs/regressions/regressions-dl19-passage-cos-dpr-distil-hnsw-onnx.md b/docs/regressions/regressions-dl19-passage-cos-dpr-distil-hnsw-onnx.md index e98ee2e74e..b08012b307 100644 --- a/docs/regressions/regressions-dl19-passage-cos-dpr-distil-hnsw-onnx.md +++ b/docs/regressions/regressions-dl19-passage-cos-dpr-distil-hnsw-onnx.md @@ -57,7 +57,7 @@ target/appassembler/bin/IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator HnswDenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-passage-cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 \ + -threads 16 -M 16 -efC 100 -memoryBuffer 65536 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-dl19-passage-cos-dpr-distil-hnsw.md b/docs/regressions/regressions-dl19-passage-cos-dpr-distil-hnsw.md index aed4b16719..dc625e14d0 100644 --- a/docs/regressions/regressions-dl19-passage-cos-dpr-distil-hnsw.md +++ b/docs/regressions/regressions-dl19-passage-cos-dpr-distil-hnsw.md @@ -57,7 +57,7 @@ target/appassembler/bin/IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator HnswDenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-passage-cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 \ + -threads 16 -M 16 -efC 100 -memoryBuffer 65536 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-dl19-passage-openai-ada2.md b/docs/regressions/regressions-dl19-passage-openai-ada2.md index cecd54ea8e..c5382dc3c5 100644 --- a/docs/regressions/regressions-dl19-passage-openai-ada2.md +++ b/docs/regressions/regressions-dl19-passage-openai-ada2.md @@ -57,7 +57,7 @@ target/appassembler/bin/IndexHnswDenseVectors \ -input /path/to/msmarco-passage-openai-ada2 \ -generator HnswDenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-passage-openai-ada2/ \ - -threads 16 -M 16 -efC 100 -memorybuffer 65536 \ + -threads 16 -M 16 -efC 100 -memoryBuffer 65536 \ >& logs/log.msmarco-passage-openai-ada2 & ``` diff --git a/docs/regressions/regressions-dl20-passage-cos-dpr-distil-hnsw-onnx.md b/docs/regressions/regressions-dl20-passage-cos-dpr-distil-hnsw-onnx.md index 045417dab5..f040a9ce41 100644 --- a/docs/regressions/regressions-dl20-passage-cos-dpr-distil-hnsw-onnx.md +++ b/docs/regressions/regressions-dl20-passage-cos-dpr-distil-hnsw-onnx.md @@ -57,7 +57,7 @@ target/appassembler/bin/IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator HnswDenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-passage-cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 \ + -threads 16 -M 16 -efC 100 -memoryBuffer 65536 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-dl20-passage-cos-dpr-distil-hnsw.md b/docs/regressions/regressions-dl20-passage-cos-dpr-distil-hnsw.md index 7408687a8c..c2f46b422c 100644 --- a/docs/regressions/regressions-dl20-passage-cos-dpr-distil-hnsw.md +++ b/docs/regressions/regressions-dl20-passage-cos-dpr-distil-hnsw.md @@ -57,7 +57,7 @@ target/appassembler/bin/IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator HnswDenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-passage-cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 \ + -threads 16 -M 16 -efC 100 -memoryBuffer 65536 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-dl20-passage-openai-ada2.md b/docs/regressions/regressions-dl20-passage-openai-ada2.md index 
57220c1465..f3e93c63ef 100644 --- a/docs/regressions/regressions-dl20-passage-openai-ada2.md +++ b/docs/regressions/regressions-dl20-passage-openai-ada2.md @@ -57,7 +57,7 @@ target/appassembler/bin/IndexHnswDenseVectors \ -input /path/to/msmarco-passage-openai-ada2 \ -generator HnswDenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-passage-openai-ada2/ \ - -threads 16 -M 16 -efC 100 -memorybuffer 65536 \ + -threads 16 -M 16 -efC 100 -memoryBuffer 65536 \ >& logs/log.msmarco-passage-openai-ada2 & ``` diff --git a/docs/regressions/regressions-mb11.md b/docs/regressions/regressions-mb11.md index e748171dad..7da300a807 100644 --- a/docs/regressions/regressions-mb11.md +++ b/docs/regressions/regressions-mb11.md @@ -25,7 +25,7 @@ target/appassembler/bin/IndexCollection \ -input /path/to/mb11 \ -generator TweetGenerator \ -index indexes/lucene-index.mb11/ \ - -threads 44 -storePositions -storeDocvectors -storeRaw -uniqueDocid -tweet.keepUrls -tweet.stemming \ + -threads 44 -storePositions -storeDocvectors -storeRaw -tweet.keepUrls -tweet.stemming \ >& logs/log.mb11 & ``` diff --git a/docs/regressions/regressions-msmarco-passage-cos-dpr-distil-hnsw-onnx.md b/docs/regressions/regressions-msmarco-passage-cos-dpr-distil-hnsw-onnx.md index a19e8abdd5..6df98970f1 100644 --- a/docs/regressions/regressions-msmarco-passage-cos-dpr-distil-hnsw-onnx.md +++ b/docs/regressions/regressions-msmarco-passage-cos-dpr-distil-hnsw-onnx.md @@ -54,7 +54,7 @@ target/appassembler/bin/IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator HnswDenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-passage-cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 \ + -threads 16 -M 16 -efC 100 -memoryBuffer 65536 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-msmarco-passage-cos-dpr-distil-hnsw.md b/docs/regressions/regressions-msmarco-passage-cos-dpr-distil-hnsw.md index d813573d06..cf41e9645a 100644 --- a/docs/regressions/regressions-msmarco-passage-cos-dpr-distil-hnsw.md +++ b/docs/regressions/regressions-msmarco-passage-cos-dpr-distil-hnsw.md @@ -54,7 +54,7 @@ target/appassembler/bin/IndexHnswDenseVectors \ -input /path/to/msmarco-passage-cos-dpr-distil \ -generator HnswDenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-passage-cos-dpr-distil/ \ - -threads 16 -M 16 -efC 100 \ + -threads 16 -M 16 -efC 100 -memoryBuffer 65536 \ >& logs/log.msmarco-passage-cos-dpr-distil & ``` diff --git a/docs/regressions/regressions-msmarco-passage-openai-ada2.md b/docs/regressions/regressions-msmarco-passage-openai-ada2.md index 8bd6c97658..c6ca60e2bc 100644 --- a/docs/regressions/regressions-msmarco-passage-openai-ada2.md +++ b/docs/regressions/regressions-msmarco-passage-openai-ada2.md @@ -54,7 +54,7 @@ target/appassembler/bin/IndexHnswDenseVectors \ -input /path/to/msmarco-passage-openai-ada2 \ -generator HnswDenseVectorDocumentGenerator \ -index indexes/lucene-hnsw.msmarco-passage-openai-ada2/ \ - -threads 16 -M 16 -efC 100 -memorybuffer 65536 \ + -threads 16 -M 16 -efC 100 -memoryBuffer 65536 \ >& logs/log.msmarco-passage-openai-ada2 & ``` diff --git a/src/main/java/io/anserini/index/AbstractIndexer.java b/src/main/java/io/anserini/index/AbstractIndexer.java new file mode 100644 index 0000000000..76a37d986d --- /dev/null +++ b/src/main/java/io/anserini/index/AbstractIndexer.java @@ -0,0 +1,338 @@ +/* + * Anserini: A Lucene toolkit for reproducible information retrieval research + * + * Licensed under the Apache License, Version 
2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.anserini.index; + +import io.anserini.collection.DocumentCollection; +import io.anserini.collection.FileSegment; +import io.anserini.collection.SourceDocument; +import io.anserini.index.generator.EmptyDocumentException; +import io.anserini.index.generator.InvalidDocumentException; +import io.anserini.index.generator.LuceneDocumentGenerator; +import io.anserini.index.generator.SkippedDocumentException; +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.core.config.Configurator; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.kohsuke.args4j.Option; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Executors; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +public abstract class AbstractIndexer implements Runnable { + private static final Logger LOG = LogManager.getLogger(AbstractIndexer.class); + + public static class Args { + @Option(name = "-collection", metaVar = "[class]", required = true, usage = "Collection class in io.anserini.collection.") + public String collectionClass; + + @Option(name = "-input", metaVar = "[path]", required = true, usage = "Input collection.") + public String input; + + @Option(name = "-index", metaVar = "[path]", required = true, usage = "Index path.") + public String index; + + @Option(name = "-uniqueDocid", usage = "Removes duplicate documents with the same docid during indexing.") + public boolean uniqueDocid = false; + + @Option(name = "-optimize", usage = "Optimizes index by merging into a single index segment.") + public boolean optimize = false; + + @Option(name = "-memoryBuffer", metaVar = "[mb]", usage = "Memory buffer size in MB.") + public int memoryBuffer = 4096; + + @Option(name = "-threads", metaVar = "[num]", usage = "Number of indexing threads.") + public int threads = 4; + + @Option(name = "-verbose", forbids = {"-quiet"}, usage = "Enables verbose logging for each indexing thread.") + public boolean verbose = false; + + @Option(name = "-quiet", forbids = {"-verbose"}, usage = "Turns off all logging.") + public boolean quiet = false; + + @Option(name = "-options", usage = "Print information about options.") + public Boolean options = false; + + @Option(name = "-shard.count", metaVar = "[n]", + usage = "Number of shards to partition the document collection into.") + public int shardCount = -1; + + @Option(name = "-shard.current", metaVar = "[n]", + usage = "The current shard number to generate (indexed from 0).") + public int shardCurrent = -1; + } + + public class IndexerThread extends Thread { + private final 
Path inputFile; + private final LuceneDocumentGenerator<SourceDocument> generator; + private final Set<String> whitelistDocids; + + public IndexerThread(Path inputFile, LuceneDocumentGenerator<SourceDocument> generator) { + this(inputFile, generator, null); + } + + public IndexerThread(Path inputFile, LuceneDocumentGenerator<SourceDocument> generator, Set<String> docids) { + this.inputFile = inputFile; + this.generator = generator; + this.whitelistDocids = docids; + + setName(inputFile.getFileName().toString()); + } + + @Override + public void run() { + try (FileSegment<? extends SourceDocument> segment = collection.createFileSegment(inputFile)) { + // We keep track of two separate counts: the total count of documents in this file segment (cnt), + // and the number of documents in this current "batch" (batch). We update the global counter every + // 10k documents: this is so that we get intermediate updates, which is informative if a collection + // has only one file segment; see https://github.com/castorini/anserini/issues/683 + int cnt = 0; + int batch = 0; + + for (SourceDocument d : segment) { + if (!d.indexable()) { + counters.unindexable.incrementAndGet(); + continue; + } + + try { + if (whitelistDocids != null && !whitelistDocids.contains(d.id())) { + counters.skipped.incrementAndGet(); + continue; + } + + Document doc = generator.createDocument(d); + if (args.uniqueDocid) { + // Note that we're reading the config directly, which is within scope. + writer.updateDocument(new Term("id", d.id()), doc); + } else { + writer.addDocument(doc); + } + + cnt++; + batch++; + } catch (EmptyDocumentException e1) { + counters.empty.incrementAndGet(); + continue; + } catch (SkippedDocumentException e2) { + counters.skipped.incrementAndGet(); + continue; + } catch (InvalidDocumentException e3) { + counters.errors.incrementAndGet(); + continue; + } + + // Add the counts from this batch, reset batch counter. + if (batch % 10000 == 0) { + counters.indexed.addAndGet(batch); + batch = 0; + } + } + + // Add the remaining documents. + counters.indexed.addAndGet(batch); + + int skipped = segment.getSkippedCount(); + if (skipped > 0) { + counters.skipped.addAndGet(skipped); + LOG.warn(inputFile.getParent().getFileName().toString() + File.separator + + inputFile.getFileName().toString() + ": " + skipped + " docs skipped."); + } + + if (segment.getErrorStatus()) { + counters.errors.incrementAndGet(); + LOG.error(inputFile.getParent().getFileName().toString() + File.separator + + inputFile.getFileName().toString() + ": error iterating through segment."); + } + + // Log at the debug level because this can be quite noisy if there are lots of file segments. + LOG.debug(inputFile.getParent().getFileName().toString() + File.separator + + inputFile.getFileName().toString() + ": " + cnt + " docs added."); + } catch (Exception e) { + LOG.error(Thread.currentThread().getName() + ": Unexpected Exception:", e); + } + } + } + + protected final Args args; + protected Counters counters = new Counters(); + protected Path collectionPath; + protected DocumentCollection<? extends SourceDocument> collection; + protected Class<LuceneDocumentGenerator<? extends SourceDocument>> generatorClass; + protected IndexWriter writer; + + @SuppressWarnings("unchecked") + public AbstractIndexer(Args args) { + this.args = args; + + if (args.verbose) { + // If verbose logging is enabled, change the default log level to DEBUG so we get per-thread logging messages. + Configurator.setRootLevel(Level.DEBUG); + LOG.info("Setting log level to " + Level.DEBUG); + } else if (args.quiet) { + // If quiet mode enabled, only report warnings and above. 
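+ // At WARN, the per-segment skip and error messages that indexer threads emit via LOG.warn/LOG.error still appear; only INFO-level progress reporting is suppressed.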
+ Configurator.setRootLevel(Level.WARN); + } else { + // Otherwise, we get the standard set of log messages. + Configurator.setRootLevel(Level.INFO); + LOG.info("Setting log level to " + Level.INFO); + } + + LOG.info("============ Loading Index Configuration ============"); + LOG.info("AbstractIndexer settings:"); + LOG.info(" + DocumentCollection path: " + args.input); + LOG.info(" + CollectionClass: " + args.collectionClass); + LOG.info(" + Index path: " + args.index); + LOG.info(" + Threads: " + args.threads); + LOG.info(" + Optimize (merge segments)? " + args.optimize); + + // Our documentation uses /path/to/foo as a convention: to make copy and paste of the commands work, + // we assume collections/ as the path location. + String pathStr = args.input; + if (pathStr.startsWith("/path/to")) { + pathStr = pathStr.replace("/path/to", "collections"); + } + this.collectionPath = Paths.get(pathStr); + if (!Files.exists(collectionPath) || !Files.isReadable(collectionPath) || !Files.isDirectory(collectionPath)) { + throw new IllegalArgumentException(String.format("Invalid collection path \"%s\".", collectionPath)); + } + + try { + Class<? extends DocumentCollection<? extends SourceDocument>> collectionClass = (Class<? extends DocumentCollection<? extends SourceDocument>>) + Class.forName("io.anserini.collection." + args.collectionClass); + this.collection = collectionClass.getConstructor(Path.class).newInstance(collectionPath); + } catch (Exception e) { + throw new IllegalArgumentException(String.format("Unable to load collection class \"%s\".", args.collectionClass)); + } + } + + @Override + public void run() { + LOG.info("============ Indexing Collection ============"); + final long start = System.nanoTime(); + + final List<Path> segmentPaths = args.shardCount > 1 ? + collection.getSegmentPaths(args.shardCount, args.shardCurrent) : + collection.getSegmentPaths(); + final int segmentCnt = segmentPaths.size(); + + final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(args.threads); + LOG.info(String.format("Thread pool with %s threads initialized.", args.threads)); + LOG.info(String.format("%,d %s found in %s", segmentCnt, (segmentCnt == 1 ? "file" : "files"), collectionPath)); + LOG.info("Starting to index..."); + + // Dispatch to default method to process the segments; subclasses can override this method if desired. + processSegments(executor, segmentPaths); + executor.shutdown(); + + try { + // Wait for existing tasks to terminate. + while (!executor.awaitTermination(1, TimeUnit.MINUTES)) { + if (segmentCnt == 1) { + LOG.info(String.format("%,d documents indexed", counters.indexed.get())); + } else { + LOG.info(String.format("%.2f%% of files completed, %,d documents indexed", + (double) executor.getCompletedTaskCount() / segmentCnt * 100.0d, counters.indexed.get())); + } + } + } catch (InterruptedException ie) { + // (Re-)Cancel if current thread also interrupted. + executor.shutdownNow(); + // Preserve interrupt status. + Thread.currentThread().interrupt(); + } + + if (segmentCnt != executor.getCompletedTaskCount()) { + throw new RuntimeException("totalFiles = " + segmentCnt + + " is not equal to completedTaskCount = " + executor.getCompletedTaskCount()); + } + + long numIndexed = writer.getDocStats().maxDoc; + if (numIndexed != counters.indexed.get()) { + // We want to log a warning here, as opposed to throwing an exception, because for certain collections, + // this might be expected. 
For example, when indexing tweets: if a tweet is delivered multiple times + // (i.e., same docid), with -uniqueDocid we're going to update the doc in the index in place, leading + // to differences between the counts. + LOG.warn(String.format("Unexpected difference between number of indexed documents (%d) and index maxDoc (%d).", + counters.indexed.get(), numIndexed)); + } + + // Do a final commit. + try { + writer.commit(); + if (args.optimize) { + writer.forceMerge(1); + } + } catch (IOException e) { + // It is possible that this happens... but nothing much we can do at this point, + // so just log the error and move on. + LOG.error(e); + } finally { + try { + writer.close(); + } catch (IOException e) { + // It is possible that this happens... but nothing much we can do at this point, + // so just log the error and move on. + LOG.error(e); + } + } + + LOG.info(String.format("Indexing Complete! %,d documents indexed", numIndexed)); + LOG.info("============ Final Counter Values ============"); + LOG.info(String.format("indexed: %,12d", counters.indexed.get())); + LOG.info(String.format("unindexable: %,12d", counters.unindexable.get())); + LOG.info(String.format("empty: %,12d", counters.empty.get())); + LOG.info(String.format("skipped: %,12d", counters.skipped.get())); + LOG.info(String.format("errors: %,12d", counters.errors.get())); + + final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS); + LOG.info(String.format("Total %,d documents indexed in %s", numIndexed, + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"))); + } + + // Default method to process the segments; subclasses can override this method if desired. + protected void processSegments(ThreadPoolExecutor executor, List<Path> segmentPaths) { + segmentPaths.forEach((segmentPath) -> { + try { + // Each thread gets its own document generator, so we don't need to make any assumptions about its thread safety. 
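+ // As an illustration (assuming a hypothetical generator class Foo with a no-arg constructor), the reflective call below amounts to: LuceneDocumentGenerator<SourceDocument> generator = new Foo();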
+ @SuppressWarnings("unchecked") + LuceneDocumentGenerator<SourceDocument> generator = (LuceneDocumentGenerator<SourceDocument>) + generatorClass.getDeclaredConstructor((Class<?>[]) null).newInstance(); + + executor.execute(new IndexerThread(segmentPath, generator)); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { + throw new IllegalArgumentException(String.format("Unable to load LuceneDocumentGenerator \"%s\".", generatorClass.getSimpleName())); + } + }); + } + + public Counters getCounters() { + return this.counters; + } +} diff --git a/src/main/java/io/anserini/index/IndexCollection.java b/src/main/java/io/anserini/index/IndexCollection.java index e2d6baa996..f8775d92f0 100644 --- a/src/main/java/io/anserini/index/IndexCollection.java +++ b/src/main/java/io/anserini/index/IndexCollection.java @@ -17,95 +17,50 @@ package io.anserini.index; import io.anserini.analysis.AnalyzerMap; +import io.anserini.analysis.AutoCompositeAnalyzer; import io.anserini.analysis.CompositeAnalyzer; -import io.anserini.analysis.HuggingFaceTokenizerAnalyzer; import io.anserini.analysis.DefaultEnglishAnalyzer; -import io.anserini.analysis.AutoCompositeAnalyzer; +import io.anserini.analysis.HuggingFaceTokenizerAnalyzer; import io.anserini.analysis.TweetAnalyzer; -import io.anserini.collection.DocumentCollection; -import io.anserini.collection.FileSegment; import io.anserini.collection.SourceDocument; -import io.anserini.index.generator.EmptyDocumentException; -import io.anserini.index.generator.InvalidDocumentException; import io.anserini.index.generator.LuceneDocumentGenerator; -import io.anserini.index.generator.SkippedDocumentException; import io.anserini.search.similarity.AccurateBM25Similarity; import io.anserini.search.similarity.ImpactSimilarity; import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.time.DurationFormatUtils; -import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.core.config.Configurator; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; -import org.apache.lucene.document.Document; import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.kohsuke.args4j.CmdLineException; import org.kohsuke.args4j.CmdLineParser; import org.kohsuke.args4j.Option; -import org.kohsuke.args4j.OptionHandlerFilter; import org.kohsuke.args4j.ParserProperties; import org.kohsuke.args4j.spi.StringArrayOptionHandler; import java.io.File; -import java.io.IOException; -import java.nio.file.Files; +import java.lang.reflect.InvocationTargetException; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.concurrent.Executors; import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; -public final class IndexCollection { +public final class IndexCollection extends AbstractIndexer { private static final Logger LOG = LogManager.getLogger(IndexCollection.class); // This is the default analyzer used, unless another stemming 
algorithm or language is specified. public static final Analyzer DEFAULT_ANALYZER = DefaultEnglishAnalyzer.newDefaultInstance(); - public static class Args { - - private static final int TIMEOUT = 600 * 1000; - - // required arguments - - @Option(name = "-input", metaVar = "[path]", required = true, - usage = "Location of input collection.") - public String input; - - @Option(name = "-collection", metaVar = "[class]", required = true, - usage = "Collection class in package 'io.anserini.collection'.") - public String collectionClass; - - @Option(name = "-index", metaVar = "[path]", usage = "Index path.", required = true) - public String index; - - // optional general arguments - - @Option(name = "-verbose", forbids = {"-quiet"}, - usage = "Enables verbose logging for each indexing thread; can be noisy if collection has many small file segments.") - public boolean verbose = false; - - @Option(name = "-quiet", forbids = {"-verbose"}, - usage = "Turns off all logging.") - public boolean quiet = false; - - // optional arguments - - @Option(name = "-threads", metaVar = "[num]", usage = "Number of indexing threads.") - public int threads = 8; - + public static class Args extends AbstractIndexer.Args { @Option(name = "-append", usage = "Append documents.") public boolean append = false; @@ -133,10 +88,6 @@ public static class Args { usage = "Boolean switch to store raw source documents.") public boolean storeRaw = false; - @Option(name = "-optimize", - usage = "Boolean switch to optimize index (i.e., force merge) into a single segment; costly for large collections.") - public boolean optimize = false; - @Option(name = "-keepStopwords", usage = "Boolean switch to keep stopwords.") public boolean keepStopwords = false; @@ -149,15 +100,6 @@ public static class Args { usage = "Stemmer: one of the following {porter, krovetz, none}; defaults to 'porter'.") public String stemmer = "porter"; - @Option(name = "-uniqueDocid", - usage = "Removes duplicate documents with the same docid during indexing. 
This significantly slows indexing throughput " + - "but may be needed for tweet collections since the streaming API might deliver a tweet multiple times.") - public boolean uniqueDocid = false; - - @Option(name = "-memorybuffer", metaVar = "[mb]", - usage = "Memory buffer size (in MB).") - public int memorybufferSize = 2048; - @Option(name = "-whitelist", metaVar = "[file]", usage = "File containing list of docids, one per line; only these docids will be indexed.") public String whitelist = null; @@ -211,189 +153,35 @@ public static class Args { @Option(name = "-tweet.deletedIdsFile", metaVar = "[file]", usage = "File that contains deleted tweet ids (longs), one per line; these tweets will be skipped during indexing.") public String tweetDeletedIdsFile = ""; - - // Sharding options - - @Option(name = "-shard.count", metaVar = "[n]", - usage = "Number of shards to partition the document collection into.") - public int shardCount = -1; - - @Option(name = "-shard.current", metaVar = "[n]", - usage = "The current shard number to generate (indexed from 0).") - public int shardCurrent = -1; - } - - private final class LocalIndexerThread extends Thread { - final private Path inputFile; - final private IndexWriter writer; - final private DocumentCollection collection; - private FileSegment fileSegment; - - private LocalIndexerThread(IndexWriter writer, DocumentCollection collection, Path inputFile) { - this.writer = writer; - this.collection = collection; - this.inputFile = inputFile; - setName(inputFile.getFileName().toString()); - } - - @Override - @SuppressWarnings("unchecked") - public void run() { - try { - LuceneDocumentGenerator generator = (LuceneDocumentGenerator) - generatorClass.getDeclaredConstructor(Args.class).newInstance(args); - - // We keep track of two separate counts: the total count of documents in this file segment (cnt), - // and the number of documents in this current "batch" (batch). We update the global counter every - // 10k documents: this is so that we get intermediate updates, which is informative if a collection - // has only one file segment; see https://github.com/castorini/anserini/issues/683 - int cnt = 0; - int batch = 0; - - FileSegment segment = collection.createFileSegment(inputFile); - // in order to call close() and clean up resources in case of exception - this.fileSegment = segment; - - for (SourceDocument d : segment) { - if (!d.indexable()) { - counters.unindexable.incrementAndGet(); - continue; - } - - Document doc; - try { - doc = generator.createDocument(d); - } catch (EmptyDocumentException e1) { - counters.empty.incrementAndGet(); - continue; - } catch (SkippedDocumentException e2) { - counters.skipped.incrementAndGet(); - continue; - } catch (InvalidDocumentException e3) { - counters.errors.incrementAndGet(); - continue; - } - - if (whitelistDocids != null && !whitelistDocids.contains(d.id())) { - counters.skipped.incrementAndGet(); - continue; - } - - if (args.uniqueDocid) { - writer.updateDocument(new Term("id", d.id()), doc); - } else { - writer.addDocument(doc); - } - cnt++; - batch++; - - // And the counts from this batch, reset batch counter. - if (batch % 10000 == 0) { - counters.indexed.addAndGet(batch); - batch = 0; - } - } - - // Add the remaining documents. - counters.indexed.addAndGet(batch); - - int skipped = segment.getSkippedCount(); - if (skipped > 0) { - // When indexing tweets, this is normal, because there are delete messages that are skipped over. 
- counters.skipped.addAndGet(skipped); - LOG.warn(inputFile.getParent().getFileName().toString() + File.separator + - inputFile.getFileName().toString() + ": " + skipped + " docs skipped."); - } - - if (segment.getErrorStatus()) { - counters.errors.incrementAndGet(); - LOG.error(inputFile.getParent().getFileName().toString() + File.separator + - inputFile.getFileName().toString() + ": error iterating through segment."); - } - - // Log at the debug level because this can be quite noisy if there are lots of file segments. - LOG.debug(inputFile.getParent().getFileName().toString() + File.separator + - inputFile.getFileName().toString() + ": " + cnt + " docs added."); - } catch (Exception e) { - LOG.error(Thread.currentThread().getName() + ": Unexpected Exception:", e); - } finally { - if (fileSegment != null) { - fileSegment.close(); - } - } - } } - private final Args args; - private final Path collectionPath; - private final Set whitelistDocids; - private final Class collectionClass; - private final Class generatorClass; - private final DocumentCollection collection; - private final Counters counters; - private Path indexPath; + private final Set<String> whitelistDocids; @SuppressWarnings("unchecked") public IndexCollection(Args args) throws Exception { - this.args = args; - - if (args.verbose) { - // If verbose logging enabled, changed default log level to DEBUG so we get per-thread logging messages. - Configurator.setRootLevel(Level.DEBUG); - LOG.info("Setting log level to " + Level.DEBUG); - } else if (args.quiet) { - // If quiet mode enabled, only report warnings and above. - Configurator.setRootLevel(Level.WARN); - } else { - // Otherwise, we get the standard set of log messages. - Configurator.setRootLevel(Level.INFO); - LOG.info("Setting log level to " + Level.INFO); - } - - LOG.info("Starting indexer..."); - LOG.info("============ Loading Parameters ============"); - LOG.info("DocumentCollection path: " + args.input); - LOG.info("CollectionClass: " + args.collectionClass); - LOG.info("Generator: " + args.generatorClass); - LOG.info("Threads: " + args.threads); - LOG.info("Language: " + args.language); - LOG.info("Stemmer: " + args.stemmer); - LOG.info("Keep stopwords? " + args.keepStopwords); - LOG.info("Stopwords: " + args.stopwords); - LOG.info("Store positions? " + args.storePositions); - LOG.info("Store docvectors? " + args.storeDocvectors); - LOG.info("Store document \"contents\" field? " + args.storeContents); - LOG.info("Store document \"raw\" field? " + args.storeRaw); - LOG.info("Additional fields to index: " + Arrays.toString(args.fields)); - LOG.info("Optimize (merge segments)? " + args.optimize); - LOG.info("Whitelist: " + args.whitelist); - LOG.info("Pretokenized?: " + args.pretokenized); - LOG.info("Index path: " + args.index); - - if (args.index != null) { - this.indexPath = Paths.get(args.index); - if (!Files.exists(this.indexPath)) { - Files.createDirectories(this.indexPath); - } - } + super(args); + + LOG.info("IndexCollection settings:"); + LOG.info(" + Generator: " + args.generatorClass); + LOG.info(" + Language: " + args.language); + LOG.info(" + Stemmer: " + args.stemmer); + LOG.info(" + Keep stopwords? " + args.keepStopwords); + LOG.info(" + Stopwords: " + args.stopwords); + LOG.info(" + Store positions? " + args.storePositions); + LOG.info(" + Store docvectors? " + args.storeDocvectors); + LOG.info(" + Store document \"contents\" field? " + args.storeContents); + LOG.info(" + Store document \"raw\" field? " + args.storeRaw); + LOG.info(" + Additional fields to index: " + Arrays.toString(args.fields)); + LOG.info(" + Whitelist: " + args.whitelist); + LOG.info(" + Pretokenized?: " + args.pretokenized); - // Our documentation uses /path/to/foo as a convention: to make copy and paste of the commands work, we assume - // collections/ as the path location. - String pathStr = args.input; - if (pathStr.startsWith("/path/to")) { - pathStr = pathStr.replace("/path/to", "collections"); - } - collectionPath = Paths.get(pathStr); - if (!Files.exists(collectionPath) || !Files.isReadable(collectionPath) || !Files.isDirectory(collectionPath)) { - throw new RuntimeException("Document directory " + collectionPath.toString() + " does not exist or is not readable, please check the path"); + try { + super.generatorClass = (Class<LuceneDocumentGenerator<? extends SourceDocument>>) + Class.forName("io.anserini.index.generator." + args.generatorClass); + } catch (Exception e) { + throw new IllegalArgumentException(String.format("Unable to load generator class \"%s\".", args.generatorClass)); } - this.generatorClass = Class.forName("io.anserini.index.generator." + args.generatorClass); - this.collectionClass = Class.forName("io.anserini.collection." + args.collectionClass); - - // Initialize the collection. - collection = (DocumentCollection) this.collectionClass.getConstructor(Path.class).newInstance(collectionPath); - if (args.whitelist != null) { List<String> lines = FileUtils.readLines(new File(args.whitelist), "utf-8"); this.whitelistDocids = new HashSet<>(lines); @@ -401,179 +189,110 @@ public IndexCollection(Args args) throws Exception { - this.whitelistDocids = null; } - this.counters = new Counters(); + final Directory dir = FSDirectory.open(Paths.get(args.index)); + final IndexWriterConfig config = new IndexWriterConfig(getAnalyzer()); + + if (args.bm25Accurate) { + // Necessary during indexing as the norm used in BM25 is already determined at index time. + config.setSimilarity(new AccurateBM25Similarity()); + } else if (args.impact) { + config.setSimilarity(new ImpactSimilarity()); + } else { + config.setSimilarity(new BM25Similarity()); + } + config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); + config.setRAMBufferSizeMB(args.memoryBuffer); + config.setUseCompoundFile(false); + config.setMergeScheduler(new ConcurrentMergeScheduler()); + + super.writer = new IndexWriter(dir, config); + } private Analyzer getAnalyzer() { try { - if (args.collectionClass.equals("TweetCollection")) { - return new TweetAnalyzer(args.tweetStemming); - } else if (args.useAutoCompositeAnalyzer) { + // args is stored in the super-class; here, explicitly get from super-class and down-cast. 
+ Args castedArgs = (Args) super.args; + if (castedArgs.collectionClass.equals("TweetCollection")) { + return new TweetAnalyzer(castedArgs.tweetStemming); + } else if (castedArgs.useAutoCompositeAnalyzer) { LOG.info("Using AutoCompositeAnalyzer"); - return AutoCompositeAnalyzer.getAnalyzer(args.language, args.analyzeWithHuggingFaceTokenizer); - } else if (args.useCompositeAnalyzer) { + return AutoCompositeAnalyzer.getAnalyzer(castedArgs.language, castedArgs.analyzeWithHuggingFaceTokenizer); + } else if (castedArgs.useCompositeAnalyzer) { final Analyzer languageSpecificAnalyzer; - if (AnalyzerMap.analyzerMap.containsKey(args.language)) { - languageSpecificAnalyzer = AnalyzerMap.getLanguageSpecificAnalyzer(args.language); - } else if (args.language.equals("en")) { - languageSpecificAnalyzer = DefaultEnglishAnalyzer.fromArguments(args.stemmer, args.keepStopwords, args.stopwords); + if (AnalyzerMap.analyzerMap.containsKey(castedArgs.language)) { + languageSpecificAnalyzer = AnalyzerMap.getLanguageSpecificAnalyzer(castedArgs.language); + } else if (castedArgs.language.equals("en")) { + languageSpecificAnalyzer = DefaultEnglishAnalyzer.fromArguments(castedArgs.stemmer, castedArgs.keepStopwords, castedArgs.stopwords); } else { languageSpecificAnalyzer = new WhitespaceAnalyzer(); } String message = "Using CompositeAnalyzer with HF Tokenizer: %s & Analyzer %s"; - LOG.info(String.format(message, args.analyzeWithHuggingFaceTokenizer, languageSpecificAnalyzer.getClass().getName())); - return new CompositeAnalyzer(args.analyzeWithHuggingFaceTokenizer, languageSpecificAnalyzer); - } else if (args.analyzeWithHuggingFaceTokenizer!= null) { - return new HuggingFaceTokenizerAnalyzer(args.analyzeWithHuggingFaceTokenizer); - } else if (AnalyzerMap.analyzerMap.containsKey(args.language)) { + LOG.info(String.format(message, castedArgs.analyzeWithHuggingFaceTokenizer, languageSpecificAnalyzer.getClass().getName())); + return new CompositeAnalyzer(castedArgs.analyzeWithHuggingFaceTokenizer, languageSpecificAnalyzer); + } else if (castedArgs.analyzeWithHuggingFaceTokenizer!= null) { + return new HuggingFaceTokenizerAnalyzer(castedArgs.analyzeWithHuggingFaceTokenizer); + } else if (AnalyzerMap.analyzerMap.containsKey(castedArgs.language)) { LOG.info("Using language-specific analyzer"); - LOG.info("Language: " + args.language); - return AnalyzerMap.getLanguageSpecificAnalyzer(args.language); - } else if ( Arrays.asList("ha","so","sw","yo").contains(args.language)) { + LOG.info("Language: " + castedArgs.language); + return AnalyzerMap.getLanguageSpecificAnalyzer(castedArgs.language); + } else if ( Arrays.asList("ha","so","sw","yo").contains(castedArgs.language)) { return new WhitespaceAnalyzer(); - } else if (args.pretokenized) { + } else if (castedArgs.pretokenized) { return new WhitespaceAnalyzer(); } else { // Default to English LOG.info("Using DefaultEnglishAnalyzer"); - LOG.info("Stemmer: " + args.stemmer); - LOG.info("Keep stopwords? " + args.keepStopwords); - LOG.info("Stopwords file: " + args.stopwords); - return DefaultEnglishAnalyzer.fromArguments(args.stemmer, args.keepStopwords, args.stopwords); + LOG.info("Stemmer: " + castedArgs.stemmer); + LOG.info("Keep stopwords? 
" + castedArgs.keepStopwords); + LOG.info("Stopwords file: " + castedArgs.stopwords); + return DefaultEnglishAnalyzer.fromArguments(castedArgs.stemmer, castedArgs.keepStopwords, castedArgs.stopwords); } } catch (Exception e) { return null; } } - public Counters run() throws IOException { - final long start = System.nanoTime(); - LOG.info("============ Indexing Collection ============"); - - int numThreads = args.threads; - IndexWriter writer = null; - - // Used for LocalIndexThread - if (indexPath != null) { - final Directory dir = FSDirectory.open(indexPath); - final IndexWriterConfig config; - final Analyzer analyzer; - analyzer = getAnalyzer(); - config = new IndexWriterConfig(analyzer); - - if (args.bm25Accurate) { - config.setSimilarity(new AccurateBM25Similarity()); // necessary during indexing as the norm used in BM25 is already determined at index time. - } if (args.impact ) { - config.setSimilarity(new ImpactSimilarity()); - } else { - config.setSimilarity(new BM25Similarity()); - } - config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); - config.setRAMBufferSizeMB(args.memorybufferSize); - config.setUseCompoundFile(false); - config.setMergeScheduler(new ConcurrentMergeScheduler()); - - writer = new IndexWriter(dir, config); - } - - final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(numThreads); - LOG.info("Thread pool with " + numThreads + " threads initialized."); - - LOG.info("Initializing collection in " + collectionPath.toString()); - - List segmentPaths = collection.getSegmentPaths(); - // when we want sharding to be done - if (args.shardCount > 1) { - segmentPaths = collection.getSegmentPaths(args.shardCount, args.shardCurrent); - } - final int segmentCnt = segmentPaths.size(); - - LOG.info(String.format("%,d %s found", segmentCnt, (segmentCnt == 1 ? "file" : "files" ))); - LOG.info("Starting to index..."); - - for (int i = 0; i < segmentCnt; i++) { - executor.execute(new LocalIndexerThread(writer, collection, (Path) segmentPaths.get(i))); - } - - executor.shutdown(); - - try { - // Wait for existing tasks to terminate - while (!executor.awaitTermination(1, TimeUnit.MINUTES)) { - if (segmentCnt == 1) { - LOG.info(String.format("%,d documents indexed", counters.indexed.get())); - } else { - LOG.info(String.format("%.2f%% of files completed, %,d documents indexed", - (double) executor.getCompletedTaskCount() / segmentCnt * 100.0d, counters.indexed.get())); - } - } - } catch (InterruptedException ie) { - // (Re-)Cancel if current thread also interrupted - executor.shutdownNow(); - // Preserve interrupt status - Thread.currentThread().interrupt(); - } - - if (segmentCnt != executor.getCompletedTaskCount()) { - throw new RuntimeException("totalFiles = " + segmentCnt + - " is not equal to completedTaskCount = " + executor.getCompletedTaskCount()); - } - - long numIndexed = writer.getDocStats().maxDoc; - - // Do a final commit - try { - if (writer != null) { - writer.commit(); - if (args.optimize) { - writer.forceMerge(1); - } - } - } finally { + protected void processSegments(ThreadPoolExecutor executor, List segmentPaths) { + segmentPaths.forEach((segmentPath) -> { try { - if (writer != null) { - writer.close(); - } - } catch (IOException e) { - // It is possible that this happens... but nothing much we can do at this point, - // so just log the error and move on. - LOG.error(e); + // Each thread gets its own document generator, so we don't need to make any assumptions about its thread safety. 
+ @SuppressWarnings("unchecked") + LuceneDocumentGenerator<SourceDocument> generator = (LuceneDocumentGenerator<SourceDocument>) + generatorClass.getDeclaredConstructor(Args.class).newInstance(this.args); + + executor.execute(new AbstractIndexer.IndexerThread(segmentPath, generator, whitelistDocids)); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { + throw new IllegalArgumentException(String.format("Unable to load LuceneDocumentGenerator \"%s\".", generatorClass.getSimpleName())); } - } - - if (numIndexed != counters.indexed.get()) { - LOG.warn("Unexpected difference between number of indexed documents and index maxDoc."); - } - - LOG.info(String.format("Indexing Complete! %,d documents indexed", numIndexed)); - LOG.info("============ Final Counter Values ============"); - LOG.info(String.format("indexed: %,12d", counters.indexed.get())); - LOG.info(String.format("unindexable: %,12d", counters.unindexable.get())); - LOG.info(String.format("empty: %,12d", counters.empty.get())); - LOG.info(String.format("skipped: %,12d", counters.skipped.get())); - LOG.info(String.format("errors: %,12d", counters.errors.get())); - - final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS); - LOG.info(String.format("Total %,d documents indexed in %s", numIndexed, - DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"))); - - return counters; + }); } public static void main(String[] args) throws Exception { - Args indexCollectionArgs = new Args(); - CmdLineParser parser = new CmdLineParser(indexCollectionArgs, ParserProperties.defaults().withUsageWidth(100)); + Args indexArgs = new Args(); + CmdLineParser parser = new CmdLineParser(indexArgs, ParserProperties.defaults().withUsageWidth(120)); try { parser.parseArgument(args); } catch (CmdLineException e) { - System.err.println(e.getMessage()); - parser.printUsage(System.err); - System.err.println("Example: " + IndexCollection.class.getSimpleName() + - parser.printExample(OptionHandlerFilter.REQUIRED)); + if (indexArgs.options) { + System.err.printf("Options for %s:\n\n", IndexCollection.class.getSimpleName()); + parser.printUsage(System.err); + + List<String> required = new ArrayList<>(); + parser.getOptions().forEach((option) -> { + if (option.option.required()) { + required.add(option.option.toString()); + } + }); + + System.err.printf("\nRequired options are %s\n", required); + } else { + System.err.printf("Error: %s. 
For help, use "-options" to print out information about options.\n", e.getMessage()); + } + return; } - new IndexCollection(indexCollectionArgs).run(); + new IndexCollection(indexArgs).run(); } } diff --git a/src/main/java/io/anserini/index/IndexHnswDenseVectors.java b/src/main/java/io/anserini/index/IndexHnswDenseVectors.java index 0dbd80a288..10e7c15640 100644 --- a/src/main/java/io/anserini/index/IndexHnswDenseVectors.java +++ b/src/main/java/io/anserini/index/IndexHnswDenseVectors.java @@ -16,18 +16,10 @@ package io.anserini.index; -import io.anserini.collection.DocumentCollection; -import io.anserini.collection.FileSegment; import io.anserini.collection.SourceDocument; -import io.anserini.index.generator.EmptyDocumentException; -import io.anserini.index.generator.InvalidDocumentException; import io.anserini.index.generator.LuceneDocumentGenerator; -import io.anserini.index.generator.SkippedDocumentException; -import org.apache.commons.lang3.time.DurationFormatUtils; -import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.logging.log4j.core.config.Configurator; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; @@ -44,215 +36,86 @@ import org.kohsuke.args4j.CmdLineException; import org.kohsuke.args4j.CmdLineParser; import org.kohsuke.args4j.Option; -import org.kohsuke.args4j.OptionHandlerFilter; import org.kohsuke.args4j.ParserProperties; -import java.io.File; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.List; -import java.util.concurrent.Executors; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -public final class IndexHnswDenseVectors { +public final class IndexHnswDenseVectors extends AbstractIndexer { private static final Logger LOG = LogManager.getLogger(IndexHnswDenseVectors.class); - public static final class Args { - @Option(name = "-collection", metaVar = "[class]", required = true, usage = "Collection class in io.anserini.collection.") - public String collectionClass; - - @Option(name = "-input", metaVar = "[path]", required = true, usage = "Input collection.") - public String input; - + public static final class Args extends AbstractIndexer.Args { @Option(name = "-generator", metaVar = "[class]", usage = "Document generator class in io.anserini.index.generator.") public String generatorClass = "HnswDenseVectorDocumentGenerator"; - @Option(name = "-index", metaVar = "[path]", required = true, usage = "Index path.") - public String index; - @Option(name = "-M", metaVar = "[num]", usage = "HNSW parameter M") public int M = 16; @Option(name = "-efC", metaVar = "[num]", usage = "HNSW parameter efC (ef at construction time)") public int efC = 100; - @Option(name = "-optimize", usage = "Optimizes index by merging into a single index segment.") - public boolean optimize = false; - - @Option(name = "-memorybuffer", metaVar = "[mb]", usage = "Memory buffer size in MB.") - public int memorybufferSize = 65536; - @Option(name = "-storeVectors", usage = "Boolean switch to store raw vectors.") public boolean storeVectors = false; - - @Option(name = "-threads", metaVar = "[num]", usage = "Number of indexing threads.") - public int threads = 4; - - @Option(name = "-verbose", forbids = {"-quiet"}, usage = "Enables verbose logging for each indexing thread.") - 
public boolean verbose = false; - - @Option(name = "-quiet", forbids = {"-verbose"}, usage = "Turns off all logging.") - public boolean quiet = false; - } - - private final class LocalIndexerThread extends Thread { - final private Path inputFile; - final private IndexWriter writer; - final private DocumentCollection collection; - - private LocalIndexerThread(IndexWriter writer, DocumentCollection collection, Path inputFile) { - this.writer = writer; - this.collection = collection; - this.inputFile = inputFile; - setName(inputFile.getFileName().toString()); - } - - @Override - public void run() { - FileSegment segment = null; - - try { - @SuppressWarnings("unchecked") - LuceneDocumentGenerator generator = (LuceneDocumentGenerator) - generatorClass.getDeclaredConstructor(Args.class).newInstance(args); - - // We keep track of two separate counts: the total count of documents in this file segment (cnt), - // and the number of documents in this current "batch" (batch). We update the global counter every - // 10k documents: this is so that we get intermediate updates, which is informative if a collection - // has only one file segment; see https://github.com/castorini/anserini/issues/683 - int cnt = 0; - int batch = 0; - - segment = collection.createFileSegment(inputFile); - - for (SourceDocument d : segment) { - if (!d.indexable()) { - counters.unindexable.incrementAndGet(); - continue; - } - - try { - writer.addDocument(generator.createDocument(d)); - - cnt++; - batch++; - } catch (EmptyDocumentException e1) { - counters.empty.incrementAndGet(); - continue; - } catch (SkippedDocumentException e2) { - counters.skipped.incrementAndGet(); - continue; - } catch (InvalidDocumentException e3) { - counters.errors.incrementAndGet(); - continue; - } - - // And the counts from this batch, reset batch counter. - if (batch % 10000 == 0) { - counters.indexed.addAndGet(batch); - batch = 0; - } - } - - // Add the remaining documents. - counters.indexed.addAndGet(batch); - - int skipped = segment.getSkippedCount(); - if (skipped > 0) { - // When indexing tweets, this is normal, because there are delete messages that are skipped over. - counters.skipped.addAndGet(skipped); - LOG.warn(inputFile.getParent().getFileName().toString() + File.separator + - inputFile.getFileName().toString() + ": " + skipped + " docs skipped."); - } - - if (segment.getErrorStatus()) { - counters.errors.incrementAndGet(); - LOG.error(inputFile.getParent().getFileName().toString() + File.separator + - inputFile.getFileName().toString() + ": error iterating through segment."); - } - - // Log at the debug level because this can be quite noisy if there are lots of file segments. - LOG.debug(inputFile.getParent().getFileName().toString() + File.separator + - inputFile.getFileName().toString() + ": " + cnt + " docs added."); - } catch (Exception e) { - LOG.error(Thread.currentThread().getName() + ": Unexpected Exception:", e); - } finally { - segment.close(); - } - } } - private final Args args; - private final Path collectionPath; - private final Class> generatorClass; - private final DocumentCollection collection; - private final Counters counters; - private final Path indexPath; - @SuppressWarnings("unchecked") public IndexHnswDenseVectors(Args args) throws Exception { - this.args = args; + super(args); - if (args.verbose) { - // If verbose logging enabled, changed default log level to DEBUG so we get per-thread logging messages. 
- Configurator.setRootLevel(Level.DEBUG); - LOG.info("Setting log level to " + Level.DEBUG); - } else if (args.quiet) { - // If quiet mode enabled, only report warnings and above. - Configurator.setRootLevel(Level.WARN); - } else { - // Otherwise, we get the standard set of log messages. - Configurator.setRootLevel(Level.INFO); - LOG.info("Setting log level to " + Level.INFO); - } + LOG.info("HnswIndexer settings:"); + LOG.info(" + Generator: " + args.generatorClass); + LOG.info(" + M: " + args.M); + LOG.info(" + efC: " + args.efC); + LOG.info(" + Store document vectors? " + args.storeVectors); - LOG.info("Starting indexer..."); - LOG.info("============ Loading Parameters ============"); - LOG.info("DocumentCollection path: " + args.input); - LOG.info("CollectionClass: " + args.collectionClass); - LOG.info("Generator: " + args.generatorClass); - LOG.info("Threads: " + args.threads); - LOG.info("Store document vectors? " + args.storeVectors); - LOG.info("Optimize (merge segments)? " + args.optimize); - LOG.info("Index path: " + args.index); - - this.indexPath = Paths.get(args.index); - if (!Files.exists(this.indexPath)) { - Files.createDirectories(this.indexPath); - } - - // Our documentation uses /path/to/foo as a convention: to make copy and paste of the commands work, we assume - // collections/ as the path location. - String pathStr = args.input; - if (pathStr.startsWith("/path/to")) { - pathStr = pathStr.replace("/path/to", "collections"); - } - this.collectionPath = Paths.get(pathStr); - if (!Files.exists(collectionPath) || !Files.isReadable(collectionPath) || !Files.isDirectory(collectionPath)) { - throw new RuntimeException("Invalid collection path " + collectionPath + "!"); + try { + super.generatorClass = (Class<LuceneDocumentGenerator<? extends SourceDocument>>) + Class.forName("io.anserini.index.generator." + args.generatorClass); + } catch (Exception e) { + throw new IllegalArgumentException(String.format("Unable to load generator class \"%s\".", args.generatorClass)); } - Class<? extends DocumentCollection<? extends SourceDocument>> collectionClass = (Class<? extends DocumentCollection<? extends SourceDocument>>) - Class.forName("io.anserini.collection." + args.collectionClass); - this.collection = collectionClass.getConstructor(Path.class).newInstance(collectionPath); + try { + final Directory dir = FSDirectory.open(Paths.get(args.index)); + final IndexWriterConfig config = new IndexWriterConfig().setCodec( + new Lucene95Codec() { + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return new DelegatingKnnVectorsFormat( + new Lucene95HnswVectorsFormat(args.M, args.efC), 4096); + } + }); + + config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); + config.setRAMBufferSizeMB(args.memoryBuffer); + config.setUseCompoundFile(false); + config.setMergeScheduler(new ConcurrentMergeScheduler()); - this.generatorClass = (Class<LuceneDocumentGenerator<? extends SourceDocument>>) - Class.forName("io.anserini.index.generator." + args.generatorClass); + if (args.optimize) { + // If we're going to merge down into a single segment at the end, skip intermediate merges, + // since they are a waste of time. 
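+ // Raising maxMergeAtOnce and segmentsPerTier to 256 effectively defers merging until the single forceMerge(1) in AbstractIndexer.run(); the value 256 is a heuristic, not a tuned constant.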
+ TieredMergePolicy mergePolicy = new TieredMergePolicy(); + mergePolicy.setMaxMergeAtOnce(256); + mergePolicy.setSegmentsPerTier(256); + config.setMergePolicy(mergePolicy); + } - this.counters = new Counters(); + this.writer = new IndexWriter(dir, config); + } catch (Exception e) { + throw new IllegalArgumentException(String.format("Unable to create IndexWriter: %s.", e.getMessage())); + } } // Solution provided by Solr, see https://www.mail-archive.com/java-user@lucene.apache.org/msg52149.html // This class exists because Lucene95HnswVectorsFormat's getMaxDimensions method is final and we // need to work around that constraint to allow more than the default number of dimensions. - private static final class OpenAiDelegatingKnnVectorsFormat extends KnnVectorsFormat { + private static final class DelegatingKnnVectorsFormat extends KnnVectorsFormat { private final KnnVectorsFormat delegate; private final int maxDimensions; - public OpenAiDelegatingKnnVectorsFormat(KnnVectorsFormat delegate, int maxDimensions) { + public DelegatingKnnVectorsFormat(KnnVectorsFormat delegate, int maxDimensions) { super(delegate.getName()); this.delegate = delegate; this.maxDimensions = maxDimensions; @@ -274,123 +137,32 @@ public int getMaxDimensions(String fieldName) { } } - public Counters run() throws IOException { - final long start = System.nanoTime(); - LOG.info("============ Indexing Collection ============"); + public static void main(String[] args) throws Exception { + Args indexArgs = new Args(); + CmdLineParser parser = new CmdLineParser(indexArgs, ParserProperties.defaults().withUsageWidth(120)); - final Directory dir = FSDirectory.open(indexPath); - final IndexWriterConfig config = new IndexWriterConfig().setCodec( - new Lucene95Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new OpenAiDelegatingKnnVectorsFormat( - new Lucene95HnswVectorsFormat(args.M, args.efC), 4096); + try { + parser.parseArgument(args); + } catch (CmdLineException e) { + if (indexArgs.options) { + System.err.printf("Options for %s:\n\n", IndexHnswDenseVectors.class.getSimpleName()); + parser.printUsage(System.err); + + List<String> required = new ArrayList<>(); + parser.getOptions().forEach((option) -> { + if (option.option.required()) { + required.add(option.option.toString()); } }); - config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); - config.setRAMBufferSizeMB(args.memorybufferSize); - config.setUseCompoundFile(false); - config.setMergeScheduler(new ConcurrentMergeScheduler()); - - if (args.optimize) { - // If we're going to merge down into a single segment at the end, skip intermediate merges, - // since they are a waste of time. - TieredMergePolicy mergePolicy = new TieredMergePolicy(); - mergePolicy.setMaxMergeAtOnce(256); - mergePolicy.setSegmentsPerTier(256); - config.setMergePolicy(mergePolicy); - } - - IndexWriter writer = new IndexWriter(dir, config); - - final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(args.threads); - LOG.info("Thread pool with " + args.threads + " threads initialized."); - LOG.info("Initializing collection in " + collectionPath.toString()); - - List segmentPaths = collection.getSegmentPaths(); - final int segmentCnt = segmentPaths.size(); - - LOG.info(String.format("%,d %s found", segmentCnt, (segmentCnt == 1 ? 
"file" : "files" ))); - LOG.info("Starting to index..."); - - segmentPaths.forEach((segmentPath) -> executor.execute(new LocalIndexerThread(writer, collection, segmentPath))); - - executor.shutdown(); - - try { - // Wait for existing tasks to terminate - while (!executor.awaitTermination(1, TimeUnit.MINUTES)) { - if (segmentCnt == 1) { - LOG.info(String.format("%,d documents indexed", counters.indexed.get())); - } else { - LOG.info(String.format("%.2f%% of files completed, %,d documents indexed", - (double) executor.getCompletedTaskCount() / segmentCnt * 100.0d, counters.indexed.get())); - } - } - } catch (InterruptedException ie) { - // (Re-)Cancel if current thread also interrupted - executor.shutdownNow(); - // Preserve interrupt status - Thread.currentThread().interrupt(); - } - - if (segmentCnt != executor.getCompletedTaskCount()) { - throw new RuntimeException("totalFiles = " + segmentCnt + - " is not equal to completedTaskCount = " + executor.getCompletedTaskCount()); - } - - long numIndexed = writer.getDocStats().maxDoc; - - // Do a final commit - try { - writer.commit(); - if (args.optimize) { - writer.forceMerge(1); - } - } finally { - try { - writer.close(); - } catch (IOException e) { - // It is possible that this happens... but nothing much we can do at this point, - // so just log the error and move on. - LOG.error(e); + System.err.printf("\nRequired options are %s\n", required); + } else { + System.err.printf("Error: %s. For help, use \"-options\" to print out information about options.\n", e.getMessage()); } - } - - if (numIndexed != counters.indexed.get()) { - LOG.warn("Unexpected difference between number of indexed documents and index maxDoc."); - } - LOG.info(String.format("Indexing Complete! %,d documents indexed", numIndexed)); - LOG.info("============ Final Counter Values ============"); - LOG.info(String.format("indexed: %,12d", counters.indexed.get())); - LOG.info(String.format("unindexable: %,12d", counters.unindexable.get())); - LOG.info(String.format("empty: %,12d", counters.empty.get())); - LOG.info(String.format("skipped: %,12d", counters.skipped.get())); - LOG.info(String.format("errors: %,12d", counters.errors.get())); - - final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS); - LOG.info(String.format("Total %,d documents indexed in %s", numIndexed, - DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"))); - - return counters; - } - - public static void main(String[] args) throws Exception { - Args indexCollectionArgs = new Args(); - CmdLineParser parser = new CmdLineParser(indexCollectionArgs, ParserProperties.defaults().withUsageWidth(100)); - - try { - parser.parseArgument(args); - } catch (CmdLineException e) { - System.err.println(e.getMessage()); - parser.printUsage(System.err); - System.err.println("Example: " + IndexHnswDenseVectors.class.getSimpleName() + - parser.printExample(OptionHandlerFilter.REQUIRED)); return; } - new IndexHnswDenseVectors(indexCollectionArgs).run(); + new IndexHnswDenseVectors(indexArgs).run(); } } diff --git a/src/main/java/io/anserini/index/IndexInvertedDenseVectors.java b/src/main/java/io/anserini/index/IndexInvertedDenseVectors.java index 54ed6b560d..e3a0c8e810 100644 --- a/src/main/java/io/anserini/index/IndexInvertedDenseVectors.java +++ b/src/main/java/io/anserini/index/IndexInvertedDenseVectors.java @@ -18,18 +18,10 @@ import io.anserini.analysis.fw.FakeWordsEncoderAnalyzer; import io.anserini.analysis.lexlsh.LexicalLshAnalyzer; -import 
diff --git a/src/main/java/io/anserini/index/IndexInvertedDenseVectors.java b/src/main/java/io/anserini/index/IndexInvertedDenseVectors.java
index 54ed6b560d..e3a0c8e810 100644
--- a/src/main/java/io/anserini/index/IndexInvertedDenseVectors.java
+++ b/src/main/java/io/anserini/index/IndexInvertedDenseVectors.java
@@ -18,18 +18,10 @@
 import io.anserini.analysis.fw.FakeWordsEncoderAnalyzer;
 import io.anserini.analysis.lexlsh.LexicalLshAnalyzer;
-import io.anserini.collection.DocumentCollection;
-import io.anserini.collection.FileSegment;
 import io.anserini.collection.SourceDocument;
-import io.anserini.index.generator.EmptyDocumentException;
-import io.anserini.index.generator.InvalidDocumentException;
 import io.anserini.index.generator.LuceneDocumentGenerator;
-import io.anserini.index.generator.SkippedDocumentException;
-import org.apache.commons.lang3.time.DurationFormatUtils;
-import org.apache.logging.log4j.Level;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
-import org.apache.logging.log4j.core.config.Configurator;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -42,41 +34,24 @@
 import org.kohsuke.args4j.CmdLineException;
 import org.kohsuke.args4j.CmdLineParser;
 import org.kohsuke.args4j.Option;
-import org.kohsuke.args4j.OptionHandlerFilter;
 import org.kohsuke.args4j.ParserProperties;
 
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.concurrent.Executors;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
 
-public final class IndexInvertedDenseVectors {
+public final class IndexInvertedDenseVectors extends AbstractIndexer {
   private static final Logger LOG = LogManager.getLogger(IndexInvertedDenseVectors.class);
 
   public static final String FW = "fw";
   public static final String LEXLSH = "lexlsh";
 
-  public static final class Args {
-    @Option(name = "-collection", metaVar = "[class]", required = true, usage = "Collection class in io.anserini.collection.")
-    public String collectionClass;
-
-    @Option(name = "-input", metaVar = "[path]", required = true, usage = "Input collection.")
-    public String input;
-
+  public static final class Args extends AbstractIndexer.Args {
     @Option(name = "-generator", metaVar = "[class]", usage = "Document generator class in io.anserini.index.generator.")
     public String generatorClass = "InvertedDenseVectorDocumentGenerator";
 
-    @Option(name = "-index", metaVar = "[path]", required = true, usage = "Index path.")
-    public String index;
-
     @Option(name = "-encoding", metaVar = "[word]", usage = "Encoding method: {'fw', 'lexlsh'}.")
     public String encoding = FW;
@@ -97,281 +72,75 @@ public static final class Args {
     @Option(name = "-lexlsh.b", metaVar = "[int]", usage = "LexLSH encoding: bucket count.")
     public int bucketCount = 300;
-
-    @Option(name = "-optimize", usage = "Optimizes index by merging into a single index segment.")
-    public boolean optimize = false;
-
-    @Option(name = "-memorybuffer", metaVar = "[mb]", usage = "Memory buffer size in MB.")
-    public int memorybufferSize = 4096;
-
-    @Option(name = "-threads", metaVar = "[num]", usage = "Number of indexing threads.")
-    public int threads = 4;
-
-    @Option(name = "-verbose", forbids = {"-quiet"}, usage = "Enables verbose logging for each indexing thread.")
-    public boolean verbose = false;
-
-    @Option(name = "-quiet", forbids = {"-verbose"}, usage = "Turns off all logging.")
-    public boolean quiet = false;
-  }
-
-  private final class LocalIndexerThread extends Thread {
-    final private Path inputFile;
-    final private IndexWriter writer;
-    final private DocumentCollection collection;
-
-    private LocalIndexerThread(IndexWriter writer, DocumentCollection collection, Path inputFile) {
-      this.writer = writer;
-      this.collection = collection;
-      this.inputFile = inputFile;
-      setName(inputFile.getFileName().toString());
-    }
-
-    @Override
-    public void run() {
-      FileSegment segment = null;
-
-      try {
-        @SuppressWarnings("unchecked")
-        LuceneDocumentGenerator generator = (LuceneDocumentGenerator)
-            generatorClass.getDeclaredConstructor(Args.class).newInstance(args);
-
-        // We keep track of two separate counts: the total count of documents in this file segment (cnt),
-        // and the number of documents in this current "batch" (batch). We update the global counter every
-        // 10k documents: this is so that we get intermediate updates, which is informative if a collection
-        // has only one file segment; see https://github.com/castorini/anserini/issues/683
-        int cnt = 0;
-        int batch = 0;
-
-        segment = collection.createFileSegment(inputFile);
-
-        for (SourceDocument d : segment) {
-          if (!d.indexable()) {
-            counters.unindexable.incrementAndGet();
-            continue;
-          }
-
-          try {
-            writer.addDocument(generator.createDocument(d));
-
-            cnt++;
-            batch++;
-          } catch (EmptyDocumentException e1) {
-            counters.empty.incrementAndGet();
-            continue;
-          } catch (SkippedDocumentException e2) {
-            counters.skipped.incrementAndGet();
-            continue;
-          } catch (InvalidDocumentException e3) {
-            counters.errors.incrementAndGet();
-            continue;
-          }
-
-          // Add the counts from this batch, reset batch counter.
-          if (batch % 10000 == 0) {
-            counters.indexed.addAndGet(batch);
-            batch = 0;
-          }
-        }
-
-        // Add the remaining documents.
-        counters.indexed.addAndGet(batch);
-
-        int skipped = segment.getSkippedCount();
-        if (skipped > 0) {
-          counters.skipped.addAndGet(skipped);
-          LOG.warn(inputFile.getParent().getFileName().toString() + File.separator +
-              inputFile.getFileName().toString() + ": " + skipped + " docs skipped.");
-        }
-
-        if (segment.getErrorStatus()) {
-          counters.errors.incrementAndGet();
-          LOG.error(inputFile.getParent().getFileName().toString() + File.separator +
-              inputFile.getFileName().toString() + ": error iterating through segment.");
-        }
-
-        // Log at the debug level because this can be quite noisy if there are lots of file segments.
-        LOG.debug(inputFile.getParent().getFileName().toString() + File.separator +
-            inputFile.getFileName().toString() + ": " + cnt + " docs added.");
-      } catch (Exception e) {
-        LOG.error(Thread.currentThread().getName() + ": Unexpected Exception:", e);
-      } finally {
-        segment.close();
-      }
-    }
-  }
-
-  private final Args args;
-  private final Path collectionPath;
-  private final Class<LuceneDocumentGenerator<? extends SourceDocument>> generatorClass;
-  private final DocumentCollection collection;
-  private final Counters counters;
-  private final Path indexPath;
-
   @SuppressWarnings("unchecked")
-  public IndexInvertedDenseVectors(Args args) throws Exception {
-    this.args = args;
-
-    if (args.verbose) {
-      // If verbose logging enabled, changed default log level to DEBUG so we get per-thread logging messages.
-      Configurator.setRootLevel(Level.DEBUG);
-      LOG.info("Setting log level to " + Level.DEBUG);
-    } else if (args.quiet) {
-      // If quiet mode enabled, only report warnings and above.
-      Configurator.setRootLevel(Level.WARN);
-    } else {
-      // Otherwise, we get the standard set of log messages.
-      Configurator.setRootLevel(Level.INFO);
-      LOG.info("Setting log level to " + Level.INFO);
-    }
+  public IndexInvertedDenseVectors(Args args) {
+    super(args);
 
-    LOG.info("Starting indexer...");
-    LOG.info("============ Loading Parameters ============");
-    LOG.info("Collection class: " + args.collectionClass);
-    LOG.info("Collection path: " + args.input);
-    LOG.info("Generator: " + args.generatorClass);
-    LOG.info("Index path: " + args.index);
-    LOG.info("Encoding: " + args.encoding);
-    LOG.info("Threads: " + args.threads);
-    LOG.info("Optimize? " + args.optimize);
+    LOG.info("InvertedDenseIndexer settings:");
+    LOG.info(" + Generator: " + args.generatorClass);
+    LOG.info(" + Encoding: " + args.encoding);
 
-    this.indexPath = Paths.get(args.index);
-    if (!Files.exists(this.indexPath)) {
-      Files.createDirectories(this.indexPath);
-    }
-
-    // Our documentation uses /path/to/foo as a convention: to make copy and paste of the commands work,
-    // we assume collections/ as the path location.
-    String pathStr = args.input;
-    if (pathStr.startsWith("/path/to")) {
-      pathStr = pathStr.replace("/path/to", "collections");
-    }
-    this.collectionPath = Paths.get(pathStr);
-    if (!Files.exists(collectionPath) || !Files.isReadable(collectionPath) || !Files.isDirectory(collectionPath)) {
-      throw new RuntimeException("Invalid collection path " + collectionPath + "!");
+    try {
+      super.generatorClass = (Class<LuceneDocumentGenerator<? extends SourceDocument>>)
+          Class.forName("io.anserini.index.generator." + args.generatorClass);
+    } catch (Exception e) {
+      throw new IllegalArgumentException(String.format("Unable to load generator class \"%s\".", args.generatorClass));
     }
 
-    Class<? extends DocumentCollection<?>> collectionClass = (Class<? extends DocumentCollection<?>>)
-        Class.forName("io.anserini.collection." + args.collectionClass);
-    this.collection = collectionClass.getConstructor(Path.class).newInstance(collectionPath);
-
-    this.generatorClass = (Class<LuceneDocumentGenerator<? extends SourceDocument>>)
-        Class.forName("io.anserini.index.generator." + args.generatorClass);
-
-    this.counters = new Counters();
-  }
-
-  public Counters run() throws IOException {
-    LOG.info("============ Indexing Collection ============");
-    final long start = System.nanoTime();
-
     Analyzer vectorAnalyzer;
     if (args.encoding.equalsIgnoreCase(FW)) {
       vectorAnalyzer = new FakeWordsEncoderAnalyzer(args.q);
     } else if (args.encoding.equalsIgnoreCase(LEXLSH)) {
       vectorAnalyzer = new LexicalLshAnalyzer(args.decimals, args.ngrams, args.hashCount, args.bucketCount, args.hashSetSize);
     } else {
-      throw new RuntimeException("Invalid encoding scheme!");
+      throw new IllegalArgumentException("Invalid encoding scheme.");
     }
 
     Map<String, Analyzer> map = new HashMap<>();
     map.put(Constants.VECTOR, vectorAnalyzer);
     Analyzer analyzer = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), map);
 
-    final Directory dir = FSDirectory.open(indexPath);
-    final IndexWriterConfig config = new IndexWriterConfig(analyzer).setCodec(new Lucene95Codec());
-    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
-    config.setRAMBufferSizeMB(args.memorybufferSize);
-    config.setUseCompoundFile(false);
-    config.setMergeScheduler(new ConcurrentMergeScheduler());
-    IndexWriter writer = new IndexWriter(dir, config);
-
-    final ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(args.threads);
-    LOG.info("Thread pool with " + args.threads + " threads initialized.");
-    LOG.info("Initializing collection in " + collectionPath);
-
-    List<Path> segmentPaths = collection.getSegmentPaths();
-    final int segmentCnt = segmentPaths.size();
-
-    LOG.info(String.format("%,d %s found", segmentCnt, (segmentCnt == 1 ? "file" : "files")));
-    LOG.info("Starting to index...");
-
-    segmentPaths.forEach((segmentPath) -> executor.execute(new LocalIndexerThread(writer, collection, segmentPath)));
-
-    executor.shutdown();
 
     try {
-      // Wait for existing tasks to terminate.
-      while (!executor.awaitTermination(1, TimeUnit.MINUTES)) {
-        if (segmentCnt == 1) {
-          LOG.info(String.format("%,d documents indexed", counters.indexed.get()));
-        } else {
-          LOG.info(String.format("%.2f%% of files completed, %,d documents indexed",
-              (double) executor.getCompletedTaskCount() / segmentCnt * 100.0d, counters.indexed.get()));
-        }
-      }
-    } catch (InterruptedException ie) {
-      // (Re-)Cancel if current thread also interrupted.
-      executor.shutdownNow();
-      // Preserve interrupt status.
-      Thread.currentThread().interrupt();
+      final Directory dir = FSDirectory.open(Paths.get(args.index));
+      final IndexWriterConfig config = new IndexWriterConfig(analyzer).setCodec(new Lucene95Codec());
+      config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
+      config.setRAMBufferSizeMB(args.memoryBuffer);
+      config.setUseCompoundFile(false);
+      config.setMergeScheduler(new ConcurrentMergeScheduler());
+      this.writer = new IndexWriter(dir, config);
+    } catch (Exception e) {
+      throw new IllegalArgumentException(String.format("Unable to create IndexWriter: %s.", e.getMessage()));
     }
-
-    if (segmentCnt != executor.getCompletedTaskCount()) {
-      throw new RuntimeException("totalFiles = " + segmentCnt +
-          " is not equal to completedTaskCount = " + executor.getCompletedTaskCount());
-    }
-
-    long numIndexed = writer.getDocStats().maxDoc;
-
-    // Do a final commit.
-    try {
-      writer.commit();
-      if (args.optimize) {
-        writer.forceMerge(1);
-      }
-    } finally {
-      try {
-        writer.close();
-      } catch (IOException e) {
-        // It is possible that this happens... but nothing much we can do at this point,
-        // so just log the error and move on.
-        LOG.error(e);
-      }
-    }
-
-    if (numIndexed != counters.indexed.get()) {
-      LOG.warn("Unexpected difference between number of indexed documents and index maxDoc.");
-    }
-
-    LOG.info(String.format("Indexing Complete! %,d documents indexed", numIndexed));
-    LOG.info("============ Final Counter Values ============");
-    LOG.info(String.format("indexed:     %,12d", counters.indexed.get()));
-    LOG.info(String.format("unindexable: %,12d", counters.unindexable.get()));
-    LOG.info(String.format("empty:       %,12d", counters.empty.get()));
-    LOG.info(String.format("skipped:     %,12d", counters.skipped.get()));
-    LOG.info(String.format("errors:      %,12d", counters.errors.get()));
-
-    final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
-    LOG.info(String.format("Total %,d documents indexed in %s", numIndexed,
-        DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss")));
-
-    return counters;
   }
 
   public static void main(String[] args) throws Exception {
-    Args indexCollectionArgs = new Args();
-    CmdLineParser parser = new CmdLineParser(indexCollectionArgs, ParserProperties.defaults().withUsageWidth(100));
+    Args indexArgs = new Args();
+    CmdLineParser parser = new CmdLineParser(indexArgs, ParserProperties.defaults().withUsageWidth(120));
 
     try {
       parser.parseArgument(args);
     } catch (CmdLineException e) {
-      System.err.println(e.getMessage());
-      parser.printUsage(System.err);
-      System.err.println("Example: " + IndexInvertedDenseVectors.class.getSimpleName() +
-          parser.printExample(OptionHandlerFilter.REQUIRED));
+      if (indexArgs.options) {
+        System.err.printf("Options for %s:\n\n", IndexInvertedDenseVectors.class.getSimpleName());
+        parser.printUsage(System.err);
+
+        List<String> required = new ArrayList<>();
+        parser.getOptions().forEach((option) -> {
+          if (option.option.required()) {
+            required.add(option.option.toString());
+          }
+        });
+
+        System.err.printf("\nRequired options are %s\n", required);
+      } else {
+        System.err.printf("Error: %s. For help, use \"-options\" to print out information about options.\n", e.getMessage());
+      }
+
       return;
     }
 
-    new IndexInvertedDenseVectors(indexCollectionArgs).run();
+    new IndexInvertedDenseVectors(indexArgs).run();
   }
-}
\ No newline at end of file
+}
diff --git a/src/main/java/io/anserini/index/SimpleIndexer.java b/src/main/java/io/anserini/index/SimpleIndexer.java
index a4e78632f7..d3a6302afc 100644
--- a/src/main/java/io/anserini/index/SimpleIndexer.java
+++ b/src/main/java/io/anserini/index/SimpleIndexer.java
@@ -20,12 +20,10 @@
 import io.anserini.analysis.AnalyzerMap;
 import io.anserini.analysis.DefaultEnglishAnalyzer;
 import io.anserini.analysis.HuggingFaceTokenizerAnalyzer;
-import io.anserini.collection.FileSegment;
 import io.anserini.collection.JsonCollection;
 import io.anserini.index.IndexCollection.Args;
 import io.anserini.index.generator.GeneratorException;
 import io.anserini.index.generator.LuceneDocumentGenerator;
-import org.apache.commons.lang3.time.DurationFormatUtils;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.analysis.Analyzer;
@@ -37,8 +35,6 @@
 import org.apache.lucene.store.FSDirectory;
 import org.kohsuke.args4j.CmdLineException;
 import org.kohsuke.args4j.CmdLineParser;
-import org.kohsuke.args4j.OptionHandlerFilter;
-import org.kohsuke.args4j.ParserProperties;
 
 import java.io.IOException;
 import java.nio.file.Files;
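The inverted-dense indexer above routes only the vector field through the encoding analyzer while every other field stays on `StandardAnalyzer`. A compact sketch of that per-field wiring (the field name and the stand-in analyzer below are illustrative, not Anserini's actual declarations):

```java
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

final class PerFieldWiring {
  static Analyzer build() {
    // Stand-in for FakeWordsEncoderAnalyzer / LexicalLshAnalyzer: any analyzer that
    // tokenizes the pre-encoded vector surrogate text.
    Analyzer vectorAnalyzer = new WhitespaceAnalyzer();

    // All fields default to StandardAnalyzer; only "vector" uses the special analyzer.
    return new PerFieldAnalyzerWrapper(new StandardAnalyzer(), Map.of("vector", vectorAnalyzer));
  }
}
```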
diff --git a/src/main/java/io/anserini/index/generator/HnswDenseVectorDocumentGenerator.java b/src/main/java/io/anserini/index/generator/HnswDenseVectorDocumentGenerator.java
index 958425361c..86ccde63d0 100644
--- a/src/main/java/io/anserini/index/generator/HnswDenseVectorDocumentGenerator.java
+++ b/src/main/java/io/anserini/index/generator/HnswDenseVectorDocumentGenerator.java
@@ -40,18 +40,7 @@
  * @param <T> type of the source document
  */
 public class HnswDenseVectorDocumentGenerator<T extends SourceDocument> implements LuceneDocumentGenerator<T> {
-  protected IndexHnswDenseVectors.Args args;
-
-  protected HnswDenseVectorDocumentGenerator() {
-  }
-
-  /**
-   * Constructor with config and counters
-   *
-   * @param args configuration arguments
-   */
-  public HnswDenseVectorDocumentGenerator(IndexHnswDenseVectors.Args args) {
-    this.args = args;
+  public HnswDenseVectorDocumentGenerator() {
   }
 
   private float[] convertJsonArray(String vectorString) throws JsonProcessingException {
@@ -87,9 +76,7 @@ public Document createDocument(T src) throws InvalidDocumentException {
     document.add(new BinaryDocValuesField(Constants.ID, new BytesRef(id)));
     document.add(new KnnFloatVectorField(Constants.VECTOR, contents, VectorSimilarityFunction.DOT_PRODUCT));
 
-    if (args.storeVectors) {
-      document.add(new StoredField(Constants.RAW, src.raw()));
-    }
+
     return document;
   }
 }
diff --git a/src/main/java/io/anserini/index/generator/InvertedDenseVectorDocumentGenerator.java b/src/main/java/io/anserini/index/generator/InvertedDenseVectorDocumentGenerator.java
index a6ab22a0f4..487f296262 100644
--- a/src/main/java/io/anserini/index/generator/InvertedDenseVectorDocumentGenerator.java
+++ b/src/main/java/io/anserini/index/generator/InvertedDenseVectorDocumentGenerator.java
@@ -37,18 +37,7 @@
  * @param <T> type of the source document
  */
 public class InvertedDenseVectorDocumentGenerator<T extends SourceDocument> implements LuceneDocumentGenerator<T> {
-  protected IndexInvertedDenseVectors.Args args;
-
-  protected InvertedDenseVectorDocumentGenerator() {
-  }
-
-  /**
-   * Constructor with config and counters
-   *
-   * @param args configuration arguments
-   */
-  public InvertedDenseVectorDocumentGenerator(IndexInvertedDenseVectors.Args args) {
-    this.args = args;
+  public InvertedDenseVectorDocumentGenerator() {
   }
 
   private float[] convertJsonArray(String vectorString) throws JsonProcessingException {
diff --git a/src/main/java/io/anserini/search/SearchHnswDenseVectors.java b/src/main/java/io/anserini/search/SearchHnswDenseVectors.java
index f2b599e156..dd54d5c566 100644
--- a/src/main/java/io/anserini/search/SearchHnswDenseVectors.java
+++ b/src/main/java/io/anserini/search/SearchHnswDenseVectors.java
@@ -36,7 +36,6 @@
 import org.kohsuke.args4j.CmdLineException;
 import org.kohsuke.args4j.CmdLineParser;
 import org.kohsuke.args4j.Option;
-import org.kohsuke.args4j.OptionHandlerFilter;
 import org.kohsuke.args4j.ParserProperties;
 import org.kohsuke.args4j.spi.StringArrayOptionHandler;
 
@@ -235,7 +234,6 @@ public void close() throws IOException {
     reader.close();
   }
 
-  @SuppressWarnings("unchecked")
   @Override
   public void run() {
     LOG.info("============ Launching Search Threads ============");
@@ -343,20 +341,13 @@ public static void main(String[] args) throws Exception {
       return;
     }
 
-    final long start = System.nanoTime();
-
     // We're at top-level already inside a main; makes no sense to propagate exceptions further, so reformat the
     // exception messages and display on console.
-    try {
-      SearchHnswDenseVectors searcher = new SearchHnswDenseVectors(searchArgs);
+    try (SearchHnswDenseVectors searcher = new SearchHnswDenseVectors(searchArgs)) {
       searcher.run();
-      searcher.close();
     } catch (IllegalArgumentException e) {
       System.err.printf("Error: %s\n", e.getMessage());
       return;
     }
-
-    final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
-    LOG.info("Total run time: " + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"));
   }
 }
diff --git a/src/main/java/io/anserini/search/SearchInvertedDenseVectors.java b/src/main/java/io/anserini/search/SearchInvertedDenseVectors.java
index 1e696eaf22..0a70e36c14 100644
--- a/src/main/java/io/anserini/search/SearchInvertedDenseVectors.java
+++ b/src/main/java/io/anserini/search/SearchInvertedDenseVectors.java
@@ -327,20 +327,13 @@ public static void main(String[] args) throws Exception {
       return;
     }
 
-    final long start = System.nanoTime();
-
     // We're at top-level already inside a main; makes no sense to propagate exceptions further, so reformat the
     // exception messages and display on console.
-    try {
-      SearchInvertedDenseVectors searcher = new SearchInvertedDenseVectors(searchArgs);
+    try (SearchInvertedDenseVectors searcher = new SearchInvertedDenseVectors(searchArgs)) {
       searcher.run();
-      searcher.close();
     } catch (IllegalArgumentException e) {
       System.err.printf("Error: %s\n", e.getMessage());
       return;
     }
-
-    final long durationMillis = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
-    LOG.info("Total run time: " + DurationFormatUtils.formatDuration(durationMillis, "HH:mm:ss"));
   }
 }
\ No newline at end of file
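Both searcher mains above swap an explicit `close()` for try-with-resources, which guarantees cleanup even when `run()` throws. The shape of that refactoring, reduced to essentials (class and method names here are illustrative stand-ins):

```java
import java.io.Closeable;
import java.io.IOException;

final class VectorSearcher implements Closeable {
  void run() { /* issue queries against the index */ }

  @Override
  public void close() throws IOException { /* release the underlying IndexReader */ }
}

final class SearcherDemo {
  public static void main(String[] args) throws IOException {
    // close() now runs on normal exit and on exceptions alike; the old pattern,
    // run() followed by an explicit close(), leaked the reader when run() threw.
    try (VectorSearcher searcher = new VectorSearcher()) {
      searcher.run();
    } catch (IllegalArgumentException e) {
      System.err.printf("Error: %s%n", e.getMessage());
    }
  }
}
```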
diff --git a/src/main/resources/regression/dl19-passage-cos-dpr-distil-hnsw-onnx.yaml b/src/main/resources/regression/dl19-passage-cos-dpr-distil-hnsw-onnx.yaml
index 416e8efc63..31966a5457 100644
--- a/src/main/resources/regression/dl19-passage-cos-dpr-distil-hnsw-onnx.yaml
+++ b/src/main/resources/regression/dl19-passage-cos-dpr-distil-hnsw-onnx.yaml
@@ -10,7 +10,7 @@ index_type: hnsw
 collection_class: JsonDenseVectorCollection
 generator_class: HnswDenseVectorDocumentGenerator
 index_threads: 16
-index_options: -M 16 -efC 100
+index_options: -M 16 -efC 100 -memoryBuffer 65536
 metrics:
   - metric: AP@1000
diff --git a/src/main/resources/regression/dl19-passage-cos-dpr-distil-hnsw.yaml b/src/main/resources/regression/dl19-passage-cos-dpr-distil-hnsw.yaml
index 67abbbf149..8b38c5c1b2 100644
--- a/src/main/resources/regression/dl19-passage-cos-dpr-distil-hnsw.yaml
+++ b/src/main/resources/regression/dl19-passage-cos-dpr-distil-hnsw.yaml
@@ -10,7 +10,7 @@ index_type: hnsw
 collection_class: JsonDenseVectorCollection
 generator_class: HnswDenseVectorDocumentGenerator
 index_threads: 16
-index_options: -M 16 -efC 100
+index_options: -M 16 -efC 100 -memoryBuffer 65536
 metrics:
   - metric: AP@1000
diff --git a/src/main/resources/regression/dl19-passage-openai-ada2.yaml b/src/main/resources/regression/dl19-passage-openai-ada2.yaml
index ea0701a5d0..9667d0907c 100644
--- a/src/main/resources/regression/dl19-passage-openai-ada2.yaml
+++ b/src/main/resources/regression/dl19-passage-openai-ada2.yaml
@@ -10,7 +10,7 @@ index_type: hnsw
 collection_class: JsonDenseVectorCollection
 generator_class: HnswDenseVectorDocumentGenerator
 index_threads: 16
-index_options: -M 16 -efC 100 -memorybuffer 65536
+index_options: -M 16 -efC 100 -memoryBuffer 65536
 metrics:
   - metric: AP@1000
diff --git a/src/main/resources/regression/dl20-passage-cos-dpr-distil-hnsw-onnx.yaml b/src/main/resources/regression/dl20-passage-cos-dpr-distil-hnsw-onnx.yaml
index 15daab5abc..d2e0f89991 100644
--- a/src/main/resources/regression/dl20-passage-cos-dpr-distil-hnsw-onnx.yaml
+++ b/src/main/resources/regression/dl20-passage-cos-dpr-distil-hnsw-onnx.yaml
@@ -10,7 +10,7 @@ index_type: hnsw
 collection_class: JsonDenseVectorCollection
 generator_class: HnswDenseVectorDocumentGenerator
 index_threads: 16
-index_options: -M 16 -efC 100
+index_options: -M 16 -efC 100 -memoryBuffer 65536
 metrics:
   - metric: AP@1000
diff --git a/src/main/resources/regression/dl20-passage-cos-dpr-distil-hnsw.yaml b/src/main/resources/regression/dl20-passage-cos-dpr-distil-hnsw.yaml
index ba016059d6..f120eadd61 100644
--- a/src/main/resources/regression/dl20-passage-cos-dpr-distil-hnsw.yaml
+++ b/src/main/resources/regression/dl20-passage-cos-dpr-distil-hnsw.yaml
@@ -10,7 +10,7 @@ index_type: hnsw
 collection_class: JsonDenseVectorCollection
 generator_class: HnswDenseVectorDocumentGenerator
 index_threads: 16
-index_options: -M 16 -efC 100
+index_options: -M 16 -efC 100 -memoryBuffer 65536
 metrics:
   - metric: AP@1000
diff --git a/src/main/resources/regression/dl20-passage-openai-ada2.yaml b/src/main/resources/regression/dl20-passage-openai-ada2.yaml
index e151b4f91b..152d18765c 100644
--- a/src/main/resources/regression/dl20-passage-openai-ada2.yaml
+++ b/src/main/resources/regression/dl20-passage-openai-ada2.yaml
@@ -10,7 +10,7 @@ index_type: hnsw
 collection_class: JsonDenseVectorCollection
 generator_class: HnswDenseVectorDocumentGenerator
 index_threads: 16
-index_options: -M 16 -efC 100 -memorybuffer 65536
+index_options: -M 16 -efC 100 -memoryBuffer 65536
 metrics:
   - metric: AP@1000
diff --git a/src/main/resources/regression/mb11.yaml b/src/main/resources/regression/mb11.yaml
index 4a1a15e1f2..84db5dd7e7 100644
--- a/src/main/resources/regression/mb11.yaml
+++ b/src/main/resources/regression/mb11.yaml
@@ -6,7 +6,7 @@ index_path: indexes/lucene-index.mb11/
 collection_class: TweetCollection
 generator_class: TweetGenerator
 index_threads: 44
-index_options: -storePositions -storeDocvectors -storeRaw -uniqueDocid -tweet.keepUrls -tweet.stemming
+index_options: -storePositions -storeDocvectors -storeRaw -tweet.keepUrls -tweet.stemming
 index_stats:
   documents: 14950477
   documents (non-empty): 14950449
diff --git a/src/main/resources/regression/msmarco-passage-cos-dpr-distil-hnsw-onnx.yaml b/src/main/resources/regression/msmarco-passage-cos-dpr-distil-hnsw-onnx.yaml
index b333652350..4746f389f3 100644
--- a/src/main/resources/regression/msmarco-passage-cos-dpr-distil-hnsw-onnx.yaml
+++ b/src/main/resources/regression/msmarco-passage-cos-dpr-distil-hnsw-onnx.yaml
@@ -10,7 +10,7 @@ index_type: hnsw
 collection_class: JsonDenseVectorCollection
 generator_class: HnswDenseVectorDocumentGenerator
 index_threads: 16
-index_options: -M 16 -efC 100
+index_options: -M 16 -efC 100 -memoryBuffer 65536
 metrics:
   - metric: AP@1000
diff --git a/src/main/resources/regression/msmarco-passage-cos-dpr-distil-hnsw.yaml b/src/main/resources/regression/msmarco-passage-cos-dpr-distil-hnsw.yaml
index 65ffa3502d..c63c3b7972 100644
--- a/src/main/resources/regression/msmarco-passage-cos-dpr-distil-hnsw.yaml
+++ b/src/main/resources/regression/msmarco-passage-cos-dpr-distil-hnsw.yaml
@@ -10,7 +10,7 @@ index_type: hnsw
 collection_class: JsonDenseVectorCollection
 generator_class: HnswDenseVectorDocumentGenerator
 index_threads: 16
-index_options: -M 16 -efC 100
+index_options: -M 16 -efC 100 -memoryBuffer 65536
 metrics:
   - metric: AP@1000
diff --git a/src/main/resources/regression/msmarco-passage-openai-ada2.yaml b/src/main/resources/regression/msmarco-passage-openai-ada2.yaml
index 16d1cd8b5a..08289ce2c0 100644
--- a/src/main/resources/regression/msmarco-passage-openai-ada2.yaml
+++ b/src/main/resources/regression/msmarco-passage-openai-ada2.yaml
@@ -10,7 +10,7 @@ index_type: hnsw
 collection_class: JsonDenseVectorCollection
 generator_class: HnswDenseVectorDocumentGenerator
 index_threads: 16
-index_options: -M 16 -efC 100 -memorybuffer 65536
+index_options: -M 16 -efC 100 -memoryBuffer 65536
 metrics:
   - metric: AP@1000
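All regression configs now spell the option `-memoryBuffer` (camelCase), replacing the inconsistent `-memorybuffer` and adding it where it was missing. A sketch of how such a flag plausibly plugs into the writer configuration; the class and field below are illustrative, not the actual Anserini declaration:

```java
import org.apache.lucene.index.IndexWriterConfig;
import org.kohsuke.args4j.Option;

final class BufferArgs {
  @Option(name = "-memoryBuffer", metaVar = "[mb]", usage = "Memory buffer size in MB.")
  public int memoryBuffer = 4096;
}

final class BufferWiring {
  static IndexWriterConfig configure(BufferArgs args) {
    // A larger RAM buffer means fewer segment flushes; the dense-vector regressions
    // above pass 65536 MB, trading memory for fewer, larger HNSW segments.
    return new IndexWriterConfig().setRAMBufferSizeMB(args.memoryBuffer);
  }
}
```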
"target/idx-sample-trec-index" + System.currentTimeMillis(), + "-generator", "DefaultLuceneDocumentGenerator", + }; + + IndexCollection.main(indexArgs); + } + + @Test(expected = IllegalArgumentException.class) + public void testInvalidGenerator() throws Exception { + String[] indexArgs = new String[] { + "-collection", "TrecCollection", + "-input", "src/test/resources/sample_docs/trec/collection2", + "-index", "target/idx-sample-trec-index" + System.currentTimeMillis(), + "-generator", "FakeDefaultLuceneDocumentGenerator", + }; + + IndexCollection.main(indexArgs); + } + + @Test + public void testDefaultGenerator() throws Exception { + String[] indexArgs = new String[] { + "-collection", "TrecCollection", + "-input", "src/test/resources/sample_docs/trec/collection2", + "-index", "target/idx-sample-trec-index" + System.currentTimeMillis() + }; + + IndexCollection.main(indexArgs); + // If this succeeded, then the default -generator of DefaultLuceneDocumentGenerator must have worked. + } +} \ No newline at end of file diff --git a/src/test/java/io/anserini/index/IndexHnswDenseVectorsTest.java b/src/test/java/io/anserini/index/IndexHnswDenseVectorsTest.java index 35ea033dfc..bade9e0366 100644 --- a/src/test/java/io/anserini/index/IndexHnswDenseVectorsTest.java +++ b/src/test/java/io/anserini/index/IndexHnswDenseVectorsTest.java @@ -49,6 +49,7 @@ private void restoreStderr() { @BeforeClass public static void setupClass() { + Configurator.setLevel(AbstractIndexer.class.getName(), Level.ERROR); Configurator.setLevel(IndexHnswDenseVectors.class.getName(), Level.ERROR); } @@ -58,12 +59,23 @@ public void testEmptyInvocation() throws Exception { String[] indexArgs = new String[] {}; IndexHnswDenseVectors.main(indexArgs); - assertTrue(err.toString().contains("Example: IndexHnswDenseVectors")); + assertTrue(err.toString().contains("Error")); + assertTrue(err.toString().contains("is required")); restoreStderr(); } - @Test(expected = ClassNotFoundException.class) + @Test + public void testAskForHelp() throws Exception { + redirectStderr(); + + IndexHnswDenseVectors.main(new String[] {"-options"}); + assertTrue(err.toString().contains("Options for")); + + restoreStderr(); + } + + @Test(expected = IllegalArgumentException.class) public void testInvalidCollection() throws Exception { String[] indexArgs = new String[] { "-collection", "FakeJsonDenseVectorCollection", @@ -77,7 +89,7 @@ public void testInvalidCollection() throws Exception { IndexHnswDenseVectors.main(indexArgs); } - @Test(expected = RuntimeException.class) + @Test(expected = IllegalArgumentException.class) public void testCollectionPath() throws Exception { String[] indexArgs = new String[] { "-collection", "JsonDenseVectorCollection", @@ -91,7 +103,7 @@ public void testCollectionPath() throws Exception { IndexHnswDenseVectors.main(indexArgs); } - @Test(expected = ClassNotFoundException.class) + @Test(expected = IllegalArgumentException.class) public void testInvalidGenerator() throws Exception { String[] indexArgs = new String[] { "-collection", "JsonDenseVectorCollection", @@ -105,6 +117,20 @@ public void testInvalidGenerator() throws Exception { IndexHnswDenseVectors.main(indexArgs); } + @Test + public void testDefaultGenerator() throws Exception { + String[] indexArgs = new String[] { + "-collection", "JsonDenseVectorCollection", + "-input", "src/test/resources/sample_docs/openai_ada2/json_vector", + "-index", "target/idx-sample-hnsw" + System.currentTimeMillis(), + "-threads", "1", + "-M", "16", "-efC", "100" + }; + + 
IndexHnswDenseVectors.main(indexArgs); + // If this succeeded, then the default -generator of HnswDenseVectorDocumentGenerator must have worked. + } + @Test public void test1() throws Exception { String indexPath = "target/idx-sample-hnsw" + System.currentTimeMillis(); diff --git a/src/test/java/io/anserini/index/IndexInvertedDenseVectorsTest.java b/src/test/java/io/anserini/index/IndexInvertedDenseVectorsTest.java index b210c44074..2fda6f6cd6 100644 --- a/src/test/java/io/anserini/index/IndexInvertedDenseVectorsTest.java +++ b/src/test/java/io/anserini/index/IndexInvertedDenseVectorsTest.java @@ -49,6 +49,7 @@ private void restoreStderr() { @BeforeClass public static void setupClass() { + Configurator.setLevel(AbstractIndexer.class.getName(), Level.ERROR); Configurator.setLevel(IndexInvertedDenseVectors.class.getName(), Level.ERROR); } @@ -58,12 +59,23 @@ public void testEmptyInvocation() throws Exception { String[] indexArgs = new String[] {}; IndexInvertedDenseVectors.main(indexArgs); - assertTrue(err.toString().contains("Example: IndexInvertedDenseVectors")); + assertTrue(err.toString().contains("Error")); + assertTrue(err.toString().contains("is required")); restoreStderr(); } - @Test(expected = ClassNotFoundException.class) + @Test + public void testAskForHelp() throws Exception { + redirectStderr(); + + IndexInvertedDenseVectors.main(new String[] {"-options"}); + assertTrue(err.toString().contains("Options for")); + + restoreStderr(); + } + + @Test(expected = IllegalArgumentException.class) public void testInvalidCollection() throws Exception { String[] indexArgs = new String[] { "-collection", "FakeCollection", @@ -76,7 +88,7 @@ public void testInvalidCollection() throws Exception { IndexInvertedDenseVectors.main(indexArgs); } - @Test(expected = RuntimeException.class) + @Test(expected = IllegalArgumentException.class) public void testCollectionPath() throws Exception { String[] indexArgs = new String[] { "-collection", "JsonDenseVectorCollection", @@ -89,7 +101,7 @@ public void testCollectionPath() throws Exception { IndexInvertedDenseVectors.main(indexArgs); } - @Test(expected = ClassNotFoundException.class) + @Test(expected = IllegalArgumentException.class) public void testInvalidGenerator() throws Exception { String[] indexArgs = new String[] { "-collection", "JsonDenseVectorCollection", @@ -102,7 +114,20 @@ public void testInvalidGenerator() throws Exception { IndexInvertedDenseVectors.main(indexArgs); } - @Test(expected = RuntimeException.class) + @Test + public void testDefaultGenerator() throws Exception { + String[] indexArgs = new String[] { + "-collection", "JsonDenseVectorCollection", + "-input", "src/test/resources/sample_docs/openai_ada2/json_vector", + "-index", "target/idx-sample-ll-vector" + System.currentTimeMillis(), + "-encoding", "lexlsh" + }; + + IndexInvertedDenseVectors.main(indexArgs); + // If this succeeded, then the default -generator of InvertedDenseVectorDocumentGenerator must have worked. 
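The invocation tests above all follow the same stderr-capture idiom; distilled to its essentials (the printed message stands in for an actual CLI call such as `SomeTool.main(new String[] {})`):

```java
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

final class StderrCaptureDemo {
  public static void main(String[] args) {
    PrintStream saved = System.err;
    ByteArrayOutputStream err = new ByteArrayOutputStream();
    System.setErr(new PrintStream(err));  // swap in a buffer before invoking the CLI
    try {
      System.err.println("Error: option \"-input\" is required");  // stand-in for the tool's usage error
      if (!err.toString().contains("is required")) {
        throw new AssertionError("expected usage error on stderr");
      }
    } finally {
      System.setErr(saved);  // always restore, or later tests lose their stderr
    }
  }
}
```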
diff --git a/src/test/java/io/anserini/index/IndexInvertedDenseVectorsTest.java b/src/test/java/io/anserini/index/IndexInvertedDenseVectorsTest.java
index b210c44074..2fda6f6cd6 100644
--- a/src/test/java/io/anserini/index/IndexInvertedDenseVectorsTest.java
+++ b/src/test/java/io/anserini/index/IndexInvertedDenseVectorsTest.java
@@ -49,6 +49,7 @@ private void restoreStderr() {
 
   @BeforeClass
   public static void setupClass() {
+    Configurator.setLevel(AbstractIndexer.class.getName(), Level.ERROR);
     Configurator.setLevel(IndexInvertedDenseVectors.class.getName(), Level.ERROR);
   }
 
@@ -58,12 +59,23 @@ public void testEmptyInvocation() throws Exception {
     String[] indexArgs = new String[] {};
 
     IndexInvertedDenseVectors.main(indexArgs);
-    assertTrue(err.toString().contains("Example: IndexInvertedDenseVectors"));
+    assertTrue(err.toString().contains("Error"));
+    assertTrue(err.toString().contains("is required"));
 
     restoreStderr();
   }
 
-  @Test(expected = ClassNotFoundException.class)
+  @Test
+  public void testAskForHelp() throws Exception {
+    redirectStderr();
+
+    IndexInvertedDenseVectors.main(new String[] {"-options"});
+    assertTrue(err.toString().contains("Options for"));
+
+    restoreStderr();
+  }
+
+  @Test(expected = IllegalArgumentException.class)
   public void testInvalidCollection() throws Exception {
     String[] indexArgs = new String[] {
         "-collection", "FakeCollection",
@@ -76,7 +88,7 @@ public void testInvalidCollection() throws Exception {
     IndexInvertedDenseVectors.main(indexArgs);
   }
 
-  @Test(expected = RuntimeException.class)
+  @Test(expected = IllegalArgumentException.class)
   public void testCollectionPath() throws Exception {
     String[] indexArgs = new String[] {
         "-collection", "JsonDenseVectorCollection",
@@ -89,7 +101,7 @@ public void testCollectionPath() throws Exception {
     IndexInvertedDenseVectors.main(indexArgs);
   }
 
-  @Test(expected = ClassNotFoundException.class)
+  @Test(expected = IllegalArgumentException.class)
   public void testInvalidGenerator() throws Exception {
     String[] indexArgs = new String[] {
         "-collection", "JsonDenseVectorCollection",
@@ -102,7 +114,20 @@ public void testInvalidGenerator() throws Exception {
     IndexInvertedDenseVectors.main(indexArgs);
   }
 
-  @Test(expected = RuntimeException.class)
+  @Test
+  public void testDefaultGenerator() throws Exception {
+    String[] indexArgs = new String[] {
+        "-collection", "JsonDenseVectorCollection",
+        "-input", "src/test/resources/sample_docs/openai_ada2/json_vector",
+        "-index", "target/idx-sample-ll-vector" + System.currentTimeMillis(),
+        "-encoding", "lexlsh"
+    };
+
+    IndexInvertedDenseVectors.main(indexArgs);
+    // If this succeeded, then the default -generator of InvertedDenseVectorDocumentGenerator must have worked.
+  }
+
+  @Test(expected = IllegalArgumentException.class)
   public void testInvalidEncoding() throws Exception {
     String[] indexArgs = new String[] {
         "-collection", "JsonDenseVectorCollection",
diff --git a/src/test/java/io/anserini/integration/TweetEndToEndTest.java b/src/test/java/io/anserini/integration/TweetEndToEndTest.java
index cfcd30f750..0a47ae35ab 100644
--- a/src/test/java/io/anserini/integration/TweetEndToEndTest.java
+++ b/src/test/java/io/anserini/integration/TweetEndToEndTest.java
@@ -17,6 +17,7 @@
 package io.anserini.integration;
 
 import io.anserini.collection.TweetCollection;
+import io.anserini.index.AbstractIndexer;
 import io.anserini.index.IndexCollection;
 import io.anserini.index.generator.TweetGenerator;
 import org.apache.logging.log4j.Level;
@@ -28,6 +29,7 @@ public class TweetEndToEndTest extends EndToEndTest {
   @BeforeClass
   public static void setupClass() {
+    Configurator.setLevel(AbstractIndexer.class.getName(), Level.ERROR);
     Configurator.setLevel(IndexCollection.class.getName(), Level.ERROR);
   }
 
diff --git a/src/test/java/io/anserini/search/SearchHnswDenseVectorsTest.java b/src/test/java/io/anserini/search/SearchHnswDenseVectorsTest.java
index d2f72b65b6..2f9bb98032 100644
--- a/src/test/java/io/anserini/search/SearchHnswDenseVectorsTest.java
+++ b/src/test/java/io/anserini/search/SearchHnswDenseVectorsTest.java
@@ -17,6 +17,7 @@
 package io.anserini.search;
 
 import io.anserini.TestUtils;
+import io.anserini.index.AbstractIndexer;
 import io.anserini.index.IndexHnswDenseVectors;
 import org.apache.logging.log4j.Level;
 import org.apache.logging.log4j.core.config.Configurator;
@@ -49,6 +50,7 @@ private void restoreStderr() {
 
   @BeforeClass
   public static void setupClass() {
+    Configurator.setLevel(AbstractIndexer.class.getName(), Level.ERROR);
     Configurator.setLevel(IndexHnswDenseVectors.class.getName(), Level.ERROR);
     Configurator.setLevel(SearchHnswDenseVectors.class.getName(), Level.ERROR);
   }
 
diff --git a/src/test/java/io/anserini/search/SearchInvertedDenseVectorsTest.java b/src/test/java/io/anserini/search/SearchInvertedDenseVectorsTest.java
index e6c514ee16..7189a8c6f6 100644
--- a/src/test/java/io/anserini/search/SearchInvertedDenseVectorsTest.java
+++ b/src/test/java/io/anserini/search/SearchInvertedDenseVectorsTest.java
@@ -17,6 +17,7 @@
 package io.anserini.search;
 
 import io.anserini.TestUtils;
+import io.anserini.index.AbstractIndexer;
 import io.anserini.index.IndexInvertedDenseVectors;
 import org.apache.logging.log4j.Level;
 import org.apache.logging.log4j.core.config.Configurator;
@@ -49,6 +50,7 @@ private void restoreStderr() {
 
   @BeforeClass
   public static void setupClass() {
+    Configurator.setLevel(AbstractIndexer.class.getName(), Level.ERROR);
    Configurator.setLevel(IndexInvertedDenseVectors.class.getName(), Level.ERROR);
     Configurator.setLevel(SearchInvertedDenseVectors.class.getName(), Level.ERROR);
   }
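A recurring detail in the test diffs above: because shared progress logging moved into `AbstractIndexer`, silencing only the concrete indexer's logger no longer quiets test output; Log4j2 levels are per logger name, so each class's logger must be set explicitly. A small helper sketch of the pattern (hypothetical; the tests inline these calls):

```java
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.core.config.Configurator;

final class QuietLogs {
  // Setting a subclass's logger does not affect its base class's logger,
  // so every class that logs must be silenced by name.
  static void silence(Class<?>... classes) {
    for (Class<?> clazz : classes) {
      Configurator.setLevel(clazz.getName(), Level.ERROR);
    }
  }
}
```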