Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-9450: Use BinaryDocValue fields in the taxonomy index based on the existing index version #220

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {

private final Directory dir;
private final IndexWriter indexWriter;
private final boolean useOlderStoredFieldIndex;
private final TaxonomyWriterCache cache;
private final AtomicInteger cacheMisses = new AtomicInteger(0);

Expand Down Expand Up @@ -124,12 +125,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
private volatile TaxonomyIndexArrays taxoArrays;
private volatile int nextID;

/**
 * Returns the user data map recorded in the latest commit of the given directory.
 *
 * @param dir the directory whose latest commit is read
 * @return the user data attached to that commit, as returned by {@code SegmentInfos#getUserData()}
 * @throws IOException if the latest commit cannot be read
 */
private static Map<String, String> readCommitData(Directory dir) throws IOException {
return SegmentInfos.readLatestCommit(dir).getUserData();
}

/**
* Construct a Taxonomy writer.
*
Expand Down Expand Up @@ -164,9 +159,16 @@ public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode, TaxonomyW
openMode = config.getOpenMode();
if (!DirectoryReader.indexExists(directory)) {
gautamworah96 marked this conversation as resolved.
Show resolved Hide resolved
indexEpoch = 1;
// no commit exists so we can safely use the new BinaryDocValues field
useOlderStoredFieldIndex = false;
} else {
String epochStr = null;
Map<String, String> commitData = readCommitData(directory);

SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
/* a previous commit exists, so check the version of the last commit */
useOlderStoredFieldIndex = infos.getIndexCreatedVersionMajor() <= 8;

Map<String, String> commitData = infos.getUserData();
if (commitData != null) {
epochStr = commitData.get(INDEX_EPOCH);
}
Expand Down Expand Up @@ -475,8 +477,15 @@ private int addCategoryDocument(FacetLabel categoryPath, int parent) throws IOEx

String fieldPath = FacetsConfig.pathToString(categoryPath.components, categoryPath.length);
fullPathField.setStringValue(fieldPath);

if (useOlderStoredFieldIndex) {
fullPathField = new StringField(Consts.FULL, fieldPath, Field.Store.YES);
gautamworah96 marked this conversation as resolved.
Show resolved Hide resolved
} else {
/* Lucene 9 switches to BinaryDocValuesField for storing taxonomy categories */
d.add(new BinaryDocValuesField(Consts.FULL, new BytesRef(fieldPath)));
}

d.add(fullPathField);
d.add(new BinaryDocValuesField(Consts.FULL, new BytesRef(fieldPath)));

// Note that we do not pass an Analyzer here because the fields that are
// added to the Document are untokenized or contain their own TokenStream.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
//
// Then move the zip file to your trunk checkout and use it in your test cases

public static final String oldTaxonomyIndexName = "taxonomy.8.6.3-cfs";
public static final String oldTaxonomyIndexName = "taxonomy.8.10.0-cfs";

// LUCENE-9334 requires consistency of field data structures between documents.
// The old taxonomy index had the $full_path$ field indexed only with postings.
// It is not allowed to add the same $full_path$ field indexed with BinaryDocValues
// to a new segment, which is what this test is trying to do.
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-9334")
gautamworah96 marked this conversation as resolved.
Show resolved Hide resolved
// Runs createNewTaxonomyIndex against the index named by oldTaxonomyIndexName.
public void testCreateNewTaxonomy() throws IOException {
createNewTaxonomyIndex(oldTaxonomyIndexName);
}
Expand All @@ -68,8 +63,8 @@ private void createNewTaxonomyIndex(String dirName) throws IOException {

DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);

FacetLabel cp_b = new FacetLabel("b");
writer.addCategory(cp_b);
FacetLabel cp_c = new FacetLabel("c");
writer.addCategory(cp_c);
writer.getInternalIndexWriter().forceMerge(1);
writer.commit();

Expand All @@ -80,10 +75,15 @@ private void createNewTaxonomyIndex(String dirName) throws IOException {
// Just asserting ord1 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
assertNotNull(reader.getPath(ord1));

int ord2 = reader.getOrdinal(cp_b);
int ord2 = reader.getOrdinal(new FacetLabel("b"));
assert ord2 != TaxonomyReader.INVALID_ORDINAL;
// Just asserting ord2 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
assertNotNull(reader.getPath(ord2));

int ord3 = reader.getOrdinal(cp_c);
assert ord3 != TaxonomyReader.INVALID_ORDINAL;
assertNotNull(reader.getPath(ord3));

reader.close();
writer.close();
dir.close();
Expand All @@ -103,6 +103,7 @@ private void createOldTaxonomyIndex(String dirName) throws IOException {
TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);

writer.addCategory(new FacetLabel("a"));
writer.addCategory(new FacetLabel("b"));
writer.commit();
writer.close();
dir.close();
Expand Down
Binary file not shown.
Binary file not shown.