Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
e9fedcb
Add quantization state reader and writer
Aug 21, 2024
1f5c030
Make inner class private
Aug 21, 2024
627ac7e
Address PR Feedback
Aug 21, 2024
a81e99d
Fix tests
Aug 22, 2024
daa39ed
Address PR feedback
Aug 22, 2024
8abf3cd
Add writer tests
Aug 22, 2024
e7d5ac8
Add reader tests
Aug 22, 2024
c804652
Add changelog entry
Aug 22, 2024
f711e39
Remove extra line
Aug 22, 2024
425b920
Address PR Feedback
Aug 22, 2024
2ea5371
Fix javadocs
Aug 22, 2024
89e45de
Make reader methods static
Aug 22, 2024
8cd2ee3
Integrate with merge
Aug 22, 2024
d644c9b
Change field name writing to internal field number and change file su…
Aug 23, 2024
5fe570d
Merge branch 'main' into quantization_state_writer_reader
Aug 26, 2024
92bb539
Change integration with native engine writer
Aug 26, 2024
5b03f30
Fix tests
Aug 26, 2024
9b486d8
Integrate with query flow
Aug 26, 2024
0a7d80e
Remove duplicate writeFooter
Aug 26, 2024
de89987
Integrate with cache
Aug 26, 2024
59be504
Change implementation and fix tests
Aug 27, 2024
366072f
Add test for reading from QuantizationStateReadConfig
Aug 27, 2024
077a1b6
Add cache manager tests
Aug 28, 2024
3dbbad9
Port changes from feature branch to fix end to end flow
Aug 28, 2024
45b6fbf
Change integration with query flow
Aug 29, 2024
0e0eaf3
Remove unnecessary changes in KNNWeight
Aug 29, 2024
9f8ce0c
Merge branch 'main' into quantization_state_writer_reader
Aug 29, 2024
595ec6f
Merge branch 'main' into quantization_state_writer_reader
Sep 3, 2024
1f67103
Address PR Feedback and fix compile error from rebase
Sep 3, 2024
80e74e3
Abstract common functionality between read methods
Sep 3, 2024
60cd2fa
Avoid repeat calls to quantization cache manager get instance
Sep 3, 2024
c0b9e71
Address PR feedback
Sep 3, 2024
776d531
Merge branch 'main' into quantization_state_writer_reader
Sep 3, 2024
8c21304
Add unit tests for KNNWeight
Sep 4, 2024
b304e3c
Address PR Feedback
Sep 4, 2024
fe19fc0
Merge branch 'main' into quantization_state_writer_reader
Sep 4, 2024
d8959d0
Address feedbackK
Sep 4, 2024
ba931da
Fix bwc tests
Sep 4, 2024
fdfc301
Revert previous change
Sep 4, 2024
0e37d2d
Condense into one loop while reading
Sep 4, 2024
eaff8f0
Address PR Feedback
Sep 4, 2024
414b2e4
Address PR Feedback
Sep 4, 2024
2accfb1
Address feedback
Sep 4, 2024
a6c87e3
Revert "Address feedback"
Sep 4, 2024
ff07704
Address feedback
Sep 4, 2024
34cd60b
Address feedback
Sep 4, 2024
b82fb90
Merge branch 'main' into quantization_state_writer_reader
Sep 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ public class KNNConstants {
public static final VectorDataType DEFAULT_VECTOR_DATA_TYPE_FIELD = VectorDataType.FLOAT;

public static final String RADIAL_SEARCH_KEY = "radial_search";
public static final String QUANTIZATION_STATE_FILE_SUFFIX = "qs";
Comment thread
ryanbogan marked this conversation as resolved.
Outdated

// Lucene specific constants
public static final String LUCENE_NAME = "lucene";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN990Codec;

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.opensearch.knn.common.KNNConstants;
import org.opensearch.knn.quantization.models.quantizationState.QuantizationState;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* Reads quantization states
*/
public class KNNQuantizationStateReader {
Comment thread
ryanbogan marked this conversation as resolved.
Outdated

/**
Comment thread
ryanbogan marked this conversation as resolved.
Outdated
* Read quantization states and return list of fieldNames and bytes
*
* File format:
* Header
* QS1 state bytes
* QS2 state bytes
* Number of quantization states
* QS1 field name
* QS1 state bytes length
* QS1 position of state bytes
* QS2 field name
* QS2 state bytes length
* QS2 position of state bytes
* Position of index section (where QS1 field name is located)
* -1 (marker)
* Footer
*
* @param state the read state to read from
*/
public Map<String, byte[]> read(SegmentReadState state) {

@Vikasht34 Vikasht34 Aug 22, 2024

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's Reafctor the code between both reads. Most of the things are common

String quantizationStateFileName = IndexFileNames.segmentFileName(
state.segmentInfo.name,
state.segmentSuffix,
KNNConstants.QUANTIZATION_STATE_FILE_SUFFIX
);
Map<String, byte[]> readQuantizationStateInfos = new HashMap<>();

try (IndexInput input = state.directory.openInput(quantizationStateFileName, IOContext.READ)) {
Comment thread
jmazanec15 marked this conversation as resolved.
Outdated

Comment thread
navneet1v marked this conversation as resolved.
Outdated
int numFields = getNumFields(input);

List<String> fieldNames = new ArrayList<>();
List<Long> positions = new ArrayList<>();
List<Integer> lengths = new ArrayList<>();

// Read each field's metadata from the index section
for (int i = 0; i < numFields; i++) {
fieldNames.add(input.readString());
int length = input.readInt();
lengths.add(length);
long position = input.readVLong();
positions.add(position);
}
// Read each field's bytes
for (int i = 0; i < numFields; i++) {
input.seek(positions.get(i));
byte[] stateBytes = new byte[lengths.get(i)];
input.readBytes(stateBytes, 0, lengths.get(i));
readQuantizationStateInfos.put(fieldNames.get(i), stateBytes);
}
Comment thread
ryanbogan marked this conversation as resolved.
Outdated
} catch (IOException e) {
throw new RuntimeException(e);
Comment thread
ryanbogan marked this conversation as resolved.
Outdated
}
return readQuantizationStateInfos;
}

/**
* Reads an individual quantization state for a given field
* @param directory directory to open input
* @param segmentName segment name
* @param segmentSuffix segment suffix
* @param fieldInfo field information
* @return quantization state
*/
public QuantizationState read(Directory directory, String segmentName, String segmentSuffix, FieldInfo fieldInfo) throws IOException {
Comment thread
ryanbogan marked this conversation as resolved.
Outdated
String quantizationStateFileName = IndexFileNames.segmentFileName(
segmentName,
segmentSuffix,
KNNConstants.QUANTIZATION_STATE_FILE_SUFFIX
);
String fieldName = fieldInfo.getName();

IndexInput input = directory.openInput(quantizationStateFileName, IOContext.READ);
CodecUtil.retrieveChecksum(input);
int numFields = getNumFields(input);

long position = -1;
int length = 0;

// Read each field's metadata from the index section
for (int i = 0; i < numFields; i++) {
String tempFieldName = input.readString();
int tempLength = input.readInt();
long tempPosition = input.readVLong();
if (tempFieldName.equals(fieldName)) {
position = tempPosition;
length = tempLength;
break;
}
}

if (position == -1 || length == 0) {
throw new IllegalArgumentException(String.format("Field %s not found", fieldName));
}

input.seek(position);
byte[] stateBytes = new byte[length];
input.readBytes(stateBytes, 0, length);
input.close();
// Deserialize the byte array to a quantization state object
// TODO: Get params from field info and deserialize
return null;
}

private int getNumFields(IndexInput input) throws IOException {
long footerStart = input.length() - CodecUtil.footerLength();
long markerAndIndexPosition = footerStart - Integer.BYTES - Long.BYTES;
input.seek(markerAndIndexPosition);
long indexStartPosition = input.readLong();
input.readInt();
Comment thread
ryanbogan marked this conversation as resolved.
Outdated
input.seek(indexStartPosition);
return input.readInt();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.index.codec.KNN990Codec;

import lombok.AllArgsConstructor;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.opensearch.knn.common.KNNConstants;
import org.opensearch.knn.quantization.models.quantizationState.QuantizationState;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
* Writes quantization states to off heap memory
*/
public class KNNQuantizationStateWriter {
Comment thread
ryanbogan marked this conversation as resolved.
Outdated

private final IndexOutput output;
private List<FieldQuantizationState> fieldQuantizationStates = new ArrayList<>();
Comment thread
ryanbogan marked this conversation as resolved.

/**
* Constructor
* @param segmentWriteState segment write state containing segment information
* @throws IOException exception could be thrown while creating the output
*/
public KNNQuantizationStateWriter(SegmentWriteState segmentWriteState) throws IOException {
String quantizationStateFileName = IndexFileNames.segmentFileName(
segmentWriteState.segmentInfo.name,
segmentWriteState.segmentSuffix,
KNNConstants.QUANTIZATION_STATE_FILE_SUFFIX
);

output = segmentWriteState.directory.createOutput(quantizationStateFileName, segmentWriteState.context);
}

/**
* Writes an index header
* @param segmentWriteState state containing segment information
* @throws IOException exception could be thrown while writing header
*/
public void writeHeader(SegmentWriteState segmentWriteState) throws IOException {
CodecUtil.writeIndexHeader(output, "QuantizationCodec", 0, segmentWriteState.segmentInfo.getId(), segmentWriteState.segmentSuffix);
Comment thread
ryanbogan marked this conversation as resolved.
Outdated
}

/**
* Writes a quantization state as bytes
* @param fieldName field name
* @param quantizationState quantization state
* @throws IOException could be thrown while writing
*/
public void writeState(String fieldName, QuantizationState quantizationState) throws IOException {
byte[] stateBytes = quantizationState.toByteArray();
long position = output.getFilePointer();
output.writeBytes(stateBytes, stateBytes.length);
fieldQuantizationStates.add(new FieldQuantizationState(fieldName, stateBytes, position));
}

/**
* Writes index footer and other index information for parsing later
* @throws IOException could be thrown while writing
*/
public void writeFooter() throws IOException {
long indexStartPosition = output.getFilePointer();
output.writeInt(fieldQuantizationStates.size());
for (FieldQuantizationState fieldQuantizationState : fieldQuantizationStates) {
output.writeString(fieldQuantizationState.fieldName);
Comment thread
ryanbogan marked this conversation as resolved.
Outdated
output.writeInt(fieldQuantizationState.stateBytes.length);
output.writeVLong(fieldQuantizationState.position);
}
output.writeLong(indexStartPosition);
output.writeInt(-1);
CodecUtil.writeFooter(output);
output.close();
Comment thread
ryanbogan marked this conversation as resolved.
Outdated
fieldQuantizationStates = new ArrayList<>();
Comment thread
ryanbogan marked this conversation as resolved.
Outdated
}

@AllArgsConstructor
private static class FieldQuantizationState {
final String fieldName;
final byte[] stateBytes;
final Long position;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

package org.opensearch.knn.index.codec.KNN990Codec;

import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
import org.apache.lucene.codecs.KnnVectorsWriter;
Expand Down Expand Up @@ -39,14 +38,21 @@
* A KNNVectorsWriter class for writing the vector data strcutures and flat vectors for Native Engines.
*/
@Log4j2
@RequiredArgsConstructor
public class NativeEngines990KnnVectorsWriter extends KnnVectorsWriter {
private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(NativeEngines990KnnVectorsWriter.class);
private final SegmentWriteState segmentWriteState;
private final FlatVectorsWriter flatVectorsWriter;
private final KNNQuantizationStateWriter quantizationStateWriter;
private final List<NativeEngineFieldVectorsWriter<?>> fields = new ArrayList<>();
private boolean finished;

public NativeEngines990KnnVectorsWriter(SegmentWriteState segmentWriteState, FlatVectorsWriter flatVectorsWriter) throws IOException {
this.segmentWriteState = segmentWriteState;
this.flatVectorsWriter = flatVectorsWriter;
this.quantizationStateWriter = new KNNQuantizationStateWriter(segmentWriteState);

}

/**
* Add new field for indexing.
* In Lucene, we use single file for all the vector fields so here we need to see how we are going to make things
Expand All @@ -70,6 +76,9 @@ public KnnFieldVectorsWriter<?> addField(final FieldInfo fieldInfo) throws IOExc
public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException {
// simply write data in the flat file
flatVectorsWriter.flush(maxDoc, sortMap);

quantizationStateWriter.writeHeader(segmentWriteState);

for (final NativeEngineFieldVectorsWriter<?> field : fields) {
final VectorDataType vectorDataType = extractVectorDataType(field.getFieldInfo());
final KNNVectorValues<?> knnVectorValues = KNNVectorValuesFactory.getVectorValues(
Expand All @@ -78,8 +87,12 @@ public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException {
field.getVectors()
);

// TODO: Extract quantization state here, uncomment below line once implemented
// quantizationStateWriter.writeState(field.getFieldInfo().getName(), quantizationState);
Comment thread
ryanbogan marked this conversation as resolved.
Outdated

NativeIndexWriter.getWriter(field.getFieldInfo(), segmentWriteState).flushIndex(knnVectorValues);
}
quantizationStateWriter.writeFooter();
}

@Override
Expand Down
Loading