Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
a994d14
test: add some es|ql and semantic text bfloat16 tests
mromaios Dec 5, 2025
201947f
[CI] Auto commit changes from spotless
Dec 5, 2025
dbef73a
add missing spec
mromaios Dec 5, 2025
a5a925a
Merge branch 'bfloat16_esql_and_semantic_text_tests' of github.com:mr…
mromaios Dec 5, 2025
d8f699a
drop column
mromaios Dec 5, 2025
32945e7
Merge branch 'main' of github.com:elastic/elasticsearch into bfloat16…
mromaios Dec 9, 2025
db5d9aa
Merge branch 'main' of github.com:elastic/elasticsearch into bfloat16…
mromaios Dec 9, 2025
cfbea9c
remove bfloat16 exception in mapper
mromaios Dec 9, 2025
43a1a39
fix es|ql csv test column name
mromaios Dec 9, 2025
3b7d930
adjust tests
mromaios Dec 9, 2025
c5746e7
Merge branch 'main' of github.com:elastic/elasticsearch into bfloat16…
mromaios Dec 9, 2025
88b3106
rename spec
mromaios Dec 9, 2025
c1af0a3
add separate mapping for bfloat16
mromaios Dec 9, 2025
eefee7b
[CI] Auto commit changes from spotless
Dec 9, 2025
a3277ec
remove csv filenames
mromaios Dec 9, 2025
55f46a4
Merge branch 'bfloat16_esql_and_semantic_text_tests' of github.com:mr…
mromaios Dec 9, 2025
b42a52c
fix dense_vector.csv format
mromaios Dec 10, 2025
a1d9fae
update expected
mromaios Dec 10, 2025
c9adbf4
adjust tests
mromaios Dec 10, 2025
78a32c4
[CI] Auto commit changes from spotless
Dec 10, 2025
16a8c65
update tests
mromaios Dec 10, 2025
aec2f99
Merge branch 'bfloat16_esql_and_semantic_text_tests' of github.com:mr…
mromaios Dec 10, 2025
29bc4de
[CI] Auto commit changes from spotless
Dec 10, 2025
c41b060
update expected values
mromaios Dec 10, 2025
35b38d2
Merge branch 'bfloat16_esql_and_semantic_text_tests' of github.com:mr…
mromaios Dec 10, 2025
0806bba
[CI] Auto commit changes from spotless
Dec 10, 2025
50469b6
add tests
mromaios Dec 11, 2025
4244ffc
Merge branch 'bfloat16_esql_and_semantic_text_tests' of github.com:mr…
mromaios Dec 11, 2025
a0e111e
[CI] Auto commit changes from spotless
Dec 11, 2025
cd2d7ba
Merge branch 'bfloat16_esql_and_semantic_text_tests' of github.com:mr…
mromaios Dec 11, 2025
eab02c9
revert changes to semantic_text
mromaios Dec 11, 2025
139a89b
update csv values
mromaios Dec 11, 2025
6eaa71b
Updated javadoc
Mikep86 Dec 12, 2025
7a086a6
Remove unnecessary commented out tests
Mikep86 Dec 12, 2025
06adfaa
Update analyzer tests
Mikep86 Dec 12, 2025
60a8c74
Make dense vector data sets private
Mikep86 Dec 12, 2025
d816446
Merge branch 'main' into bfloat16_esql_and_semantic_text_tests
Mikep86 Dec 12, 2025
e254da6
Update analyzer tests to use a dedicated dense vector mapping file
Mikep86 Dec 12, 2025
553897c
Merge branch 'main' into bfloat16_esql_and_semantic_text_tests
Mikep86 Dec 15, 2025
05a3568
Analyzer test adjustments
Mikep86 Dec 15, 2025
c9d553f
Updated VectorSimilarityFunctionsIT to test BFLOAT16
Mikep86 Dec 15, 2025
e4ef570
Updated VectorSimilarityFunction to handle BFLOAT16
Mikep86 Dec 15, 2025
3dbac87
Update docs/changelog/139113.yaml
mromaios Dec 15, 2025
4c7654c
Update changelog
Mikep86 Dec 15, 2025
2cf7c59
Refactor field property checks in CsvTestsDataLoader
Mikep86 Dec 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/139113.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 139113
summary: "[ES|QL]: Update Vector Similarity To Support BFLOAT16"
area: "ES|QL"
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,19 @@ protected boolean supportsTDigestField() {
}
}

/**
 * Checks whether the {@code bfloat16} dense vector element type can be used for this multi-cluster test run.
 * Requires the {@code GENERIC_VECTOR_FORMAT} capability on BOTH the local and the remote cluster, because
 * cross-cluster queries may read the bfloat16-mapped index on either side.
 *
 * @return true only when both clusters report the capability
 */
@Override
protected boolean supportsBFloat16ElementType() {
    try {
        return RestEsqlTestCase.hasCapabilities(client(), List.of(EsqlCapabilities.Cap.GENERIC_VECTOR_FORMAT.capabilityName()))
            && RestEsqlTestCase.hasCapabilities(
                remoteClusterClient(),
                List.of(EsqlCapabilities.Cap.GENERIC_VECTOR_FORMAT.capabilityName())
            );
    } catch (IOException e) {
        // hasCapabilities talks to the clusters over REST; surface transport failures as unchecked
        throw new RuntimeException(e);
    }
}

/**
* Convert index patterns and subqueries in FROM commands to use remote indices for a given test case.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@ public void setup() {
supportsSemanticTextInference(),
false,
supportsExponentialHistograms(),
supportsTDigestField()
supportsTDigestField(),
supportsBFloat16ElementType()
);
return null;
});
Expand Down Expand Up @@ -318,6 +319,10 @@ protected boolean supportsTDigestField() {
return RestEsqlTestCase.hasCapabilities(client(), List.of(EsqlCapabilities.Cap.TDIGEST_FIELD_TYPE_SUPPORT_V3.capabilityName()));
}

/**
 * Checks whether the cluster under test supports the {@code bfloat16} dense vector element type,
 * gated on the {@code GENERIC_VECTOR_FORMAT} capability. The result is fed into
 * {@code availableDatasetsForEs} so datasets with bfloat16-mapped fields are skipped on older clusters.
 */
protected boolean supportsBFloat16ElementType() {
    return RestEsqlTestCase.hasCapabilities(client(), List.of(EsqlCapabilities.Cap.GENERIC_VECTOR_FORMAT.capabilityName()));
}

protected void doTest() throws Throwable {
doTest(testCase.query);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ private static List<String> originalTypes(Map<String, ?> x) {
}

private List<String> availableIndices() throws IOException {
return availableDatasetsForEs(true, supportsSourceFieldMapping(), false, requiresTimeSeries(), false, false).stream()
return availableDatasetsForEs(true, supportsSourceFieldMapping(), false, requiresTimeSeries(), false, false, false).stream()
.filter(x -> x.requiresInferenceEndpoint() == false)
.map(x -> x.indexName())
.toList();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ public class CsvTestsDataLoader {
private static final TestDataset DENSE_VECTOR_TEXT = new TestDataset("dense_vector_text");
private static final TestDataset MV_TEXT = new TestDataset("mv_text");
private static final TestDataset DENSE_VECTOR = new TestDataset("dense_vector");
private static final TestDataset DENSE_VECTOR_BFLOAT16 = new TestDataset("dense_vector_bfloat16");
private static final TestDataset COLORS = new TestDataset("colors");
private static final TestDataset COLORS_CMYK_LOOKUP = new TestDataset("colors_cmyk").withSetting("lookup-settings.json");
private static final TestDataset BASE_CONVERSION = new TestDataset("base_conversion");
Expand Down Expand Up @@ -248,6 +249,7 @@ public class CsvTestsDataLoader {
Map.entry(DENSE_VECTOR_TEXT.indexName, DENSE_VECTOR_TEXT),
Map.entry(MV_TEXT.indexName, MV_TEXT),
Map.entry(DENSE_VECTOR.indexName, DENSE_VECTOR),
Map.entry(DENSE_VECTOR_BFLOAT16.indexName, DENSE_VECTOR_BFLOAT16),
Map.entry(COLORS.indexName, COLORS),
Map.entry(COLORS_CMYK_LOOKUP.indexName, COLORS_CMYK_LOOKUP),
Map.entry(BASE_CONVERSION.indexName, BASE_CONVERSION),
Expand Down Expand Up @@ -348,7 +350,7 @@ public static void main(String[] args) throws IOException {
}

try (RestClient client = builder.build()) {
loadDataSetIntoEs(client, true, true, false, false, true, true, (restClient, indexName, indexMapping, indexSettings) -> {
loadDataSetIntoEs(client, true, true, false, false, true, true, true, (restClient, indexName, indexMapping, indexSettings) -> {
// don't use ESRestTestCase methods here or, if you do, test running the main method before making the change
StringBuilder jsonBody = new StringBuilder("{");
if (indexSettings != null && indexSettings.isEmpty() == false) {
Expand All @@ -373,7 +375,8 @@ public static Set<TestDataset> availableDatasetsForEs(
boolean inferenceEnabled,
boolean requiresTimeSeries,
boolean exponentialHistogramFieldSupported,
boolean tDigestFieldSupported
boolean tDigestFieldSupported,
boolean bFloat16ElementTypeSupported
) throws IOException {
Set<TestDataset> testDataSets = new HashSet<>();

Expand All @@ -383,7 +386,8 @@ public static Set<TestDataset> availableDatasetsForEs(
&& (supportsSourceFieldMapping || isSourceMappingDataset(dataset) == false)
&& (requiresTimeSeries == false || isTimeSeries(dataset))
&& (exponentialHistogramFieldSupported || containsExponentialHistogramFields(dataset) == false)
&& (tDigestFieldSupported || containsTDigestFields(dataset) == false)) {
&& (tDigestFieldSupported || containsTDigestFields(dataset) == false)
&& (bFloat16ElementTypeSupported || containsBFloat16ElementType(dataset) == false)) {
testDataSets.add(dataset);
}
}
Expand All @@ -408,44 +412,33 @@ private static boolean isSourceMappingDataset(TestDataset dataset) throws IOExce
}

/**
 * Returns true if the dataset's index mapping declares at least one field with
 * {@code "type": "exponential_histogram"}. Delegates the mapping-file scan to
 * {@link #containsFieldWithProperties}.
 */
private static boolean containsExponentialHistogramFields(TestDataset dataset) throws IOException {
    return containsFieldWithProperties(dataset, Map.of("type", "exponential_histogram"));
}

private static boolean containsTDigestFields(TestDataset dataset) throws IOException {
if (dataset.mappingFileName() == null) {
return containsFieldWithProperties(dataset, Map.of("type", "tdigest"));
}

private static boolean containsBFloat16ElementType(TestDataset dataset) throws IOException {
return containsFieldWithProperties(dataset, Map.of("element_type", "bfloat16"));
}

private static boolean containsFieldWithProperties(TestDataset dataset, Map<String, Object> properties) throws IOException {
if (dataset.mappingFileName() == null || properties.isEmpty()) {
return false;
}

String mappingJsonText = readTextFile(getResource("/" + dataset.mappingFileName()));
JsonNode mappingNode = new ObjectMapper().readTree(mappingJsonText);
JsonNode properties = mappingNode.get("properties");
if (properties != null) {
for (var fieldWithValue : properties.properties()) {
JsonNode fieldProperties = fieldWithValue.getValue();
if (fieldProperties != null) {
JsonNode typeNode = fieldProperties.get("type");
if (typeNode != null && typeNode.asText().equals("tdigest")) {
return true;
}
Map<?, ?> mappingNode = new ObjectMapper().readValue(mappingJsonText, Map.class);
Object mappingProperties = mappingNode.get("properties");
if (mappingProperties instanceof Map<?, ?> mappingPropertiesMap) {
for (Object field : mappingPropertiesMap.values()) {
if (field instanceof Map<?, ?> fieldMap && fieldMap.entrySet().containsAll(properties.entrySet())) {
return true;
}
}
}

return false;
}

Expand All @@ -461,7 +454,7 @@ public static void loadDataSetIntoEs(
boolean supportsSourceFieldMapping,
boolean inferenceEnabled
) throws IOException {
loadDataSetIntoEs(client, supportsIndexModeLookup, supportsSourceFieldMapping, inferenceEnabled, false, false, false);
loadDataSetIntoEs(client, supportsIndexModeLookup, supportsSourceFieldMapping, inferenceEnabled, false, false, false, false);
}

public static void loadDataSetIntoEs(
Expand All @@ -471,7 +464,8 @@ public static void loadDataSetIntoEs(
boolean inferenceEnabled,
boolean timeSeriesOnly,
boolean exponentialHistogramFieldSupported,
boolean tDigestFieldSupported
boolean tDigestFieldSupported,
boolean bFloat16ElementTypeSupported
) throws IOException {
loadDataSetIntoEs(
client,
Expand All @@ -481,6 +475,7 @@ public static void loadDataSetIntoEs(
timeSeriesOnly,
exponentialHistogramFieldSupported,
tDigestFieldSupported,
bFloat16ElementTypeSupported,
(restClient, indexName, indexMapping, indexSettings) -> {
ESRestTestCase.createIndex(restClient, indexName, indexSettings, indexMapping, null);
}
Expand All @@ -495,6 +490,7 @@ private static void loadDataSetIntoEs(
boolean timeSeriesOnly,
boolean exponentialHistogramFieldSupported,
boolean tDigestFieldSupported,
boolean bFloat16ElementTypeSupported,
IndexCreator indexCreator
) throws IOException {
Logger logger = LogManager.getLogger(CsvTestsDataLoader.class);
Expand All @@ -507,7 +503,8 @@ private static void loadDataSetIntoEs(
inferenceEnabled,
timeSeriesOnly,
exponentialHistogramFieldSupported,
tDigestFieldSupported
tDigestFieldSupported,
bFloat16ElementTypeSupported
)) {
load(client, dataset, logger, indexCreator);
loadedDatasets.add(dataset.indexName);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id:l, float_vector:dense_vector, byte_vector:dense_vector, bit_vector:dense_vector
0, [1.0, 2.0, 3.0], [10, 20, 30], [13, 112]
1, [4.0, 5.0, 6.0], [40, 50, 60], [45, 9]
2, [9.0, 8.0, 7.0], [90, 80, 70], [127, 0]
3, [0.054, 0.032, 0.012], [100, 110, 120], [88, 53]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
id:l, bfloat16_vector:dense_vector
0, [1.0, 2.0, 3.0]
1, [4.0, 5.0, 6.0]
2, [9.0, 8.0, 7.0]
3, [0.5390625, 0.3203125, 0.01202392578125]
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
retrieveDenseBFloat16VectorData
required_capability: dense_vector_field_type_released
required_capability: dense_vector_agg_metric_double_if_version
required_capability: l2_norm_vector_similarity_function
required_capability: generic_vector_format

FROM dense_vector_bfloat16
| KEEP id, bfloat16_vector
| SORT id
;

id:l | bfloat16_vector:dense_vector
0 | [1.0, 2.0, 3.0]
1 | [4.0, 5.0, 6.0]
2 | [9.0, 8.0, 7.0]
3 | [0.5390625, 0.3203125, 0.01202392578125]
;

denseBFloat16VectorWithEval
required_capability: dense_vector_agg_metric_double_if_version
required_capability: l2_norm_vector_similarity_function
required_capability: generic_vector_format

FROM dense_vector_bfloat16
| EVAL v = bfloat16_vector
| KEEP id, v
| SORT id
;

id:l | v:dense_vector
0 | [1.0, 2.0, 3.0]
1 | [4.0, 5.0, 6.0]
2 | [9.0, 8.0, 7.0]
3 | [0.5390625, 0.3203125, 0.01202392578125]
;

denseBFloat16VectorWithRenameAndDrop
required_capability: dense_vector_agg_metric_double_if_version
required_capability: l2_norm_vector_similarity_function
required_capability: generic_vector_format

FROM dense_vector_bfloat16
| EVAL v = bfloat16_vector
| RENAME v AS new_vector
| DROP bfloat16_vector
| SORT id
;

id:l | new_vector:dense_vector
0 | [1.0, 2.0, 3.0]
1 | [4.0, 5.0, 6.0]
2 | [9.0, 8.0, 7.0]
3 | [0.5390625, 0.3203125, 0.01202392578125]
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"properties": {
"id": {
"type": "long"
},
"float_vector": {
"type": "dense_vector",
"similarity": "l2_norm",
"index_options": {
"type": "hnsw",
"m": 16,
"ef_construction": 100
}
},
"byte_vector": {
"type": "dense_vector",
"similarity": "l2_norm",
"element_type": "byte",
"index_options": {
"type": "hnsw",
"m": 16,
"ef_construction": 100
}
},
"bit_vector": {
"type": "dense_vector",
"dims": 16,
"similarity": "l2_norm",
"element_type": "bit",
"index_options": {
"type": "hnsw",
"m": 16,
"ef_construction": 100
}
},
"bfloat16_vector": {
"type": "dense_vector",
"dims": 16,
"similarity": "l2_norm",
"element_type": "bfloat16",
"index_options": {
"type": "hnsw",
"m": 16,
"ef_construction": 100
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"properties": {
"id": {
"type": "long"
},
"bfloat16_vector": {
"type": "dense_vector",
"similarity": "l2_norm",
"element_type": "bfloat16",
"index_options": {
"type": "hnsw",
"m": 16,
"ef_construction": 100
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.CoreMatchers.containsString;
Expand All @@ -49,12 +48,12 @@ public class VectorSimilarityFunctionsIT extends AbstractEsqlIntegTestCase {
public static Iterable<Object[]> parameters() throws Exception {
List<Object[]> params = new ArrayList<>();

for (ElementType elementType : Set.of(ElementType.FLOAT, ElementType.BYTE, ElementType.BIT)) {
for (ElementType elementType : ElementType.values()) {
params.add(new Object[] { "v_cosine", CosineSimilarity.SIMILARITY_FUNCTION, elementType });
params.add(new Object[] { "v_dot_product", DotProduct.SIMILARITY_FUNCTION, elementType });
params.add(new Object[] { "v_l1_norm", L1Norm.SIMILARITY_FUNCTION, elementType });
params.add(new Object[] { "v_l2_norm", L2Norm.SIMILARITY_FUNCTION, elementType });
if (elementType != ElementType.FLOAT) {
if (elementType != ElementType.FLOAT && elementType != ElementType.BFLOAT16) {
params.add(new Object[] { "v_hamming", Hamming.EVALUATOR_SIMILARITY_FUNCTION, elementType });
}
}
Expand Down Expand Up @@ -236,7 +235,7 @@ private Double calculateSimilarity(
case BYTE, BIT -> {
return (double) similarityFunction.calculateSimilarity(asByteArray(randomVector), asByteArray(vector));
}
case FLOAT -> {
case FLOAT, BFLOAT16 -> {
return (double) similarityFunction.calculateSimilarity(asFloatArray(randomVector), asFloatArray(vector));
}
default -> throw new IllegalArgumentException("Unexpected element type: " + elementType);
Expand Down Expand Up @@ -335,7 +334,7 @@ private List<Number> randomVector(int numDims, boolean allowNull) {
List<Number> vector = new ArrayList<>(dimensions);
for (int j = 0; j < dimensions; j++) {
switch (elementType) {
case FLOAT -> {
case FLOAT, BFLOAT16 -> {
if (dimensions == 1) {
vector.add(randomValueOtherThan(0f, () -> randomFloat()));
} else {
Expand Down
Loading