Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Performance Improvement] Add custom bulk scorer for hybrid query (2-3x faster) ([#1289](https://github.com/opensearch-project/neural-search/pull/1289))
- [Stats] Add stats for text chunking processor algorithms ([#1308](https://github.com/opensearch-project/neural-search/pull/1308))
- Support custom weights in RRF normalization processor ([#1322](https://github.com/opensearch-project/neural-search/pull/1322))
- [Stats] Add stats tracking for semantic highlighting ([#1327](https://github.com/opensearch-project/neural-search/pull/1327))

### Bug Fixes
- Fix score value as null for single shard when sorting is not done on score field ([#1277](https://github.com/opensearch-project/neural-search/pull/1277))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import lombok.extern.log4j.Log4j2;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.neuralsearch.stats.events.EventStatName;
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
import org.opensearch.search.fetch.subphase.highlight.FieldHighlightContext;
import org.opensearch.search.fetch.subphase.highlight.HighlightField;
import org.opensearch.search.fetch.subphase.highlight.Highlighter;
Expand Down Expand Up @@ -46,6 +48,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) {
throw new IllegalStateException("SemanticHighlighter has not been initialized");
}

EventStatsManager.increment(EventStatName.SEMANTIC_HIGHLIGHTING_REQUEST_COUNT);

// Extract field text
String fieldText = semanticHighlighterEngine.getFieldText(fieldContext);

Expand All @@ -57,21 +61,28 @@ public HighlightField highlight(FieldHighlightContext fieldContext) {

if (originalQueryText == null || originalQueryText.isEmpty()) {
log.warn("No query text found for field {}", fieldContext.fieldName);
EventStatsManager.increment(EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT);
return null;
}

// The pre- and post- tags are provided by the user or defaulted to <em> and </em>
String[] preTags = fieldContext.field.fieldOptions().preTags();
String[] postTags = fieldContext.field.fieldOptions().postTags();

// Get highlighted text - allow any exceptions from this call to propagate
String highlightedResponse = semanticHighlighterEngine.getHighlightedSentences(
modelId,
originalQueryText,
fieldText,
preTags[0],
postTags[0]
);
String highlightedResponse;
try {
// Get highlighted text
highlightedResponse = semanticHighlighterEngine.getHighlightedSentences(
modelId,
originalQueryText,
fieldText,
preTags[0],
postTags[0]
);
} catch (Exception e) {
EventStatsManager.increment(EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT);
throw e;
}

if (highlightedResponse == null || highlightedResponse.isEmpty()) {
log.warn("No highlighted text found for field {}", fieldContext.fieldName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ public enum EventStatName implements StatName {
"processors.ingest",
EventStatType.TIMESTAMPED_EVENT_COUNTER
),
TEXT_CHUNKING_DELIMITER_EXECUTIONS("text_chunking_delimiter_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER);
TEXT_CHUNKING_DELIMITER_EXECUTIONS("text_chunking_delimiter_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
SEMANTIC_HIGHLIGHTING_REQUEST_COUNT("request_count", "semantic_highlighting", EventStatType.TIMESTAMPED_EVENT_COUNTER),
SEMANTIC_HIGHLIGHTING_ERROR_COUNT("error_count", "semantic_highlighting", EventStatType.TIMESTAMPED_EVENT_COUNTER);
Copy link
Contributor

@q-andy q-andy May 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should rename the stat names to semantic_highlighting_request_count and semantic_highlighting_error_count. The stat name is used as a unique identifier across all stats without the path. Maybe also shorten the path from semantic_highlighting -> highlighting?

Copy link
Member Author

@junqiu-lei junqiu-lei May 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should rename the stat names to semantic_highlighting_request_count and semantic_highlighting_error_count.

It can make the full path redundant in naming.

The EventStatName class already has a getFullPath() method that combines the path and name with a dot (e.g., semantic_highlighting.request_count), which seems like a more robust way to handle uniqueness?

The stat name is used as a unique identifier across all stats without the path. Maybe also shorten the path from semantic_highlighting -> highlighting?

I prefer to keep using with semantic_highlighting which is consistent with the origin feature name.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Offline talked with @q-andy, the stats filter is being used along with child stats name, like semantic_highlighting_request_count , this function is introduced since 3.0.0 OpenSearch. To keep the function same and backward compatibility, we'll use name semantic_highlighting_request_count and semantic_highlighting_error_count.


private final String nameString;
private final String path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import static org.opensearch.neuralsearch.util.TestUtils.TEST_SPACE_TYPE;
import static org.opensearch.neuralsearch.util.TestUtils.createRandomVector;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
Expand All @@ -20,6 +21,7 @@
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;
import org.opensearch.index.query.QueryStringQueryBuilder;
import org.opensearch.index.query.TermQueryBuilder;
import org.opensearch.neuralsearch.stats.events.EventStatName;
import org.opensearch.search.builder.SearchSourceBuilder;

import com.google.common.primitives.Floats;
Expand Down Expand Up @@ -77,7 +79,10 @@ private void initializeTestIndex() {
* 5. Neural Query
* 6. Hybrid Query
*/
@SneakyThrows
public void testQueriesWithSemanticHighlighter() {
// Enable stats for the test
updateClusterSettings("plugins.neural_search.stats_enabled", true);
// Set up models for the test
String textEmbeddingModelId = prepareModel();
String sentenceHighlightingModelId = prepareSentenceHighlightingModel();
Expand Down Expand Up @@ -334,6 +339,27 @@ public void testQueriesWithSemanticHighlighter() {
sentenceHighlightingModelId
);
verifyHighlightResults(searchResponse, "artificial intelligence");

// Verify that invalid model ID is handled gracefully and error stats are incremented
String invalidModelId = "invalid-model-id";
searchResponse = searchWithSemanticHighlighter(TEST_BASIC_INDEX_NAME, hybridMatchQuery, 10, TEST_TEXT_FIELD_NAME, invalidModelId);
assertNotNull("Search response should contain hits", searchResponse);

// Get stats
String responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
Map<String, Object> allNodesStats = parseAggregatedNodeStatsResponse(responseBody);

// Parse json to get stats
assertEquals(
"Stats should contain the expected number of requests",
8,
getNestedValue(allNodesStats, EventStatName.SEMANTIC_HIGHLIGHTING_REQUEST_COUNT)
);
assertEquals(
"Stats should contain the expected number of errors",
1,
getNestedValue(allNodesStats, EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT)
);
}

private void verifyHighlightResults(Map<String, Object> searchResponse, String expectedContent) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import org.opensearch.search.lookup.SourceLookup;
import org.opensearch.test.OpenSearchTestCase;
import org.opensearch.neuralsearch.highlight.extractor.QueryTextExtractorRegistry;
import org.opensearch.neuralsearch.settings.NeuralSearchSettingsAccessor;
import org.opensearch.neuralsearch.stats.events.EventStatsManager;

import java.util.ArrayList;
import java.util.HashMap;
Expand Down Expand Up @@ -67,6 +69,11 @@ public void setUp() throws Exception {

fieldType = new TextFieldMapper.TextFieldType(TEST_FIELD);

// Initialize EventStatsManager for tests
NeuralSearchSettingsAccessor mockSettingsAccessor = mock(NeuralSearchSettingsAccessor.class);
when(mockSettingsAccessor.isStatsEnabled()).thenReturn(true);
EventStatsManager.instance().initialize(mockSettingsAccessor);

// Setup default mock behavior
setupDefaultMockBehavior();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/
package org.opensearch.neuralsearch.stats.events;

import org.opensearch.test.OpenSearchTestCase;
import org.junit.Before;

import java.util.EnumSet;
import java.util.Locale;

public class SemanticHighlightingStatsTests extends OpenSearchTestCase {
private static final EnumSet<EventStatName> SEMANTIC_HIGHLIGHTING_STATS = EnumSet.of(
EventStatName.SEMANTIC_HIGHLIGHTING_REQUEST_COUNT,
EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT
);

@Before
public void setUp() throws Exception {
super.setUp();
}

public void testSemanticHighlightingStatsExist() {
// Verify that both semantic highlighting stats exist in the enum
assertTrue(
"SEMANTIC_HIGHLIGHTING_REQUEST_COUNT should exist",
EnumSet.allOf(EventStatName.class).contains(EventStatName.SEMANTIC_HIGHLIGHTING_REQUEST_COUNT)
);
assertTrue(
"SEMANTIC_HIGHLIGHTING_ERROR_COUNT should exist",
EnumSet.allOf(EventStatName.class).contains(EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT)
);
}

public void testSemanticHighlightingStatsPaths() {
// Verify that both stats have the correct path
assertEquals(
"SEMANTIC_HIGHLIGHTING_REQUEST_COUNT should have correct path",
"semantic_highlighting",
EventStatName.SEMANTIC_HIGHLIGHTING_REQUEST_COUNT.getPath()
);
assertEquals(
"SEMANTIC_HIGHLIGHTING_ERROR_COUNT should have correct path",
"semantic_highlighting",
EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT.getPath()
);
}

public void testSemanticHighlightingStatsNames() {
// Verify that both stats have the correct name strings
assertEquals(
"SEMANTIC_HIGHLIGHTING_REQUEST_COUNT should have correct name",
"request_count",
EventStatName.SEMANTIC_HIGHLIGHTING_REQUEST_COUNT.getNameString()
);
assertEquals(
"SEMANTIC_HIGHLIGHTING_ERROR_COUNT should have correct name",
"error_count",
EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT.getNameString()
);
}

public void testSemanticHighlightingStatsFullPaths() {
// Verify that both stats have the correct full paths
assertEquals(
"SEMANTIC_HIGHLIGHTING_REQUEST_COUNT should have correct full path",
"semantic_highlighting.request_count",
EventStatName.SEMANTIC_HIGHLIGHTING_REQUEST_COUNT.getFullPath()
);
assertEquals(
"SEMANTIC_HIGHLIGHTING_ERROR_COUNT should have correct full path",
"semantic_highlighting.error_count",
EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT.getFullPath()
);
}

public void testSemanticHighlightingStatsTypes() {
// Verify that both stats have the correct type
for (EventStatName stat : SEMANTIC_HIGHLIGHTING_STATS) {
assertEquals("Stat should be of type TIMESTAMPED_EVENT_COUNTER", EventStatType.TIMESTAMPED_EVENT_COUNTER, stat.getStatType());
}
}

public void testSemanticHighlightingStatsEventStats() {
// Verify that both stats have non-null event stats
for (EventStatName stat : SEMANTIC_HIGHLIGHTING_STATS) {
assertNotNull("Event stat should not be null", stat.getEventStat());
assertTrue("Event stat should be instance of TimestampedEventStat", stat.getEventStat() instanceof TimestampedEventStat);
}
}

public void testSemanticHighlightingStatsFromString() {
// Test looking up stats by their name strings
assertEquals(
"Should find SEMANTIC_HIGHLIGHTING_REQUEST_COUNT by name",
EventStatName.SEMANTIC_HIGHLIGHTING_REQUEST_COUNT,
EventStatName.from("request_count")
);
assertEquals(
"Should find SEMANTIC_HIGHLIGHTING_ERROR_COUNT by name",
EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT,
EventStatName.from("error_count")
);

// Test invalid name
IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> EventStatName.from("invalid_stat_name"));
assertTrue(
"Error message should mention invalid stat name",
exception.getMessage().toLowerCase(Locale.ROOT).contains("event stat not found")
);
}
}
Loading