Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Performance Improvement] Add custom bulk scorer for hybrid query (2-3x faster) ([#1289](https://github.com/opensearch-project/neural-search/pull/1289))
- [Stats] Add stats for text chunking processor algorithms ([#1308](https://github.com/opensearch-project/neural-search/pull/1308))
- Support custom weights in RRF normalization processor ([#1322](https://github.com/opensearch-project/neural-search/pull/1322))
- [Stats] Add stats tracking for semantic highlighting ([#1327](https://github.com/opensearch-project/neural-search/pull/1327))

### Bug Fixes
- Fix score value as null for single shard when sorting is not done on score field ([#1277](https://github.com/opensearch-project/neural-search/pull/1277))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import lombok.extern.log4j.Log4j2;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.neuralsearch.stats.events.EventStatName;
import org.opensearch.neuralsearch.stats.events.EventStatsManager;
import org.opensearch.search.fetch.subphase.highlight.FieldHighlightContext;
import org.opensearch.search.fetch.subphase.highlight.HighlightField;
import org.opensearch.search.fetch.subphase.highlight.Highlighter;
Expand Down Expand Up @@ -46,6 +48,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) {
throw new IllegalStateException("SemanticHighlighter has not been initialized");
}

EventStatsManager.increment(EventStatName.SEMANTIC_HIGHLIGHTING_REQUEST_COUNT);

// Extract field text
String fieldText = semanticHighlighterEngine.getFieldText(fieldContext);

Expand All @@ -57,21 +61,28 @@ public HighlightField highlight(FieldHighlightContext fieldContext) {

if (originalQueryText == null || originalQueryText.isEmpty()) {
log.warn("No query text found for field {}", fieldContext.fieldName);
EventStatsManager.increment(EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT);
return null;
}

// The pre- and post- tags are provided by the user or defaulted to <em> and </em>
String[] preTags = fieldContext.field.fieldOptions().preTags();
String[] postTags = fieldContext.field.fieldOptions().postTags();

// Get highlighted text - allow any exceptions from this call to propagate
String highlightedResponse = semanticHighlighterEngine.getHighlightedSentences(
modelId,
originalQueryText,
fieldText,
preTags[0],
postTags[0]
);
String highlightedResponse;
try {
// Get highlighted text
highlightedResponse = semanticHighlighterEngine.getHighlightedSentences(
modelId,
originalQueryText,
fieldText,
preTags[0],
postTags[0]
);
} catch (Exception e) {
EventStatsManager.increment(EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT);
throw e;
}

if (highlightedResponse == null || highlightedResponse.isEmpty()) {
log.warn("No highlighted text found for field {}", fieldContext.fieldName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,17 @@ public enum EventStatName implements StatName {
"processors.ingest",
EventStatType.TIMESTAMPED_EVENT_COUNTER
),
TEXT_CHUNKING_DELIMITER_EXECUTIONS("text_chunking_delimiter_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER);
TEXT_CHUNKING_DELIMITER_EXECUTIONS("text_chunking_delimiter_executions", "processors.ingest", EventStatType.TIMESTAMPED_EVENT_COUNTER),
SEMANTIC_HIGHLIGHTING_REQUEST_COUNT(
"semantic_highlighting_request_count",
"semantic_highlighting",
EventStatType.TIMESTAMPED_EVENT_COUNTER
),
SEMANTIC_HIGHLIGHTING_ERROR_COUNT(
"semantic_highlighting_error_count",
"semantic_highlighting",
EventStatType.TIMESTAMPED_EVENT_COUNTER
);

private final String nameString;
private final String path;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import static org.opensearch.neuralsearch.util.TestUtils.TEST_SPACE_TYPE;
import static org.opensearch.neuralsearch.util.TestUtils.createRandomVector;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
Expand All @@ -20,6 +21,7 @@
import org.opensearch.neuralsearch.query.NeuralQueryBuilder;
import org.opensearch.index.query.QueryStringQueryBuilder;
import org.opensearch.index.query.TermQueryBuilder;
import org.opensearch.neuralsearch.stats.events.EventStatName;
import org.opensearch.search.builder.SearchSourceBuilder;

import com.google.common.primitives.Floats;
Expand Down Expand Up @@ -77,7 +79,10 @@ private void initializeTestIndex() {
* 5. Neural Query
* 6. Hybrid Query
*/
@SneakyThrows
public void testQueriesWithSemanticHighlighter() {
// Enable stats for the test
updateClusterSettings("plugins.neural_search.stats_enabled", true);
// Set up models for the test
String textEmbeddingModelId = prepareModel();
String sentenceHighlightingModelId = prepareSentenceHighlightingModel();
Expand Down Expand Up @@ -334,6 +339,27 @@ public void testQueriesWithSemanticHighlighter() {
sentenceHighlightingModelId
);
verifyHighlightResults(searchResponse, "artificial intelligence");

// Verify that invalid model ID is handled gracefully and error stats are incremented
String invalidModelId = "invalid-model-id";
searchResponse = searchWithSemanticHighlighter(TEST_BASIC_INDEX_NAME, hybridMatchQuery, 10, TEST_TEXT_FIELD_NAME, invalidModelId);
assertNotNull("Search response should contain hits", searchResponse);

// Get stats
String responseBody = executeNeuralStatRequest(new ArrayList<>(), new ArrayList<>());
Map<String, Object> allNodesStats = parseAggregatedNodeStatsResponse(responseBody);

// Parse json to get stats
assertEquals(
"Stats should contain the expected number of requests",
8,
getNestedValue(allNodesStats, EventStatName.SEMANTIC_HIGHLIGHTING_REQUEST_COUNT)
);
assertEquals(
"Stats should contain the expected number of errors",
1,
getNestedValue(allNodesStats, EventStatName.SEMANTIC_HIGHLIGHTING_ERROR_COUNT)
);
}

private void verifyHighlightResults(Map<String, Object> searchResponse, String expectedContent) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.opensearch.search.lookup.SourceLookup;
import org.opensearch.test.OpenSearchTestCase;
import org.opensearch.neuralsearch.highlight.extractor.QueryTextExtractorRegistry;
import org.opensearch.neuralsearch.util.TestUtils;

import java.util.ArrayList;
import java.util.HashMap;
Expand Down Expand Up @@ -67,6 +68,9 @@ public void setUp() throws Exception {

fieldType = new TextFieldMapper.TextFieldType(TEST_FIELD);

// Initialize EventStatsManager for tests
TestUtils.initializeEventStatsManager();

// Setup default mock behavior
setupDefaultMockBehavior();
}
Expand Down
Loading