 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.logging.DeprecationLogger;
+import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.common.text.Text;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
-import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
 import org.elasticsearch.search.fetch.FetchSubPhase;
 import org.elasticsearch.search.internal.SearchContext;
+import org.elasticsearch.index.IndexSettings;
 
 import java.io.IOException;
 import java.text.BreakIterator;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
-import java.util.Map;
 import java.util.stream.Collectors;
 
 import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
 
 public class UnifiedHighlighter implements Highlighter {
+    private static final DeprecationLogger deprecationLogger = new DeprecationLogger(Loggers.getLogger(UnifiedHighlighter.class));
+
     @Override
     public boolean canHighlight(FieldMapper fieldMapper) {
         return true;
@@ -67,8 +69,6 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
         Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
         CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0],
             field.fieldOptions().postTags()[0], encoder);
-        final int maxAnalyzedOffset = context.indexShard().indexSettings().getHighlightMaxAnalyzedOffset();
-
         List<Snippet> snippets = new ArrayList<>();
         int numberOfFragments;
         try {
@@ -83,21 +83,41 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
             final CustomUnifiedHighlighter highlighter;
             final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
             final OffsetSource offsetSource = getOffsetSource(fieldMapper.fieldType());
+
+            final int maxAnalyzedOffset = context.indexShard().indexSettings().getHighlightMaxAnalyzedOffset();
+            // Issue a deprecation warning if maxAnalyzedOffset is not set, and field length > default setting for 7.0
+            final int maxAnalyzedOffset7 = 1000000;
+            if ((offsetSource == OffsetSource.ANALYSIS) && (maxAnalyzedOffset == -1) && (fieldValue.length() > maxAnalyzedOffset7)) {
+                deprecationLogger.deprecated(
+                    "The length [" + fieldValue.length() + "] of [" + highlighterContext.fieldName + "] field of [" +
+                        hitContext.hit().getId() + "] doc of [" + context.indexShard().shardId().getIndexName() + "] index has " +
+                        "exceeded the allowed maximum of [" + maxAnalyzedOffset7 + "] set for the next major Elastic version. " +
+                        "This maximum can be set by changing the [" + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() +
+                        "] index level setting. " + "For large texts, indexing with offsets or term vectors is recommended!");
+            }
+            // Throw an error if maxAnalyzedOffset is explicitly set by the user, and field length > maxAnalyzedOffset
+            if ((offsetSource == OffsetSource.ANALYSIS) && (maxAnalyzedOffset > 0) && (fieldValue.length() > maxAnalyzedOffset)) {
+                throw new IllegalArgumentException(
+                    "The length [" + fieldValue.length() + "] of [" + highlighterContext.fieldName + "] field of [" +
+                        hitContext.hit().getId() + "] doc of [" + context.indexShard().shardId().getIndexName() + "] index " +
+                        "has exceeded [" + maxAnalyzedOffset + "] - maximum allowed to be analyzed for highlighting. " +
+                        "This maximum can be set by changing the [" + IndexSettings.MAX_ANALYZED_OFFSET_SETTING.getKey() +
+                        "] index level setting. " + "For large texts, indexing with offsets or term vectors is recommended!");
+            }
+
             if (field.fieldOptions().numberOfFragments() == 0) {
                 // we use a control char to separate values, which is the only char that the custom break iterator
                 // breaks the text on, so we don't lose the distinction between the different values of a field and we
                 // get back a snippet per value
                 CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
                 highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource, passageFormatter,
-                    field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue, field.fieldOptions().noMatchSize(),
-                    maxAnalyzedOffset);
+                    field.fieldOptions().boundaryScannerLocale(), breakIterator, fieldValue, field.fieldOptions().noMatchSize());
                 numberOfFragments = fieldValues.size(); // we are highlighting the whole content, one snippet per value
             } else {
                 //using paragraph separator we make sure that each field value holds a discrete passage for highlighting
                 BreakIterator bi = getBreakIterator(field);
                 highlighter = new CustomUnifiedHighlighter(searcher, analyzer, offsetSource, passageFormatter,
-                    field.fieldOptions().boundaryScannerLocale(), bi,
-                    fieldValue, field.fieldOptions().noMatchSize(), maxAnalyzedOffset);
+                    field.fieldOptions().boundaryScannerLocale(), bi, fieldValue, field.fieldOptions().noMatchSize());
                 numberOfFragments = field.fieldOptions().numberOfFragments();
             }
 
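A rough standalone sketch of the guard logic added in this hunk (my own illustration, not part of the patch; the class and method names below are hypothetical): the check only applies when offsets come from re-analysis, issues a deprecation warning when the limit is unset and the field exceeds the planned 7.0 default of 1,000,000 characters, and fails when an explicitly configured limit is exceeded.

// Hypothetical illustration of the warn-vs-throw decision; mirrors the patch above, not actual Elasticsearch API.
final class AnalyzedOffsetGuard {
    // Planned 7.0 default taken from the patch (maxAnalyzedOffset7).
    private static final int DEFAULT_MAX_ANALYZED_OFFSET_7 = 1000000;

    // maxAnalyzedOffset == -1 means the index-level setting is unset.
    static void check(boolean offsetsFromAnalysis, int maxAnalyzedOffset, int fieldLength) {
        if (offsetsFromAnalysis == false) {
            return; // fields indexed with offsets or term vectors are not re-analyzed, so no limit applies
        }
        if (maxAnalyzedOffset == -1 && fieldLength > DEFAULT_MAX_ANALYZED_OFFSET_7) {
            // limit unset: only a deprecation warning, since the default is enforced in the next major version
            System.out.println("deprecation: field length " + fieldLength
                + " exceeds the future default of " + DEFAULT_MAX_ANALYZED_OFFSET_7);
        }
        if (maxAnalyzedOffset > 0 && fieldLength > maxAnalyzedOffset) {
            // limit explicitly set: highlighting the field is rejected
            throw new IllegalArgumentException("field length " + fieldLength
                + " exceeds the configured maximum of " + maxAnalyzedOffset);
        }
    }
}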