From b1d36712ac1af950379e2052c478d2cd7efa5f0c Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Mon, 23 Jun 2025 19:03:26 +0200 Subject: [PATCH 1/6] index create fixes --- specification/_types/analysis/icu-plugin.ts | 8 ++-- .../_types/analysis/token_filters.ts | 43 ++++++++++++++++++- specification/_types/analysis/tokenizers.ts | 5 +-- specification/_types/mapping/Property.ts | 2 + specification/_types/mapping/complex.ts | 2 + specification/_types/mapping/core.ts | 13 ++++++ specification/_types/mapping/geo.ts | 7 +++ specification/indices/_types/IndexSettings.ts | 33 +------------- 8 files changed, 73 insertions(+), 40 deletions(-) diff --git a/specification/_types/analysis/icu-plugin.ts b/specification/_types/analysis/icu-plugin.ts index 92659e58b0..9f0ed47802 100644 --- a/specification/_types/analysis/icu-plugin.ts +++ b/specification/_types/analysis/icu-plugin.ts @@ -52,16 +52,16 @@ export class IcuFoldingTokenFilter extends TokenFilterBase { export class IcuCollationTokenFilter extends TokenFilterBase { type: 'icu_collation' alternate?: IcuCollationAlternate - case_first?: IcuCollationCaseFirst - case_level?: boolean + caseFirst?: IcuCollationCaseFirst + caseLevel?: boolean country?: string decomposition?: IcuCollationDecomposition - hiragana_quaternary_mode?: boolean + hiraganaQuaternaryMode?: boolean language?: string numeric?: boolean rules?: string strength?: IcuCollationStrength - variable_top?: string + variableTop?: string variant?: string } diff --git a/specification/_types/analysis/token_filters.ts b/specification/_types/analysis/token_filters.ts index 6265537816..829b7b9cfd 100644 --- a/specification/_types/analysis/token_filters.ts +++ b/specification/_types/analysis/token_filters.ts @@ -274,7 +274,7 @@ export class HunspellTokenFilter extends TokenFilterBase { dictionary?: string /** Locale directory used to specify the `.aff` and `.dic` files for a Hunspell dictionary. * @aliases lang, language */ - locale: string + locale?: string /** If `true`, only the longest stemmed version of each token is included in the output. If `false`, all stemmed versions of the token are included. Defaults to `false`. */ longest_only?: boolean } @@ -383,6 +383,7 @@ export class PatternReplaceTokenFilter extends TokenFilterBase { type: 'pattern_replace' /** If `true`, all substrings matching the pattern parameter’s regular expression are replaced. If `false`, the filter replaces only the first matching substring in each token. Defaults to `true`. */ all?: boolean + flags?: string /** Regular expression, written in Java’s regular expression syntax. The filter replaces token substrings matching this pattern with the substring in the `replacement` parameter. */ pattern: string /** Replacement substring. Defaults to an empty substring (`""`). */ @@ -486,6 +487,14 @@ export class FlattenGraphTokenFilter extends TokenFilterBase { type: 'flatten_graph' } +export class BengaliNormalizationTokenFilter extends TokenFilterBase { + type: 'bengali_normalization' +} + +export class BrazilianNormalizationTokenFilter extends TokenFilterBase { + type: 'brazilian_stem' +} + export class GermanNormalizationTokenFilter extends TokenFilterBase { type: 'german_normalization' } @@ -535,6 +544,30 @@ export class SoraniNormalizationTokenFilter extends TokenFilterBase { type: 'sorani_normalization' } +export class ArabicStemTokenFilter extends TokenFilterBase { + type: 'arabic_stem' +} + +export class CzechStemTokenFilter extends TokenFilterBase { + type: 'czech_stem' +} + +export class FrenchStemTokenFilter extends TokenFilterBase { + type: 'french_stem' +} + +export class DutchStemTokenFilter extends TokenFilterBase { + type: 'dutch_stem' +} + +export class GermanStemTokenFilter extends TokenFilterBase { + type: 'german_stem' +} + +export class RussianStemTokenFilter extends TokenFilterBase { + type: 'russian_stem' +} + /** * @codegen_names name, definition * @ext_doc_id analysis-tokenfilters @@ -548,20 +581,27 @@ export type TokenFilter = string | TokenFilterDefinition */ export type TokenFilterDefinition = | ApostropheTokenFilter + | ArabicStemTokenFilter | ArabicNormalizationTokenFilter | AsciiFoldingTokenFilter + | BengaliNormalizationTokenFilter + | BrazilianNormalizationTokenFilter | CjkBigramTokenFilter | CjkWidthTokenFilter | ClassicTokenFilter | CommonGramsTokenFilter | ConditionTokenFilter + | CzechStemTokenFilter | DecimalDigitTokenFilter | DelimitedPayloadTokenFilter + | DutchStemTokenFilter | EdgeNGramTokenFilter | ElisionTokenFilter | FingerprintTokenFilter | FlattenGraphTokenFilter + | FrenchStemTokenFilter | GermanNormalizationTokenFilter + | GermanStemTokenFilter | HindiNormalizationTokenFilter | HunspellTokenFilter | HyphenationDecompounderTokenFilter @@ -585,6 +625,7 @@ export type TokenFilterDefinition = | PredicateTokenFilter | RemoveDuplicatesTokenFilter | ReverseTokenFilter + | RussianStemTokenFilter | ScandinavianFoldingTokenFilter | ScandinavianNormalizationTokenFilter | SerbianNormalizationTokenFilter diff --git a/specification/_types/analysis/tokenizers.ts b/specification/_types/analysis/tokenizers.ts index a7f53675fe..649b555eea 100644 --- a/specification/_types/analysis/tokenizers.ts +++ b/specification/_types/analysis/tokenizers.ts @@ -50,10 +50,7 @@ export class EdgeNGramTokenizer extends TokenizerBase { custom_token_chars?: string max_gram?: integer min_gram?: integer - /** - * @server_default [] - */ - token_chars?: TokenChar[] + token_chars?: string | TokenChar[] } export enum TokenChar { diff --git a/specification/_types/mapping/Property.ts b/specification/_types/mapping/Property.ts index bc34cf66d9..55f53b92d5 100644 --- a/specification/_types/mapping/Property.ts +++ b/specification/_types/mapping/Property.ts @@ -59,6 +59,7 @@ import { PercolatorProperty, RankFeatureProperty, RankFeaturesProperty, + RankVectorProperty, ScaledFloatNumberProperty, SearchAsYouTypeProperty, SemanticTextProperty, @@ -147,6 +148,7 @@ export type Property = | NestedProperty | ObjectProperty | PassthroughObjectProperty + | RankVectorProperty | SemanticTextProperty | SparseVectorProperty diff --git a/specification/_types/mapping/complex.ts b/specification/_types/mapping/complex.ts index 3f83b38aae..4de0ba1a85 100644 --- a/specification/_types/mapping/complex.ts +++ b/specification/_types/mapping/complex.ts @@ -33,6 +33,7 @@ export class FlattenedProperty extends PropertyBase { null_value?: string similarity?: string split_queries_on_whitespace?: boolean + time_series_dimensions?: string[] type: 'flattened' } @@ -59,6 +60,7 @@ export class PassthroughObjectProperty extends CorePropertyBase { export class AggregateMetricDoubleProperty extends PropertyBase { type: 'aggregate_metric_double' default_metric: string + ignore_malformed?: boolean metrics: string[] time_series_metric?: TimeSeriesMetricType } diff --git a/specification/_types/mapping/core.ts b/specification/_types/mapping/core.ts index 01a2f6400b..f43b425289 100644 --- a/specification/_types/mapping/core.ts +++ b/specification/_types/mapping/core.ts @@ -213,7 +213,14 @@ export class RankFeaturesProperty extends PropertyBase { type: 'rank_features' } +export class RankVectorProperty extends PropertyBase { + type: 'rank_vectors' + element_type?: RankVectorElementType + dims?: integer +} + export class SparseVectorProperty extends PropertyBase { + store?: boolean type: 'sparse_vector' } @@ -364,3 +371,9 @@ export class DynamicProperty extends DocValuesPropertyBase { precision_step?: integer locale?: string } + +export enum RankVectorElementType { + byte, + float, + bit +} diff --git a/specification/_types/mapping/geo.ts b/specification/_types/mapping/geo.ts index 86f14f4aa0..f0471115f1 100644 --- a/specification/_types/mapping/geo.ts +++ b/specification/_types/mapping/geo.ts @@ -29,6 +29,13 @@ export class GeoPointProperty extends DocValuesPropertyBase { on_script_error?: OnScriptError script?: Script type: 'geo_point' + time_series_metric?: GeoPointMetricType +} + +export enum GeoPointMetricType { + gauge, + counter, + position } export enum GeoOrientation { diff --git a/specification/indices/_types/IndexSettings.ts b/specification/indices/_types/IndexSettings.ts index c416167cce..3a64c9e58c 100644 --- a/specification/indices/_types/IndexSettings.ts +++ b/specification/indices/_types/IndexSettings.ts @@ -534,7 +534,6 @@ export class SlowlogTresholdLevels { } export class Storage { - type: StorageType /** * You can restrict the use of the mmapfs and the related hybridfs store type via the setting node.store.allow_mmap. * This is a boolean setting indicating whether or not memory-mapping is allowed. The default is to allow it. This @@ -542,36 +541,8 @@ export class Storage { * of memory maps so you need disable the ability to use memory-mapping. */ allow_mmap?: boolean -} - -/** - * @non_exhaustive - */ -export enum StorageType { - /** - * Default file system implementation. This will pick the best implementation depending on the operating environment, which - * is currently hybridfs on all supported systems but is subject to change. - */ - fs, - /** - * The NIO FS type stores the shard index on the file system (maps to Lucene NIOFSDirectory) using NIO. It allows multiple - * threads to read from the same file concurrently. It is not recommended on Windows because of a bug in the SUN Java - * implementation and disables some optimizations for heap memory usage. - */ - niofs, - /** - * The MMap FS type stores the shard index on the file system (maps to Lucene MMapDirectory) by mapping a file into - * memory (mmap). Memory mapping uses up a portion of the virtual memory address space in your process equal to the size - * of the file being mapped. Before using this class, be sure you have allowed plenty of virtual address space. - */ - mmapfs, - /** - * The hybridfs type is a hybrid of niofs and mmapfs, which chooses the best file system type for each type of file - * based on the read access pattern. Currently only the Lucene term dictionary, norms and doc values files are memory - * mapped. All other files are opened using Lucene NIOFSDirectory. Similarly to mmapfs be sure you have allowed - * plenty of virtual address space. - */ - hybridfs + /** How often store statistics are refreshed */ + stats_refresh_interval?: Duration } export class IndexingPressure { From 597638944386c76cedbcc66a54d62ef92c81a393 Mon Sep 17 00:00:00 2001 From: Laura Trotta <153528055+l-trotta@users.noreply.github.com> Date: Thu, 26 Jun 2025 16:34:54 +0200 Subject: [PATCH 2/6] Update specification/_types/analysis/token_filters.ts Co-authored-by: Quentin Pradet --- specification/_types/analysis/token_filters.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/_types/analysis/token_filters.ts b/specification/_types/analysis/token_filters.ts index 829b7b9cfd..c19d72877c 100644 --- a/specification/_types/analysis/token_filters.ts +++ b/specification/_types/analysis/token_filters.ts @@ -491,7 +491,7 @@ export class BengaliNormalizationTokenFilter extends TokenFilterBase { type: 'bengali_normalization' } -export class BrazilianNormalizationTokenFilter extends TokenFilterBase { +export class BrazilianStemTokenFilter extends TokenFilterBase { type: 'brazilian_stem' } From 01abd3f813602a8b6394bd7d3ad28844a2c7255c Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Thu, 26 Jun 2025 16:50:19 +0200 Subject: [PATCH 3/6] addressing review --- .../_types/analysis/token_filters.ts | 2 +- specification/indices/_types/IndexSettings.ts | 31 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/specification/_types/analysis/token_filters.ts b/specification/_types/analysis/token_filters.ts index c19d72877c..d4f5176994 100644 --- a/specification/_types/analysis/token_filters.ts +++ b/specification/_types/analysis/token_filters.ts @@ -585,7 +585,7 @@ export type TokenFilterDefinition = | ArabicNormalizationTokenFilter | AsciiFoldingTokenFilter | BengaliNormalizationTokenFilter - | BrazilianNormalizationTokenFilter + | BrazilianStemTokenFilter | CjkBigramTokenFilter | CjkWidthTokenFilter | ClassicTokenFilter diff --git a/specification/indices/_types/IndexSettings.ts b/specification/indices/_types/IndexSettings.ts index 3a64c9e58c..531062ffc0 100644 --- a/specification/indices/_types/IndexSettings.ts +++ b/specification/indices/_types/IndexSettings.ts @@ -534,6 +534,7 @@ export class SlowlogTresholdLevels { } export class Storage { + type: StorageType /** * You can restrict the use of the mmapfs and the related hybridfs store type via the setting node.store.allow_mmap. * This is a boolean setting indicating whether or not memory-mapping is allowed. The default is to allow it. This @@ -545,6 +546,36 @@ export class Storage { stats_refresh_interval?: Duration } +/** + * @non_exhaustive + */ +export enum StorageType { + /** + * Default file system implementation. This will pick the best implementation depending on the operating environment, which + * is currently hybridfs on all supported systems but is subject to change. + */ + fs, + /** + * The NIO FS type stores the shard index on the file system (maps to Lucene NIOFSDirectory) using NIO. It allows multiple + * threads to read from the same file concurrently. It is not recommended on Windows because of a bug in the SUN Java + * implementation and disables some optimizations for heap memory usage. + */ + niofs, + /** + * The MMap FS type stores the shard index on the file system (maps to Lucene MMapDirectory) by mapping a file into + * memory (mmap). Memory mapping uses up a portion of the virtual memory address space in your process equal to the size + * of the file being mapped. Before using this class, be sure you have allowed plenty of virtual address space. + */ + mmapfs, + /** + * The hybridfs type is a hybrid of niofs and mmapfs, which chooses the best file system type for each type of file + * based on the read access pattern. Currently only the Lucene term dictionary, norms and doc values files are memory + * mapped. All other files are opened using Lucene NIOFSDirectory. Similarly to mmapfs be sure you have allowed + * plenty of virtual address space. + */ + hybridfs +} + export class IndexingPressure { memory: IndexingPressureMemory } From f90bf1be9d21ecd866e9d931d63948314f34d456 Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Thu, 26 Jun 2025 16:56:25 +0200 Subject: [PATCH 4/6] persian stem add --- specification/_types/analysis/token_filters.ts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/specification/_types/analysis/token_filters.ts b/specification/_types/analysis/token_filters.ts index d4f5176994..6bc1ac98bc 100644 --- a/specification/_types/analysis/token_filters.ts +++ b/specification/_types/analysis/token_filters.ts @@ -568,6 +568,10 @@ export class RussianStemTokenFilter extends TokenFilterBase { type: 'russian_stem' } +export class PersianStemTokenFilter extends TokenFilterBase { + type: 'persian_stem' +} + /** * @codegen_names name, definition * @ext_doc_id analysis-tokenfilters @@ -621,6 +625,7 @@ export type TokenFilterDefinition = | PatternCaptureTokenFilter | PatternReplaceTokenFilter | PersianNormalizationTokenFilter + | PersianStemTokenFilter | PorterStemTokenFilter | PredicateTokenFilter | RemoveDuplicatesTokenFilter From a816e1b997f227be1e6070dde1182fe7e9c7653f Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Fri, 27 Jun 2025 15:38:57 +0200 Subject: [PATCH 5/6] add comment --- specification/_types/mapping/core.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/specification/_types/mapping/core.ts b/specification/_types/mapping/core.ts index f43b425289..66c4c6c952 100644 --- a/specification/_types/mapping/core.ts +++ b/specification/_types/mapping/core.ts @@ -213,6 +213,9 @@ export class RankFeaturesProperty extends PropertyBase { type: 'rank_features' } +/** + * Technical preview + */ export class RankVectorProperty extends PropertyBase { type: 'rank_vectors' element_type?: RankVectorElementType From 779cdd22cf35fba894f7fc967b0774b86d826036 Mon Sep 17 00:00:00 2001 From: Laura Trotta Date: Fri, 27 Jun 2025 15:40:10 +0200 Subject: [PATCH 6/6] revert making locale optional just for validation --- specification/_types/analysis/token_filters.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/_types/analysis/token_filters.ts b/specification/_types/analysis/token_filters.ts index 6bc1ac98bc..fc34cac69d 100644 --- a/specification/_types/analysis/token_filters.ts +++ b/specification/_types/analysis/token_filters.ts @@ -274,7 +274,7 @@ export class HunspellTokenFilter extends TokenFilterBase { dictionary?: string /** Locale directory used to specify the `.aff` and `.dic` files for a Hunspell dictionary. * @aliases lang, language */ - locale?: string + locale: string /** If `true`, only the longest stemmed version of each token is included in the output. If `false`, all stemmed versions of the token are included. Defaults to `false`. */ longest_only?: boolean }