diff --git a/open-api.yaml b/open-api.yaml index 3e959071..aeadaa3d 100644 --- a/open-api.yaml +++ b/open-api.yaml @@ -2,7 +2,7 @@ openapi: 3.1.0 info: title: Meilisearch Core API description: 'Search documents, configure and manage the Meilisearch engine.' - version: 1.1.0 + version: 1.2.0 contact: name: Meilisearch email: bonjour@Meilisearch.com @@ -1208,7 +1208,7 @@ components: examples: Payload Too Large: value: - message: The provided payload reached the size limit. + message: The provided payload reached the size limit. The maximum accepted payload size is 20.00 MiB. code: payload_too_large type: invalid_request link: 'https://docs.meilisearch.com/errors#payload_too_large' @@ -1664,6 +1664,7 @@ paths: - $ref: '#/components/parameters/limit' - $ref: '#/components/parameters/offset' - $ref: '#/components/parameters/fields' + - $ref: '#/components/parameters/filter' post: operationId: indexes.documents.create summary: Add or replace documents @@ -1805,6 +1806,93 @@ paths: description: Not Found parameters: - $ref: '#/components/parameters/indexUid' + '/indexes/{indexUid}/documents/fetch': + post: + operationId: indexes.documents.fetch + summary: Get Documents + description: | + Get [documents](https://docs.meilisearch.com/learn/core_concepts/documents.html) by batch. + tags: + - Documents + security: + - apiKey: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + offset: + type: number + description: Number of documents to skip. + default: 0 + limit: + type: number + description: Maximum number of documents returned. + default: 20 + fields: + type: array + description: 'Array of attributes whose fields will be present in the returned documents. By default all attributes will be returned.' 
+ items: + type: string + example: '["title", "overview"]' + default: '["*"]' + filter: + $ref: '#/components/schemas/filter' + examples: + Example: + value: + offset: 2 + limit: 5 + fields: + - name + - picture + filter: 'doggo = "bernese mountain"' + responses: + '200': + description: Ok + content: + application/json: + schema: + type: object + properties: + results: + type: array + items: + $ref: '#/components/schemas/document' + limit: + $ref: '#/components/schemas/limit' + offset: + $ref: '#/components/schemas/offset' + total: + $ref: '#/components/schemas/total' + required: + - results + - limit + - offset + - total + examples: + Example: + value: + results: + - id: 25684 + title: American Ninja 5 + poster: 'https://image.tmdb.org/t/p/w1280/iuAQVI4mvjI83wnirpD8GVNRVuY.jpg' + overview: 'When a scientists daughter is kidnapped, American Ninja, attempts to find her, but this time he teams up with a youngster he has trained in the ways of the ninja.' + release_date: 725846400 + - id: 45881 + title: The Bridge of San Luis Rey + poster: 'https://image.tmdb.org/t/p/w500/4X7quIcdkc24Cveg5XdpfRqxtYA.jpg' + overview: "The Bridge of San Luis Rey is American author Thornton Wilder's second novel, first published in 1927 to worldwide acclaim. It tells the story of several interrelated people who die in the collapse of an Inca rope-fiber suspension bridge in Peru, and the events that lead up to their being on the bridge.[ A friar who has witnessed the tragic accident then goes about inquiring into the lives of the victims, seeking some sort of cosmic answer to the question of why each had to die. The novel won the Pulitzer Prize in 1928." 
+ release_date: 1072915200 + limit: 20 + offset: 0 + total: 2 + '401': + $ref: '#/components/responses/401' + '404': + description: Not Found '/indexes/{indexUid}/documents/delete-batch': post: operationId: indexes.documents.removeBatch @@ -1850,6 +1938,46 @@ paths: - $ref: '#/components/parameters/Content-Type' parameters: - $ref: '#/components/parameters/indexUid' + '/indexes/{indexUid}/documents/delete': + post: + operationId: indexes.documents.remove + summary: Delete documents + description: Delete a selection of documents based on a filter. + tags: + - Documents + security: + - apiKey: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + filter: + $ref: '#/components/schemas/filter' + examples: + Example: + value: + filter: 'doggo = "bernese mountain"' + responses: + '202': + description: Accepted + content: + application/json: + schema: + $ref: '#/components/responses/202' + examples: + '202': + $ref: '#/components/examples/202_documentDeletion' + '401': + $ref: '#/components/responses/401' + '404': + description: Not Found + parameters: + - $ref: '#/components/parameters/Content-Type' + parameters: + - $ref: '#/components/parameters/indexUid' '/indexes/{indexUid}/documents/{documentId}': get: operationId: indexes.documents.get diff --git a/text/0028-indexing-csv.md b/text/0028-indexing-csv.md index 831cbf81..a43cb4e4 100644 --- a/text/0028-indexing-csv.md +++ b/text/0028-indexing-csv.md @@ -38,9 +38,9 @@ While there's [RFC 4180](https://tools.ietf.org/html/rfc4180) as a try to add a - CSV data format needs to contain a first line representing the list of attributes with the optionally chosen type separated from the attribute name by `:` character. The type is case insensitive. -> An attribute can be specificed with two types: `string` or `number`. A missing type will be interpreted as a `string` by default. 
+> An attribute can be specified with three types: `string`, `boolean` or `number`. A missing type will be interpreted as a `string` by default. > -> Valid headline example: "id:number","title:string","author","price:number" +> Valid headline example: "id:number","title:string","author","price:number","cute:boolean" - The following CSV lines will represent a document for Meilisearch. - A `,` character must separate each cell. @@ -57,14 +57,15 @@ While there's [RFC 4180](https://tools.ietf.org/html/rfc4180) as a try to add a ##### `null` value - If a field is of type `string`, then an empty cell is considered as a `null` value (e.g. `,,`), anything else is turned into a string value (e.g. `, ,` is a single whitespace string) -- If a field is of type `number`, when the trimmed field is empty, it's considered as a `null` value (e.g. `,,` `, ,`); otherwise Meilisearch try to parse the number. +- If a field is of type `number`, when the trimmed field is empty, it's considered as a `null` value (e.g. `,,` `, ,`); otherwise, Meilisearch tries to parse the number. +- If a field is of type `boolean`, when the trimmed field is empty, it's considered as a `null` value (e.g. `,,` `, ,`); otherwise, Meilisearch tries to parse the boolean as either `true` or `false`. ##### Example with a comma inside a cell Given the CSV payload ``` -"id:number","label","price:number","colors","description" -"1","t-shirt","4.99","red","Thus, you will rock at summer time." +"id:number","label","price:number","colors","description","contains_a_dog_picture:boolean" +"1","t-shirt","4.99","red","Thus, you will rock at summer time.","false" ``` the search result should be displayed as ```json @@ -75,7 +76,8 @@ the search result should be displayed as "label": "t-shirt", "price": 4.99, "colors": "red", - "description": "Hey, you will rock at summer time." + "description": "Thus, you will rock at summer time.", + "contains_a_dog_picture": false } ], ... 
@@ -172,6 +174,7 @@ curl \ - 🔴 Sending an invalid CSV format will lead to a 400 bad_request - **malformed_payload** error code. - 🔴 Sending a CSV header that does not conform to the specification will lead to a 400 bad_request - **malformed_payload** error code. - 🔴 Sending an invalid csv delimiter: not exactly one ASCII char. This will lead to a 400 bad_request - **invalid_document_csv_delimiter** error code. +- 🔴 Sending a CSV cell with the type `number` or `boolean` that can't be parsed will lead to a 400 bad_request - **malformed_payload** error code. ##### Errors Definition diff --git a/text/0030-asc-desc-criterion.md b/text/0030-asc-desc-criterion.md deleted file mode 100644 index 4b6dd082..00000000 --- a/text/0030-asc-desc-criterion.md +++ /dev/null @@ -1,247 +0,0 @@ -- Title: ASC / DESC Criterion -- Start Date: 2021-04-14 -- Specification PR: [#30](https://github.com/meilisearch/specifications/pull/30) -- MeiliSearch Tracking-Issues: [#161](https://github.com/meilisearch/milli/issues/161) - -# Asc/Desc Criterion - -## 1. Feature Description and Interaction - -### I. Summary - -Ranking rules are built-in rules that ensure relevancy in search results. Ranking rules are applied in a default order which can be changed in the settings. You can add or remove rules and change their order of importance. - -MeiliSearch allows you to create custom rules within the default rules. Custom rules are dedicated to sorting in ascending or descending order on an attribute. - -### II. Motivation - -We want to provide our users with an always improved usage experience. Relevance is essential in a search engine since it is what allows the engine to fulfill its objective. Delivering results that match user demands by allowing modification of the relevance is critical. - -The new search engine called Milli no longer processes this criterion of relevance the same way as the current MeilliSearch. 
This specification aims to make Milli identical both in API usage and in expected search results as the current ISO version (v0.20). - -### III. Additional Materials - -#### Algolia - -Algolia offers 8 classification rules to achieve relevance. - -- Number of typos -- Geolocation -- Number of words in the query matching in the result -- Filters -- Distance between words -- Best matching attribute in the record -- Number of words matching exactly (without typo) -- Custom ranking - -Note that Algolia doesn't recommend changing the order of the default criteria because of the fact that it works for the vast majority of their use cases. Like MeiliSearch (v0.20), documents, where the attribute specified in the custom ranking is missing, are pushed to the bottom of the search results. - -### IV.Explanation - -#### Current behavior of v0.20 - -In the case of one or many custom ASC / DESC rules configured at different places within the rankings rules, the sorting results can appear undefined when the documents do not necessarily contain the attributes which are set on this rule. - -E.g. 
-Given this set of documents - -```json -[ - { - "colors": "red", - "id": 1, - "label": "t-shirt", - "product_id": 1, - "price": 4.99 - }, - { - "colors": "black", - "id": 2, - "label": "t-shirt", - "product_id": 1 - }, - { - "colors": "red", - "id": 3, - "label": "t-short", - "product_id": 1, - "price": 19.99 - }, - { - "colors": "red", - "id": 4, - "label": "t-short", - "product_id": 1 - } -] -``` - -Given this custom ordering of ranking rules -```json -[ - "exactness", - "words", - "proximity", - "attribute", - "asc(price)", - "wordsPosition", - "typo" -] -``` - -A search with `q` containing `t-shirt` will return: -```json -{ - "hits": [ - { - "colors": "red", - "id": 1, - "label": "t-shirt", - "product_id": 1, - "price": 4.99 - }, - { - "colors": "black", - "id": 2, - "label": "t-shirt", - "product_id": 1 - }, - { - "colors": "red", - "id": 3, - "label": "t-short", - "product_id": 1, - "price": 19.99 - }, - { - "colors": "red", - "id": 4, - "label": "t-short", - "product_id": 1 - } - ], -} -``` - -A search with `q` containing `t-short` will return: -```json -{ - "hits": [ - { - "colors": "red", - "id": 3, - "label": "t-short", - "product_id": 1, - "price": 19.99 - }, - { - "colors": "red", - "id": 4, - "label": "t-short", - "product_id": 1 - }, - { - "colors": "red", - "id": 1, - "label": "t-shirt", - "product_id": 1, - "price": 4.99 - }, - { - "colors": "black", - "id": 2, - "label": "t-shirt", - "product_id": 1 - } - ] -} -``` - -This is because the exactness criterion is set before the asc/desc criterion. 
- -A search with `q` containing `t-shart` will return: -```json -{ - "hits": [ - { - "colors": "red", - "id": 1, - "label": "t-shirt", - "product_id": 1, - "price": 4.99 - }, - { - "colors": "red", - "id": 3, - "label": "t-short", - "product_id": 1, - "price": 19.99 - }, - { - "colors": "black", - "id": 2, - "label": "t-shirt", - "product_id": 1 - }, - { - "colors": "red", - "id": 4, - "label": "t-short", - "product_id": 1 - } - ] -} -``` -The results are firstly bucketed by the `asc(price)` criterion before the last `typo` criterion. - -This criterion can only be used with an attribute containing numbers. - -#### Current behavior of Milli - -If the attribute on which the ASC / DESC criterion is configured does not exist in all the documents of the index concerned by the search, Milli will discard them and never return them. This behavior is not necessarily what a end user would expect when searching. - -As v0.20, Milli can’t handle ASC / DESC criterion on string. Only numbers allow its use. - -#### Decisions - -1. We thought about allowing filtering of the attributes used in the ASC / DESC criterion as long as it would also be declared in the `attributesForFaceting` parameter. - -✅ We have decided that Milli will act exactly as MeiliSearch: no need to declare the ranking rule field in `attributeForFaceting` since the rankings rules and the attributes for faceting do not strictly meet the same needs. Moreover, this could confuse the users configuring the search engine. The criterion configuration will remain on the `rankingRrules` field of the [global settings endpoint](https://docs.meilisearch.com/reference/api/settings.html#get-settings) and in the -[specific ranking rules setting endpoint](https://docs.meilisearch.com/reference/api/ranking_rules.html). - -2. 
We wondered if Milli's current behavior, which is to discard documents from search results that do not have the attribute configured as ascending and descending criteria might make sense for the purpose of a user performing a search. - -✅ We have decided that Milli will act exactly as MeiliSearch concerning search results. The ASC / DESC criterion will no longer discard documents which do not have the attribute. - -### V. Impact on documentation - -- It should inform users that having custom ASC / DESC criteria on attributes that are not always defined for each document may return results that are not necessarily relevant in the end user's eye. -> Already done by https://github.com/meilisearch/documentation/pull/905 - -- To be able to use this criterion on a date format, it must indicate that the timestamp is to be preferred since the string type is not supported. -> Related to https://github.com/meilisearch/documentation/issues/840. - -### VI. Impact on SDKs -N/A - -## 2. Technical Aspects - -To apply the ranking rule, the search engine needs to create a database. This database is the same as we create to apply facets and filters on an attribute. -However, the users will not pass the attribute into `attributeForFaceting` when setting a ranking rule. It means the search engine must create this database for the related attribute. - -ex: -```json -{ - "rankingRules": ["words", "typo", "asc(price)", "proximity"], - "attributesForFaceting": ["genre"] -} -``` -means the search engine has to create facet databases for `genre` and `price`. - -⚠️ Following this example, it also means the search engine would be technically able to apply filters and facet distribution on `price`, however, we should prevent this. To avoid confusion, the search engine should prevent the users to execute a filter or get facet distribution on the ranking attributes. Only the ranking rule should be available for this field. 
- -If the user wants to filter on that attribute, he will have to add it in `attributesForFaceting` as well. - -## 3. Future possibilities - -- ASC / DESC criterion on string value. \ No newline at end of file diff --git a/text/0034-telemetry-policies.md b/text/0034-telemetry-policies.md index 442f363a..01e8e8b6 100644 --- a/text/0034-telemetry-policies.md +++ b/text/0034-telemetry-policies.md @@ -43,7 +43,9 @@ The collected data is sent to [Segment](https://segment.com/). Segment is a plat | Documents Searched by Multi-Search POST | Aggregated event on all received requests via the `POST`- `/multi-search` route during one hour or until a batch size reaches `500Kb`. | | Documents Added | Aggregated event on all received requests via the `POST` - `/indexes/:indexUid/documents` route during one hour or until a batch size reaches `500Kb`. | | Documents Updated | Aggregated event on all received requests via the `PUT` - `/indexes/:indexUid/documents` route during one hour or until a batch size reaches `500Kb`. | -| Documents Deleted | Aggregated event on all received requests via the `DELETE` - `/indexes/:indexUid/documents`, `DELETE` - `/indexes/:indexUid/documents/:documentId`, `POST` - `indexes/:indexUid/documents/delete-batch` routes during one hour or until a batch size reaches `500Kb`. | +| Documents Fetched GET | Aggregated event on all received requests via the `GET` - `/indexes/:indexUid/documents` or `GET` - `/indexes/:indexUid/documents/:doc_id` routes during one hour or until a batch size reaches `500Kb`. | +| Documents Fetched POST | Aggregated event on all received requests via the `POST` - `/indexes/:indexUid/documents/fetch` routes during one hour or until a batch size reaches `500Kb`. 
| +| Documents Deleted | Aggregated event on all received requests via the `DELETE` - `/indexes/:indexUid/documents`, `DELETE` - `/indexes/:indexUid/documents/:documentId`, `POST` - `indexes/:indexUid/documents/delete-batch`, and `POST` - `indexes/:indexUid/documents/delete` routes during one hour or until a batch size reaches `500Kb`. | | Index Created | Occurs when an index is created via `POST` - `/indexes`. | | Index Updated | Occurs when an index is updated via `PUT` - `/indexes/:indexUid`. | | Indexes Swapped | Occurs when indexes are swapped via `POST` - `/swap-indexes`. | @@ -98,7 +100,8 @@ The collected data is sent to [Segment](https://segment.com/). Segment is a plat | `infos.ssl_resumption` | `true` if `--ssl-resumption`/`MEILI_SSL_RESUMPTION` is specified, otherwise `false` | false | Every Hour | | `infos.ssl_tickets` | `true` if `--ssl-tickets`/`MEILI_SSL_TICKETS` is specified, otherwise `false` | false | Every Hour | | `infos.with_configuration_file` | `true` if the instance is launched with a configuration file, otherwise `false` | false | Every Hour | -| `infos.experimental_enable_metrics` | `true` if `--experimental-enable-metrics`/`MEILI_EXPERIMENTAL_ENABLE_METRICS` is specified at launch, otherwise `false` | `false` | Every Hour | +| `infos.experimental_enable_metrics` | `true` if `--experimental-enable-metrics`/`MEILI_EXPERIMENTAL_ENABLE_METRICS` is specified at launch, otherwise `false` | `false` | Every Hour | +| `infos.experimental_reduce_indexing_memory_usage` | `true` if `--experimental-reduce-indexing-memory-usage`/`MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE` is specified at launch, otherwise `false` | `false` | Every Hour | | `system.distribution` | Distribution on which MeiliSearch is launched | Arch Linux | Every hour | | `system.kernel_version` | Kernel version on which MeiliSearch is launched | 5.14.10 | Every hour | | `system.cores` | Number of cores | 24 | Every hour | @@ -112,15 +115,15 @@ The collected data is sent to 
[Segment](https://segment.com/). Segment is a plat | `requests.99th_response_time` | Highest latency from among the fastest 99% of successful search requests | 57ms | `Documents Searched POST`, `Documents Searched GET`| | `requests.total_succeeded` | Total number of successful requests in this batch | 3456 | `Documents Searched POST`, `Documents Searched GET`, `Documents Searched by Multi-Search POST` | | `requests.total_failed` | Total number of failed requests in this batch | 24 | `Documents Searched POST`, `Documents Searched GET`, `Documents Searched by Multi-Search POST` | -| `requests.total_received` | Total number of received requests in this batch | 3480 | `Documents Searched POST`, `Documents Searched GET`, `Documents Deleted`, `Health Seen`, `Tasks Seen`, `Documents Searched by Multi-Search POST` | +| `requests.total_received` | Total number of received requests in this batch | 3480 | `Documents Searched POST`, `Documents Searched GET`, `Documents Deleted`, `Documents Fetched GET`, `Documents Fetched POST`, `Health Seen`, `Tasks Seen`, `Documents Searched by Multi-Search POST` | | `sort.with_geoPoint` | `true` if the sort rule `_geoPoint` was used in this batch, otherwise `false` | true | `Documents Searched POST`, `Documents Searched GET` | | `sort.avg_criteria_number` | Average number of sort criteria among all requests containing the `sort` parameter in this batch | 2 | `Documents Searched POST`, `Documents Searched GET` | | `filter.with_geoRadius` | `true` if the filter rule `_geoRadius` was used in this batch, otherwise `false` | false | `Documents Searched POST`, `Documents Searched GET` | | `filter.with_geoBoundingBox` | `true` if the filter rule `_geoBoundingBox` was used in this batch, otherwise `false`| false | `Documents Searched POST`, `Documents Searched GET` | | `filter.most_used_syntax` | Most used filter syntax among all requests containing the `filter` parameter in this batch | string | `Documents Searched POST`, `Documents Searched GET` 
| | `q.max_terms_number` | Highest number of terms given for the `q` parameter in this batch | 5 | `Documents Searched POST`, `Documents Searched GET` | -| `pagination.max_limit` | Highest value given for the `limit` parameter in this batch | 60 | `Documents Searched POST`, `Documents Searched GET` | -| `pagination.max_offset` | Highest value given for the `offset` parameter in this batch | 1000 | `Documents Searched POST`, `Documents Searched GET` | +| `pagination.max_limit` | Highest value given for the `limit` parameter in this batch | 60 | `Documents Searched POST`, `Documents Searched GET`, `Documents Fetched GET`, `Documents Fetched POST` | +| `pagination.max_offset` | Highest value given for the `offset` parameter in this batch | 1000 | `Documents Searched POST`, `Documents Searched GET`, `Documents Fetched GET`, `Documents Fetched POST` | | `pagination.most_used_navigation` | Most used search results navigation among all search requests in this batch. `estimated` / `exhaustive` | `estimated` | `Documents Searched POST`, `Documents Searched GET` | | `formatting.max_attributes_to_retrieve` | The maximum number of attributes to retrieve encountered among all requests in this batch. | 100 | `Documents Searched POST`, `Documents Searched GET` | | `formatting.max_attributes_to_highlight` | The maximum number of attributes to highlight encountered among all requests in this batch. | 100 | `Documents Searched POST`, `Documents Searched GET` | @@ -180,8 +183,9 @@ The collected data is sent to [Segment](https://segment.com/). 
Segment is a plat | `swap_operation_number` | The number of swap operation given in `POST /swap-indexes` API call | 2 | `Indexes Swapped` | | `matching_strategy.most_used_strategy` | Most used word matching strategy among all search requests in this batch | `last` | `Documents Searched POST`, `Documents Searched GET` | | `per_document_id` | `true` if `DELETE /indexes/:indexUid/documents/:documentUid` endpoint was used in this batch, otherwise `false` | false | `Documents Deleted` | -| `clear_all` | `true` if `DELETE /indexes/:indexUid/documents` endpoint was used in this batch, otherwise `false` | false | `Documents Deleted` | | `per_batch` | `true` if `POST /indexes/:indexUid/documents/delete-batch` endpoint was used in this batch, otherwise `false` | false | `Documents Deleted` | +| `per_filter` | `true` if a filter was used with the `POST /indexes/:indexUid/documents/delete`, `GET /indexes/:indexUid/documents` or `POST /indexes/:indexUid/documents/fetch` endpoint in this batch, otherwise `false` | false | `Documents Fetched GET`, `Documents Fetched POST`, `Documents Deleted` | +| `clear_all` | `true` if `DELETE /indexes/:indexUid/documents` endpoint was used in this batch, otherwise `false` | false | `Documents Deleted` | ---- @@ -394,6 +398,31 @@ This property allows us to gather essential information to better understand on | per_document_id | `true` if `DELETE /indexes/:indexUid/documents/:documentUid` endpoint is called in the aggregated event, otherwise `false` | `false` | | clear_all | `true` if `DELETE /indexes/:indexUid/documents` endpoint is called in the aggregated event, otherwise `false` | `false` | | per_batch | `true` if `POST /indexes/:indexUid/documents/delete-batch` endpoint is called in the aggregated event, otherwise `false` | `false` | +| per_filter | `true` if `POST /indexes/:indexUid/documents/delete` endpoint is called in the aggregated event, otherwise `false` | `false` | + +## `Documents Fetched GET` + +> The Documents Fetched GET event is sent once an hour or when a batch reaches the maximum size of `500kb`. 
+ +| Property name | Description | Example | +|---------------|-------------|---------| +| `requests.total_received` | Total number of requests received in this batch | 325 | +| `per_document_id` | `true` if `GET /indexes/:indexUid/documents/:doc_id` endpoint was used in this batch, otherwise `false` | false | +| `per_filter` | `true` if `GET /indexes/:indexUid/documents` endpoint was used with a filter in this batch, otherwise `false` | false | +| `pagination.max_limit` | Highest value given for the `limit` parameter in this batch | 60 | +| `pagination.max_offset` | Highest value given for the `offset` parameter in this batch | 1000 | + +## `Documents Fetched POST` + +> The Documents Fetched POST event is sent once an hour or when a batch reaches the maximum size of `500kb`. + +| Property name | Description | Example | +|---------------|-------------|---------| +| `requests.total_received` | Total number of requests received in this batch | 325 | +| `per_document_id` | Always `false` for this event | false | +| `per_filter` | `true` if `POST /indexes/:indexUid/documents/fetch` endpoint was used with a filter in this batch, otherwise `false` | false | +| `pagination.max_limit` | Highest value given for the `limit` parameter in this batch | 60 | +| `pagination.max_offset` | Highest value given for the `offset` parameter in this batch | 1000 | ## `Settings Updated` diff --git a/text/0036-exactness-criterion.md b/text/0036-exactness-criterion.md deleted file mode 100644 index 5cba0f00..00000000 --- a/text/0036-exactness-criterion.md +++ /dev/null @@ -1,139 +0,0 @@ -- Title: Exactness Criterion -- Start Date: 2021-04-22 -- Specification PR: [#36](https://github.com/meilisearch/specifications/pull/36) -- MeiliSearch Tracking-Issues: [milli/#165](https://github.com/meilisearch/milli/issues/165) - -# Exactness Criterion - -## 1. Functional Specification - -### I. 
Summary - -Exactness Criterion is used within ranking rules to sort results by the similarity of the matched words with the query words. Documents that contain exactly the same terms as the ones queried first are placed first. - -> Ranking rules are built-in rules that ensure relevancy in search results. Ranking rules are applied in a default order which can be changed in the settings. You can add or remove rules and change their order. - -### II. Motivation - -While we continue to advance the new Milli engine towards a version offering the same features as Meilisearch. - -This specification aims to change the behavior of the `exactness` criterion to enhance the relevanvy of search results. - -### III. Additional Materials - -#### TypeSense - -Typesense calculates a `_text_match` score for ranking the documents about text relevance. - -`_text_match` score is computed from different criterias like frequency, edit distance, proximity and, ordering of `query_by` fields. [See more details here](https://typesense.org/docs/0.19.0/guide/ranking-and-relevance.html#text-match-score). - -If documents have the exact same `_text_match` score, TypeSense accepts two more numerical fields in the `sort_by` parameter to break the tie. - -E.g. `sort_by=_text_match:desc,price:desc,rating:desc` - -It is possible to put a numerical field before the `_text_match` score. - -E.g. `sort_by=price:desc,_text_match:desc` - -`sort_by` parameter is set at query time. - -#### Algolia - -Algolia explains the Exact criterion like: - -> Records with words that match query terms exactly are ranked higher. The more matching words in a record’s attribute, the higher a record is ranked. By default, an exact match occurs when a full word in a query is matched without typos to a word in an attribute. An inexact match is one that has typos or only matches on a prefix. - -> Additionally, synonym matching and plural/singular matching are considered exact. 
Thus, a word is considered an exact match if its synonym matches a query exactly. - -Adding some exceptions by default: - -[...] single-word matches on multi-word attributes aren’t considered exact matches by default [...] -[...] Multiword synonyms are not counted as exact matches by default [...] - -It is possible to specify that multi-words synonyms are considered an exact match using `alternativesAsExact` setting. Algolia also permit to disable some attributes to be used for exact criterion with `disableExactOnAttributes`setting. - -### IV. Explanation - -#### Current MeiliSearch Behavior (0.20) - -For each word of the query we match documents: - -- without typo -- without prefix -- without ngrams -- without multi-words synonyms -- without word split - -> If the query contains only one word, only attribute containing this one word value can match. - -##### Pros & Cons - -This behavior of the Exact criterion gives a better rank to documents that match the words of the original query, ranking ngrams, complex synonyms, and word split after. - -In the specific case of a query containing only 1 word, only 1-word attributes matching will boost documents that have an attribute that exactly contains the query word. - -This is useful when an ID is searched but in others cases it could impact the relevancy of search results. - -Moreover, when users search with a multi-words query, attributes that exactly contains the query are not boosted. - -Because MeiliSearch is a search as you type engine, attributes that exactly begin by the original query should also be boosted. - -#### Milli Behavior (0.21) - -We will divide Exactness into three layers of ranking. 
- -##### Layer 1: Try to match exactly the query in an attribute - -Any document that has an attribute that contains exactly the query: - -- without typo -- without prefix -- without ngrams -- without multi-words synonyms -- without word split -- with words in the right order - -has a rank of `0` (perfect match) - -##### Layer 2: Try to match exactly the query at the start of an attribute - -Any document that has an attribute that contains the query at the n firsts positions: - -- without typo -- without prefix -- without ngrams -- without multi-words synonyms -- without word split -- with words in the right order - -has a rank of `1` (the user didn't finish to type the search but the final query could possibly be an exact match) - -##### Layer 3: Try to match exactly the word of the query anywhere in the document - -Any other document has a rank of `2 + 1` for each word that is not an exact match. - -A word is considered as an exact match when it matches: - -- without typo -- without prefix -- without ngrams -- without multi-words synonyms -- without word split - -> Given theses layered operations, the higher the rank of a document is, the less relevant it is. - -### V. Impact on Documentation - -N/A - -### VI. Impact on SDKs -N/A - -## 2. Technical Aspects -N/A - -## 3. Future Possibilities - -- In the second layer, we can rank `Any document that has an attribute that contains exactly the query` with the last word considered as prefix. -- Give the user the possibility to deactivate some fields for the Exactness criterion. -- Rename the Exactness criterion. 
diff --git a/text/0055-sort.md b/text/0055-sort.md index 6a0e3703..026859c8 100644 --- a/text/0055-sort.md +++ b/text/0055-sort.md @@ -177,7 +177,7 @@ Request body ```json { - "message": "The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.", + "message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.", "errorCode": "invalid_sort", "errorType": "invalid_request_error", "errorLink": "https://docs.meilisearch.com/errors#invalid_sort" diff --git a/text/0060-tasks-api.md b/text/0060-tasks-api.md index 8d5704bf..cb1b3b7e 100644 --- a/text/0060-tasks-api.md +++ b/text/0060-tasks-api.md @@ -4,7 +4,7 @@ ### I. Summary -This specification describes the API endpoints for handling asynchronous tasks. +This specification describes the behavior of the task queue and the API endpoints for handling asynchronous operations. ### II. Motivation @@ -103,6 +103,7 @@ List of global tasks by `type`: | name | description | |---------------------|--------------------------------------| | providedIds | Number of provided document ids. | +| originalFilter | The filter used to delete documents. `null` if `filter` was not used for the deletion request. | | deletedDocuments | Number of documents finally deleted. | ##### indexCreation @@ -1008,10 +1009,15 @@ The task types are listed in decreasing order of priority: 4. `dumpCreation` 5. All other task types with by their enqueued at order. +### 2.2. Auto deletion of tasks + +Since Meilisearch can't store tasks forever, at some point, it needs to free up some space in its queue. The engine will try to delete the 100k last finished tasks upon reaching 1M total tasks stored. + +That means after a batch finishes processing and right before processing the following enqueued tasks, Meilisearch will check the number of tasks currently written in its queue. 
If this number is more than 1M, the engine will enqueue a new task that automatically deletes the last 100k **finished** tasks. That means if there are only 2k finished tasks, only these ones will be deleted. And if all the tasks in the queue are still enqueued, then nothing will be deleted, and the engine will continue to process the next enqueued tasks. + ## 3. Future Possibilities - Use Hateoas capability to give direct access to a `task` resource. - Add dedicated task type names modifying a sub-setting. e.g. `SearchableAttributesUpdate`. -- Add an archived state for old `tasks`. - Add the `API Key` identity that added a `task`. - Make dump import visible as a task. diff --git a/text/0061-error-format-and-definitions.md b/text/0061-error-format-and-definitions.md index 63b5c767..2d1abf03 100644 --- a/text/0061-error-format-and-definitions.md +++ b/text/0061-error-format-and-definitions.md @@ -878,6 +878,54 @@ HTTP Code: `400 Bad Request` --- +## invalid_document_filter + +`Synchronous` / `Asynchronous` + +### Context + +This error occurs if a value with a different type than `String`, `Array of String` or `Array of array of String` for `filter` is specified. + +### Error Definition + +HTTP Code: `400 Bad Request` + +```json +{ + "message": "`:deserr_helper`", + "code": "invalid_document_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" +} +``` + +--- + +## missing_document_filter + +`Synchronous` + +### Context + +This error happens when `filter` is missing from a delete documents by filter operation. 
+ +### Error Definition + +In the first case: + +HTTP Code: `400 Bad Request` + +```json +{ + "message": "`filter` field is mandatory.", + "code": "missing_document_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_document_filter" +} +``` + +--- + ## invalid_document_limit `Synchronous` @@ -1829,7 +1877,7 @@ HTTP Code: `413 Payload Too Large` ```json { - "message": "The provided payload reached the size limit.", + "message": "The provided payload reached the size limit. The maximum accepted payload size is :playloadSizeLimit.", "code": "payload_too_large", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#payload_too_large" diff --git a/text/0085-api-keys.md b/text/0085-api-keys.md index 7fd5293b..146c9712 100644 --- a/text/0085-api-keys.md +++ b/text/0085-api-keys.md @@ -315,8 +315,8 @@ Create an API key. |------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | search | Provides access to `GET` and `POST` methods on `/indexes/:authorizedIndexes/search` routes. | | documents.add | Provides access to `POST` and `PUT` on `/indexes/:authorizedIndexes/documents` routes. | -| documents.get | Provides access to `GET` methods on `/indexes/:authorizedIndexes/documents` and `/indexes/:authorizedIndexes/documents/:documentId` routes. | -| documents.delete | Provides access to `DELETE` method on `/indexes/:authorizedIndexes/documents/:documentId`, `indexes/:authorizedIndexes/documents/:documentId` and `POST` method on `/indexes/:authorizedIndexes/documents/delete-batch` routes. | +| documents.get | Provides access to `GET` methods on `/indexes/:authorizedIndexes/documents`, `/indexes/:authorizedIndexes/documents/:documentId` and `POST` methods on `/indexes/:authorizedIndexes/documents/fetch` routes. 
| +| documents.delete | Provides access to `DELETE` method on `/indexes/:authorizedIndexes/documents/:documentId`, `indexes/:authorizedIndexes/documents/:documentId` and `POST` method on `/indexes/:authorizedIndexes/documents/delete-batch` and `/indexes/:authorizedIndexes/documents/delete` routes. | | indexes.create | Provides access to `POST` `/indexes`. **⚠️ `indexes` field should indicate the newly created index or having `[*]` to permits access on it.**. | | indexes.get | Provides access to `GET` `/indexes` and `/indexes/:authorizedIndexes`. **⚠️Non-authorized `indexes` are omitted from the response on `/indexes`**. | | indexes.update | Provides access to `PUT` `/indexes/:authorizedIndexes`. | diff --git a/text/0118-search-api.md b/text/0118-search-api.md index f9a4707e..0b3ca2c5 100644 --- a/text/0118-search-api.md +++ b/text/0118-search-api.md @@ -156,9 +156,15 @@ The grammar for the value of a filterable attribute is the same as the grammar f - Exists: * `attribute EXISTS` * `attribute NOT EXISTS` -- In: +- IN: * `attribute IN[value, value, etc.]` * `attribute NOT IN[value, value, etc.]` +- IS EMPTY: + * `attribute IS EMPTY` + * `attribute IS NOT EMPTY` +- IS NULL: + * `attribute IS NULL` + * `attribute IS NOT NULL` - AND: `filter AND filter` - OR: `filter OR filter` - NOT: `NOT filter` @@ -298,7 +304,7 @@ size = 0 OR NOT size = 2 -> selects [0,1] NOT (NOT size = 0) -> selects [0] ``` -###### 3.1.2.1.10 Exists +###### 3.1.2.1.10 EXISTS The `EXISTS` operator selects the documents for which the filterable attribute exists, even if its value is `null` or an empty array. It is a postfix operator that takes an attribute name as argument. @@ -325,7 +331,7 @@ For example, with the documents: ``` Then the filter `colour EXISTS` selects the document ids `[0,1]` while the filter `colour NOT EXISTS` or `NOT colour EXISTS` selects the document ids `[2]`. 
-###### 3.1.2.1.11 In +###### 3.1.2.1.11 IN The `IN[..]` operator is a more concise way to combine equality operators. It is a postfix operator that takes an attribute name on the left hand side and an array of values on the right hand side. An array of value is a comma-separated list of values delimited by square brackets. @@ -354,6 +360,82 @@ attribute != value1 AND attribute != value2 AND ... - The `_geoRadius` operator selects the documents whose geographical coordinates fall within a certain range of a given coordinate. See [GeoSearch](0059-geo-search.md) for more information. - The `_geoBoundingBox` operator selects the documents whose geographical coordinates fall within a square described by the given coordinates. See [GeoSearch](0059-geo-search.md) for more information. +###### 3.1.2.1.12 IS EMPTY + +The `IS EMPTY` operator selects the documents for which the filterable attribute exists and is empty. If the attribute doesn't exist then it is not empty and the document will not be returned. It is a postfix operator that takes an attribute name as argument. + +The negated form of `IS EMPTY` can be written in two ways: +``` +attribute IS NOT EMPTY +NOT attribute IS EMPTY +``` +Both forms are equivalent. They select the documents for which the attribute is not empty. + +Here is the list of JSON values that are considered empty: + - `""` + - `[]` + - `{}` + +For example, with the documents: +```json +[{ + "id": 0, + "colour": [] +}, +{ + "id": 1, + "colour": null +}, +{ + "id": 2, + "colour": "" +}, +{ + "id": 3, + "colour": {} +}, +{ + "id": 4 +}] +``` +Then the filter `colour IS EMPTY` selects the document ids `[0,2,3]` while the filter `colour IS NOT EMPTY` or `NOT colour IS EMPTY` selects the document ids `[1,4]`. + +###### 3.1.2.1.13 IS NULL + +The `IS NULL` operator selects the documents for which the filterable attribute exists and is `null`. If the attribute doesn't exist then it is not `null` and the document will not be returned.
It is a postfix operator that takes an attribute name as argument. + +The negated form of `IS NULL` can be written in two ways: +``` +attribute IS NOT NULL +NOT attribute IS NULL +``` +Both forms are equivalent. They select the documents for which the attribute is not `null`. + +For example, with the documents: +```json +[{ + "id": 0, + "colour": [] +}, +{ + "id": 1, + "colour": null +}, +{ + "id": 2, + "colour": "" +}, +{ + "id": 3, + "colour": {} +}, +{ + "id": 4 +}] +``` +Then the filter `colour IS NULL` selects the document ids `[1]` while the filter `colour IS NOT NULL` or `NOT colour IS NULL` selects the document ids `[0,2,3,4]`. + + ##### 3.1.2.2. Array Syntax The array syntax is an alternative way to combine different filters with `OR` and `AND` operators. diff --git a/text/0119-instance-options.md b/text/0119-instance-options.md index 2dab22fe..069c8fb7 100644 --- a/text/0119-instance-options.md +++ b/text/0119-instance-options.md @@ -118,6 +118,7 @@ The expected behavior of each flag is described in the list above. - [SSL tickets](#3325-ssl-tickets) - [Config file path](#3326-config-file-path) - [Experimental enable Metrics](#3327-experimental-enable-metrics) +- [Experimental reduce indexing memory usage](#3328-experimental-reduce-indexing-memory-usage) #### 3.3.1. Database path @@ -487,6 +488,16 @@ See [Configuration File](0185-configuration-file.md) specification details. Activate the `/metrics` endpoint to collect Meilisearch metrics for monitoring purposes. See [0174-metrics-api.md](0174-metrics-api.md). +#### 3.3.28. Experimental Reduce Indexing Memory Usage + +**Environment variable**: `MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE` +**CLI option**: `--experimental-reduce-indexing-memory-usage` +**Default**: Disabled + +⚠️ This command-line option does not take any values. Assigning a value will throw an error. + +Enables the `MDB_WRITEMAP` option of LMDB, making the internal key-value store use much less RAM than usual. + ## 4. 
Technical Aspects N/A diff --git a/text/0124-documents-api.md b/text/0124-documents-api.md index 729c2f99..6bab2579 100644 --- a/text/0124-documents-api.md +++ b/text/0124-documents-api.md @@ -20,7 +20,7 @@ Documents are stored inside indexes. Manipulate documents of a Meilisearch index. -- [3.1.1. `GET` - `indexes/:index_uid/documents`](#311-get---indexesindexuiddocuments) +- [3.1.1. (Fetch Documents) - `GET` - `indexes/:index_uid/documents` and `POST` - `indexes/:index_uid/documents/fetch`](#311-fetch-documents-get-indexesindexuiddocuments-and-post-indexesindexuiddocumentsfetch) - [3.1.2. `GET` - `indexes/:index_uid/documents/:document_id`](#312-get---indexesindexuiddocumentsdocumentid) - [3.1.3. `POST` - `indexes/:index_uid/documents`](#313-post---indexesindexuiddocuments) - [3.1.4. `PUT` - `indexes/:index_uid/documents`](#314-put---indexesindexuiddocuments) @@ -28,13 +28,17 @@ Manipulate documents of a Meilisearch index. - [3.1.6. `DELETE` - `indexes/:index_uid/documents/:document_id`](#316-delete---indexesindexuiddocumentsdocumentid) - [3.1.7. `POST` - `indexes/:index_uid/documents/delete-batch`](#317-post---indexesindexuiddocumentsdelete-batch) -#### 3.1.1. `GET` - `indexes/:index_uid/documents` +#### 3.1.1. (Fetch Documents) - `GET` - `indexes/:index_uid/documents` and `POST` - `indexes/:index_uid/documents/fetch` + +Meilisearch exposes 2 routes to get the documents: +- GET `indexes/:index_uid/documents`, which gets its parameters as query parameters. +- POST `indexes/:index_uid/documents/fetch`, which gets its parameters in a JSON payload. List all documents of a Meilisearch index. The query parameters `offset` and `limit` permit browsing through all documents of an index. -Meilisearch orders documents depending on the hash of their id. +Meilisearch orders documents depending on the order in which they were inserted into the database. ##### 3.1.1.1. Path Parameters @@ -49,13 +53,20 @@ Meilisearch orders documents depending on the hash of their id.
Unique identifier of an index. -##### 3.1.1.2. Query Parameters -| Field | Type | Required | -|--------------------------|--------------------------|----------| -| `offset` | Integer / `null` | false | -| `limit` | String / `null` | false | -| `fields` | String / `null` | false | +##### 3.1.1.2. Parameters + +The following parameters need to be sent as: +- Query parameter for the `GET` - `indexes/:index_uid/documents` route. +- JSON body for the `POST` - `indexes/:index_uid/documents/fetch` route. + +| Field | Type | Required | +|--------------------------|---------------------------|----------| +| `offset` | Integer | false | +| `limit` | Integer | false | +| `fields` | Array of Strings / `null` | false | +| `filter` | filter / `null` | false | + +In the case of the query parameter, as always, the `filter` can only be a string, while it can be either a string, an array of strings, or an array of array of strings for the JSON body. ###### 3.1.1.2.1. `offset` @@ -75,7 +86,7 @@ Sets the maximum number of documents to be returned by the current request. ###### 3.1.1.2.3. `fields` -- Type: String +- Type: Array of Strings - Required: False - Default: `*` @@ -92,6 +103,17 @@ If `fields` is not specified, all attributes from the documents are returned in > The index setting `displayedAttributes` has no impact on this endpoint. +###### 3.1.1.2.4. `filter` + +- Type: String | Array of Strings | Array of array of Strings +- Required: False +- Default: null + +Refine the results by selecting documents that match the given filter. +In the case of the POST route, it is possible to send the filter in the form of an array of array of strings akin to the search route. + +Attributes used as filter criteria must be added to the `filterableAttributes` list of an index settings. See [Filterable Attributes Setting API](0123-filterable-attributes-setting-api.md). + ##### 3.1.1.3. Response Definition A `results` array representing documents as JSON objects.
@@ -161,6 +183,7 @@ Gives the total number of documents that can be browsed in the related index. - 🔴 Sending a value with a different type than `Integer` or `null` for `offset` will return a [invalid_document_offset](0061-error-format-and-definitions.md#invalid_document_offset) error. - 🔴 Sending a value with a different type than `Integer` or `null` for `limit` will return a [invalid_document_limit](0061-error-format-and-definitions.md#invalid_document_limit) error. - 🔴 Sending a value with a different type than `String` or `null` for `fields` will return a [invalid_document_fields](0061-error-format-and-definitions.md#invalid_document_fields) error. +- 🔴 Sending a value with a different type than `String`, `Array of strings`, `Array of array of strings` or `null` for `filter` will return a [invalid_document_filter](0061-error-format-and-definitions.md#invalid_document_filter) error. #### 3.1.2. `GET` - `indexes/:index_uid/documents/:document_id` @@ -252,7 +275,7 @@ This endpoint accepts various content-type: - [`application/json`](0135-indexing-json.md) - [`text/csv`](0028-indexing-csv.md) -- [`application/x-ndjson`](0028-indexing-ndjson.md) +- [`application/x-ndjson`](0029-indexing-ndjson.md) ##### 3.1.3.1. Path Parameters @@ -326,7 +349,7 @@ This endpoint accepts various content-type: - [`application/json`](0135-indexing-json.md) - [`text/csv`](0028-indexing-csv.md) -- [`application/x-ndjson`](0028-indexing-ndjson.md) +- [`application/x-ndjson`](0029-indexing-ndjson.md) ##### 3.1.4.1. Path Parameters @@ -496,7 +519,7 @@ e.g. ##### 3.1.7.3. Response Definition -When the request is successful, Meilisearch returns the HTTP code `202 Accepted`. The response's content is the summarized representation of the received asynchronous task. +When the request is successful, Meilisearch returns the HTTP code `202 Accepted`. The response's content is the summarized representation of the received asynchronous task with the type `documentDeletion`. 
See [Summarized `task` Object for `202 Accepted`](0060-tasks-api.md#summarized-task-object-for-202-accepted). @@ -514,11 +537,64 @@ See [Summarized `task` Object for `202 Accepted`](0060-tasks-api.md#summarized-t - 🔴 If the requested `index_uid` does not exist, the API returns an [index_not_found](0061-error-format-and-definitions.md#index_not_found) error. -#### 3.1.8. General Errors +#### 3.1.8. `POST` - `indexes/:index_uid/documents/delete` + +Delete a selection of documents based on a filter. + +##### 3.1.8.1. Path Parameters + +| Field | Type | Required | +|--------------------------|--------------------------|----------| +| `index_uid` | String | True | + +###### 3.1.8.1.1. `index_uid` + +- Type: String +- Required: True + +Unique identifier of an index. + +##### 3.1.8.2 Request Payload Definition + +A filter. + +- Type: String or array of array of strings +- Required: True + +e.g. + +```json +{ + "filter": "doggo = 'bernese mountain'" +} +``` + +##### 3.1.8.3. Response Definition + +When the request is successful, Meilisearch returns the HTTP code `202 Accepted`. The response's content is the summarized representation of the received asynchronous task with the type `documentDeletion`. + +See [Summarized `task` Object for `202 Accepted`](0060-tasks-api.md#summarized-task-object-for-202-accepted). + +##### 3.1.8.4. Errors + +- 🔴 Omitting Content-Type header returns a [missing_content_type](0061-error-format-and-definitions.md#missing_content_type) error. +- 🔴 Sending an empty Content-Type returns an [invalid_content_type](0061-error-format-and-definitions.md#invalid_content_type) error. +- 🔴 Sending a different Content-Type than `application/json` returns an [invalid_content_type](0061-error-format-and-definitions.md#invalid_content_type) error. +- 🔴 Sending an empty payload returns a [missing_payload](0061-error-format-and-definitions.md#missing_payload) error. 
+- 🔴 Sending an invalid payload returns a [malformed_payload](0061-error-format-and-definitions.md#malformed_payload) error. +- 🔴 Sending an invalid index uid format for the `:index_uid` path parameter returns an [invalid_index_uid](0061-error-format-and-definitions.md#invalid_index_uid) error. +- 🔴 Sending a value without a filter will return a [missing_document_filter](0061-error-format-and-definitions.md#missing_document_filter) error. +- 🔴 Sending a value with an invalid or empty filter will return an [invalid_document_filter](0061-error-format-and-definitions.md#invalid_document_filter) error. + +###### 3.1.8.4.1 Async Errors + +- 🔴 If the requested `index_uid` does not exist, the API returns an [index_not_found](0061-error-format-and-definitions.md#index_not_found) error. + +#### 3.1.9. General Errors These errors apply to all endpoints described here. -##### 3.1.8.1 Auth Errors +##### 3.1.9.1 Auth Errors The auth layer can return the following errors if Meilisearch is secured (a master-key is defined).