diff --git a/modules/n1ql/pages/n1ql-intro/sysinfo.adoc b/modules/n1ql/pages/n1ql-intro/sysinfo.adoc index db3fb4399..0d9a2a443 100644 --- a/modules/n1ql/pages/n1ql-intro/sysinfo.adoc +++ b/modules/n1ql/pages/n1ql-intro/sysinfo.adoc @@ -3,6 +3,31 @@ :description: {sqlpp} has a system namespace that stores metadata about data containers, the Query service, and the system as a whole. \ You can query the system namespace to get this information. +// Pass through HTML table styles for this page + +ifdef::basebackend-html[] +++++ + +++++ +endif::[] + [abstract] {description} @@ -29,6 +54,7 @@ xref:n1ql:n1ql-manage/monitoring-n1ql-query.adoc#vitals[system:vitals] xref:n1ql:n1ql-manage/monitoring-n1ql-query.adoc#sys-active-req[system:active_requests] xref:n1ql:n1ql-manage/monitoring-n1ql-query.adoc#sys-prepared[system:prepareds] xref:n1ql:n1ql-manage/monitoring-n1ql-query.adoc#sys-completed-req[system:completed_requests] +xref:n1ql:n1ql-manage/monitoring-n1ql-query.adoc#sys-history[system:completed_requests_history] a| [%hardbreaks] <> diff --git a/modules/n1ql/pages/n1ql-language-reference/metafun.adoc b/modules/n1ql/pages/n1ql-language-reference/metafun.adoc index 1bb2002a4..d1d88c04d 100644 --- a/modules/n1ql/pages/n1ql-language-reference/metafun.adoc +++ b/modules/n1ql/pages/n1ql-language-reference/metafun.adoc @@ -331,6 +331,265 @@ SELECT DS_VERSION() as server_version; ---- ==== +[[finderr,FINDERR()]] +== FINDERR(`expression`) + +ifeval::['{page-component-version}' == '7.6'] +[.status]#Couchbase Server 7.6.4# +endif::[] + +=== Description + +Returns the full details of any Query service or cbq shell error. + +=== Arguments + +expression:: One of the following: ++ +-- +* A number representing an error code. +In this case, the function returns the full details of the error matching the error code. + +* A string. 
+In this case, the function searches for the target string in all of the error message fields except for `user_error`, and returns the full details of any errors that match the string. + +* A regular expression. +In this case, the function searches for the regular expression in all of the error message fields except for `user_error`, and returns the full details of any errors that match the pattern. +-- + +=== Return Value + +The return value is an array of one or more objects, each of which contains the details of an error that matches the find expression. + +For each error, the function returns the following fields. + +[options="header", cols="~a,~a,~a"] +|=== +|Name|Description|Schema + +|**applies_to** + +__required__ +|One of the following: + +* `cbq-shell`: The error applies to the cbq shell. +* `Server`: The error applies to the server. +|enum (cbq-shell, Server) + +|**code** + +__required__ +|A number representing the error. +|Integer + +|**description** + +__required__ +|Message describing why the error occurred. +|String + +|**reason** + +__optional__ +|List of possible causes of the error. +|String array + +|**user_action** + +__optional__ +|List of possible steps a user can take to mitigate the error. +|String array + +|**user_error** + +__optional__ +|One of the following: + +* `Yes`: The error was caused by the user. +* `No`: The error was caused by other services, or was internal to the server. +* `Maybe`: A combination of both. +|enum (Yes, No, Maybe) +|=== + +NOTE: The error details also include a `symbol` field, which contains a representation string for the error. +This field is for internal use only, and is not shown in the results. +However, the FINDERR function does search this field when the find expression is a string or a regular expression. 
+ +=== Examples + +[[finderr-ex1,FINDERR() Example 1]] +.Find error details by code number +==== +.Query +[source,sqlpp] +---- +SELECT FINDERR(5011); +---- + +.Results +[source,json] +---- +[ + { + "$1": [ + { + "applies_to": "Server", + "code": 5011, + "description": "Abort: «reason»", + "reason": [ + [ + "The SQL++ abort() function was called in the statement.", + "e.g. SELECT abort('An example cause')" + ] + ], + "user_error": "Yes" + } + ] + } +] +---- +==== + +[[finderr-ex2,FINDERR() Example 2]] +.Find error details by matching a string +==== +.Query +[source,sqlpp] +---- +SELECT FINDERR("A semantic error is present in the statement."); +---- + +.Results +[source,json] +---- +[ + { + "$1": [ + { + "applies_to": "Server", + "code": 3100, + "description": "A semantic error is present in the statement.", + "reason": [ + "The statement includes portions that violate semantic constraints." + ], + "user_action": [ + "The cause will contain more detail on the violation; revise the statement and re-submit." + ], + "user_error": "Yes" + } + ] + } +] +---- +==== + +[[finderr-ex3,FINDERR() Example 3]] +.Find multiple error details by matching a string +==== +.Query +[source,sqlpp] +---- +SELECT FINDERR("semantic"); +---- + +.Results +[source,json] +---- +[ + { + "$1": [ + { + "applies_to": "Server", + "code": 3100, + "description": "A semantic error is present in the statement.", + "reason": [ + "The statement includes portions that violate semantic constraints." + ], + "user_action": [ + "The cause will contain more detail on the violation; revise the statement and re-submit." + ], + "user_error": "Yes" + }, + { + "applies_to": "Server", + "code": 3220, + "description": "«name» window function «clause» «reason»", + "reason": [ + "A violation of the window function semantic restrictions was present in the statement." + ], + "user_action": [ + "Revise the statement to remove the violation." 
+ ], + "user_error": "Yes" + }, + { + "applies_to": "Server", + "code": 3300, + "description": "recursive_with semantics: «cause»", + "reason": [ + "The statement specifies restricted syntax in a recursive common table expression definition." + ], + "user_action": [ + "Revise the statement removing the restricted syntax." + ], + "user_error": "Yes" + } + ] + } +] +---- +==== + +[[finderr-ex4,FINDERR() Example 4]] +.Find multiple error details by matching a regular expression +==== +.Query +[source,sqlpp] +---- +SELECT FINDERR("[IU][NP]SERT"); +---- + +.Results +[source,json] +---- +[ + { + "$1": [ + { + "applies_to": "Server", + "code": 3150, + "description": "MERGE with ON KEY clause cannot have document key specification in INSERT action.", + "reason": [ + [ + "A lookup merge statement specified a document key.", + "e.g. MERGE INTO default USING [{},{}] AS source ON KEY 'aaa' WHEN NOT MATCHED THEN INSERT ('key',{})" + ] + ], + "user_action": [ + "Refer to the documentation for lookup merge statements." + ], + "user_error": "Yes" + }, +// ... + { + "applies_to": "Server", + "code": 5072, + "description": "No UPSERT key for «value»", + "user_action": [ + "Contact support." + ] + }, +// ... + { + "applies_to": "Server", + "code": 15005, + "description": "No keys to insert «details»" + } + ] + } +] +---- +==== + +=== See Also + +* The xref:cli:finderr.adoc[finderr] command line tool +* xref:n1ql:n1ql-language-reference/n1ql-error-codes.adoc[] + [[flatten_keys,FLATTEN_KEYS()]] == FLATTEN_KEYS(`expr1` [ `modifiers` ], `expr2` [ `modifiers` ], ...) 
diff --git a/modules/n1ql/pages/n1ql-language-reference/sequenceops.adoc b/modules/n1ql/pages/n1ql-language-reference/sequenceops.adoc index fe8ae6de1..1066bc98b 100644 --- a/modules/n1ql/pages/n1ql-language-reference/sequenceops.adoc +++ b/modules/n1ql/pages/n1ql-language-reference/sequenceops.adoc @@ -188,7 +188,7 @@ INSERT INTO bookings ==== [[ex-nextval-key]] -.Insert a sequential value in a document key +.Insert a sequential value in a document key and body ==== The following statement uses the `ordNum` sequence to generate the document key and a booking number within the body of the document. @@ -201,6 +201,12 @@ INSERT INTO bookings RETURNING META().id, *; ---- +This query gives different results, depending on the version of Couchbase Server. + +''' + +[.status]##Couchbase Server 7.6–7.6.3## + .Results [source,json] ---- @@ -215,12 +221,33 @@ INSERT INTO bookings ] ---- -Since the key is not part of the document, the query has incremented the sequence twice. -This gives a different sequence number for the document key and the document value, which may not be what you want. +In versions of Couchbase Server prior to 7.6.4, the key is not regarded as part of the document, so this query increments the sequence twice. +This gives a different sequence number for the document key and the document value. + +''' + +[.status]#Couchbase Server 7.6.4# + +.Results +[source,json] +---- +[ + { + "id": "1001", + "bookings": { + "num": 1001, + "user": 1 + } + } +] +---- + +In Couchbase Server 7.6.4 and later, the entire VALUES clause (key, value, and options) is regarded as a single document, so the query only increments the sequence once. +This gives the same sequence number in the document key and the document value. ==== [[ex-nextval-same]] -.Insert the same sequential value in a document key and body +.Insert a sequential value with INSERT SELECT ==== The following statement uses an INSERT SELECT statement. 
With this query, the document key and document value are both generated within the same document. diff --git a/modules/n1ql/pages/n1ql-manage/monitoring-n1ql-query.adoc b/modules/n1ql/pages/n1ql-manage/monitoring-n1ql-query.adoc index 6832258c9..b0f29e5fd 100644 --- a/modules/n1ql/pages/n1ql-manage/monitoring-n1ql-query.adoc +++ b/modules/n1ql/pages/n1ql-manage/monitoring-n1ql-query.adoc @@ -52,7 +52,7 @@ This information can be very useful to assess the current workload and performan [#sys-vitals-get] === Get System Vitals -To view system vitals, use the Admin REST API or a {sqlpp} query. +To view system vitals, use the xref:n1ql:n1ql-rest-api/admin.adoc[Admin REST API] or a {sqlpp} query. [tabs] ==== @@ -126,7 +126,7 @@ The `system:active_requests` catalog lists all currently executing active reques [[sys-active-get]] === Get Active Requests -To view active requests, use the Admin REST API or a {sqlpp} query. +To view active requests, use the xref:n1ql:n1ql-rest-api/admin.adoc[Admin REST API] or a {sqlpp} query. [tabs] ==== @@ -173,7 +173,7 @@ SELECT *, meta().plan FROM system:active_requests; [[sys-active-delete]] === Terminate an Active Request -The DELETE command can be used to terminate an active request, for instance, a non-responding or a long-running query. +To terminate an active request, for instance, a non-responding or a long-running query, use the xref:n1ql:n1ql-rest-api/admin.adoc[Admin REST API] or a {sqlpp} query. [tabs] ==== @@ -272,7 +272,7 @@ When there are multiple prepared statements with the same name in different quer [[sys-prepared-get]] === Get Prepared Statements -To get a list of all known prepared statements, use the Admin REST API or a {sqlpp} query. +To get a list of all known prepared statements, use the xref:n1ql:n1ql-rest-api/admin.adoc[Admin REST API] or a {sqlpp} query. 
[tabs] ==== @@ -346,7 +346,7 @@ SELECT *, meta().plan FROM system:prepareds; [[sys-prepared-delete]] === Delete Prepared Statements -To delete a specific prepared statement, use the Admin REST API or a {sqlpp} query. +To delete a specific prepared statement, use the xref:n1ql:n1ql-rest-api/admin.adoc[Admin REST API] or a {sqlpp} query. [tabs] ==== @@ -492,7 +492,7 @@ This information provides a general insight into the health and performance of t [[sys-completed-get]] === Get Completed Requests -To get a list of all logged completed requests, use the Admin REST API or a {sqlpp} query. +To get a list of all logged completed requests, use the xref:n1ql:n1ql-rest-api/admin.adoc[Admin REST API] or a {sqlpp} query. [tabs] ==== @@ -559,7 +559,7 @@ SELECT *, meta().plan FROM system:completed_requests; [[sys-completed-delete]] === Purge the Completed Requests -To purge a specific completed request, use the Admin REST API or a {sqlpp} query. +To purge a specific completed request, use the xref:n1ql:n1ql-rest-api/admin.adoc[Admin REST API] or a {sqlpp} query. [tabs] ==== @@ -690,9 +690,9 @@ For field names and meanings, see xref:n1ql-rest-admin:index.adoc#Requests[Reque For query plan field names and meanings, see <>. [[sys-completed-config]] -== Configure the Completed Requests +== Configure Completed Requests -You can configure the `system:completed_requests` keyspace by specifying parameters through the Admin API `/admin/settings` endpoint. +You can configure the `system:completed_requests` keyspace by specifying parameters through the xref:n1ql:n1ql-rest-api/admin.adoc[Admin REST API] `/admin/settings` endpoint. You can specify the conditions for completed request logging using the `completed` field. This field takes a JSON object containing the names and values of logging qualifiers. 
@@ -819,6 +819,82 @@ curl $BASE_URL/admin/settings -u $USER:$PASSWORD \ -d '{"completed-limit":1000}' ---- +[[sys-history]] +== Stream Completed Requests + +[.status]#Couchbase Server 7.6.4# + +In Couchbase Server 7.6.4 and later, you can stream completed requests to disk. + +To enable completed request streaming, use the xref:n1ql:n1ql-rest-api/admin.adoc[Admin REST API] `/admin/settings` endpoint to specify the `completed_stream_size` property. + +[source,sh] +---- +curl $BASE_URL/admin/settings -u $USER:$PASSWORD \ + -H 'Content-Type: application/json' \ + -d '{"completed_stream_size":500}' +---- + +This property is a file size in MiB. +When set to `0` (the default), completed request streaming is disabled. + +When set to any size greater than `0`, completed requests are streamed to archive files. +The value of this property determines the size of the data to retain, per node. +The configuration for completed requests determines which requests are saved. + +NOTE: The additional processing required to save completed requests to disk may limit overall request throughput on a Query node, but typically only when every completed request is being recorded, and requests are very small or short-lived. +The speed of the file system on which the server logs directory resides naturally affects the potential impact too. + +[#sys-history-files] +=== Archived Request Files + +When streaming is enabled, completed requests are saved to GZIP archives with the prefix `local_request_log` in the Couchbase Server `logs` directory. +Each saved GZIP archive file contains multiple JSON entries, one for each recorded completed request. + +Couchbase Server writes multiple archive files in parallel, so whilst the order of requests in a file is sequential, a single given file may not contain a contiguous sequence of requests. + +When an archive file reaches or exceeds 100 MiB, it is finalized and saved to disk. 
+This is not a hard limit -- entries are not truncated to adhere to it. +Files may also be finalized with less content, if nothing has been written to them for an extended period. +Files that are actively being written are not available for reading, and they don't count towards the configured size limit until they're finalized. + +Couchbase Server tries to manage and retain archive files such that the total disk space used by the files is within the specified limit for the node. +When the specified limit is reached, older files are removed as necessary to make space for newly finalized files. +When a file is removed, it isn't guaranteed that only the oldest requests are evicted, given that Couchbase Server writes to multiple archive files in parallel. + +[#sys-history-view] +=== View Archived Requests + +To view archived completed requests, use https://www.gnu.org/software/gzip[gzip] and https://jqlang.github.io/jq[jq] on the command line, or a {sqlpp} query. + +[tabs,sync-group-id="REST API|{sqlpp}"] +==== +Command Line:: ++ +-- +To view all archived completed requests in `$FILE`: + +[source,sh] +---- +gzip -qdc $FILE | jq . +---- +-- + +{sqlpp}:: ++ +-- +To get a list of archived completed requests using {sqlpp}: + +[source,sqlpp] +---- +SELECT * FROM system:completed_requests_history; +---- +-- +==== + +The `system:completed_requests_history` keyspace is provided for {sqlpp} access to the archived files, but as they are external GZIP archives, performance is restricted, particularly with large histories on clusters with multiple Query service nodes. +Directly reading the files may be more useful in some cases. 
+ [#query-monitoring-settings] == Query Profiling diff --git a/modules/search/examples/create-search-index-response.json b/modules/search/examples/create-search-index-response.json new file mode 100644 index 000000000..f927a0c34 --- /dev/null +++ b/modules/search/examples/create-search-index-response.json @@ -0,0 +1,5 @@ +{ + "status": "ok", + "name": "travel-sample.inventory.landmark-content-index", + "uuid": "49563a96ea6d3686" +} \ No newline at end of file diff --git a/modules/search/examples/geojson-search-response.json b/modules/search/examples/geojson-search-response.json new file mode 100644 index 000000000..9a1d15c05 --- /dev/null +++ b/modules/search/examples/geojson-search-response.json @@ -0,0 +1,104 @@ +{ + "status": { + "total": 1, + "failed": 0, + "successful": 1 + }, + "hits": [ + { + "index": "travel-sample.inventory.geo-index_642c3761fc0a2c73_4c1c5584", + "id": "landmark_40010", + "score": 0.1332053777355554, + "sort": [ + "_score" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_642c3761fc0a2c73_4c1c5584", + "id": "landmark_40011", + "score": 0.1332053777355554, + "sort": [ + "_score" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_642c3761fc0a2c73_4c1c5584", + "id": "landmark_554", + "score": 0.016920542554627847, + "sort": [ + "_score" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_642c3761fc0a2c73_4c1c5584", + "id": "landmark_11323", + "score": 0.016920542554627847, + "sort": [ + "_score" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_642c3761fc0a2c73_4c1c5584", + "id": "landmark_37316", + "score": 0.016920542554627847, + "sort": [ + "_score" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_642c3761fc0a2c73_4c1c5584", + "id": "landmark_581", + "score": 0.016920542554627847, + "sort": [ + "_score" + ], + "partial_match": true + }, + { + "index": 
"travel-sample.inventory.geo-index_642c3761fc0a2c73_4c1c5584", + "id": "landmark_15903", + "score": 0.016920542554627847, + "sort": [ + "_score" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_642c3761fc0a2c73_4c1c5584", + "id": "landmark_570", + "score": 0.016920542554627847, + "sort": [ + "_score" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_642c3761fc0a2c73_4c1c5584", + "id": "landmark_566", + "score": 0.016920542554627847, + "sort": [ + "_score" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_642c3761fc0a2c73_4c1c5584", + "id": "landmark_22565", + "score": 0.016920542554627847, + "sort": [ + "_score" + ], + "partial_match": true + } + ], + "total_hits": 257, + "cost": 35339, + "max_score": 0.1332053777355554, + "took": 10019436, + "facets": null +} \ No newline at end of file diff --git a/modules/search/examples/geospatial-search-query-geojson.sh b/modules/search/examples/geospatial-search-query-geojson.sh index d7d89c83f..05a623ab0 100644 --- a/modules/search/examples/geospatial-search-query-geojson.sh +++ b/modules/search/examples/geospatial-search-query-geojson.sh @@ -5,33 +5,45 @@ curl -s -XPUT -H "Content-Type: application/json" \ "field": "geojson", "geometry": { "shape": { - "type": "Polygon", "coordinates": [ + [ [ + -3.272607322511618, + 53.94443025530833 + ], [ - 0.47482593026924746, - 51.31232878073189 + -3.369506040138134, + 53.2576036482846 ], [ - 0.6143265647863245, - 51.31232878073189 + -1.531900030030954, + 53.352538254565076 ], [ - 0.6143265647863245, - 51.384000374770466 + -0.08209172686298416, + 53.568703110993994 ], [ - 0.47482593026924746, - 51.384000374770466 + -0.4648577685729265, + 53.86797332814126 ], [ - 0.47482593026924746, - 51.31232878073189 - ] + -1.612712602375666, + 54.022352820673774 + ], + [ + -2.2803785770867933, + 54.05470383755585 + ], + [ + -3.272607322511618, + 53.94443025530833 ] ] + ], + "type": "Polygon" }, 
"relation": "within" } } - } \ No newline at end of file + }' \ No newline at end of file diff --git a/modules/search/examples/geospatial-search-response.json b/modules/search/examples/geospatial-search-response.json new file mode 100644 index 000000000..3f5bfd254 --- /dev/null +++ b/modules/search/examples/geospatial-search-response.json @@ -0,0 +1,104 @@ +{ + "status": { + "total": 1, + "failed": 0, + "successful": 1 + }, + "hits": [ + { + "index": "travel-sample.inventory.geo-index_78125822b3de7be3_4c1c5584", + "id": "landmark_17411", + "score": 0.009274733001968816, + "sort": [ + " \u0001?E#9\u003eN\u000c\"e" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_78125822b3de7be3_4c1c5584", + "id": "landmark_17409", + "score": 0.009274733001968816, + "sort": [ + " \u0001?O~i*(kD," + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_78125822b3de7be3_4c1c5584", + "id": "landmark_17403", + "score": 0.009274733001968816, + "sort": [ + " \u0001?Sg*|/t\u001f\u0002" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_78125822b3de7be3_4c1c5584", + "id": "landmark_17410", + "score": 0.009274733001968816, + "sort": [ + " \u0001?Z3T6 \u0010\u0019@" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_78125822b3de7be3_4c1c5584", + "id": "landmark_17412", + "score": 0.009274733001968816, + "sort": [ + " \u0001?]-\u000fm?\u000b\u0014#" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_78125822b3de7be3_4c1c5584", + "id": "landmark_17408", + "score": 0.009274733001968816, + "sort": [ + " \u0001?^DV7\u0014t:^" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_78125822b3de7be3_4c1c5584", + "id": "landmark_17406", + "score": 0.009274733001968816, + "sort": [ + " \u0001?_\u003c\u00009\u001eW\u0013\u0012" + ], + "partial_match": true + }, + { + "index": 
"travel-sample.inventory.geo-index_78125822b3de7be3_4c1c5584", + "id": "landmark_17397", + "score": 0.009274733001968816, + "sort": [ + " \u0001?c\u001cx\u0010n\u0016Wl" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_78125822b3de7be3_4c1c5584", + "id": "landmark_17407", + "score": 0.009274733001968816, + "sort": [ + " \u0001?c!7\u0001@SwS" + ], + "partial_match": true + }, + { + "index": "travel-sample.inventory.geo-index_78125822b3de7be3_4c1c5584", + "id": "landmark_17391", + "score": 0.009274733001968816, + "sort": [ + " \u0001?dgzZ[\u0007;y" + ], + "partial_match": true + } + ], + "total_hits": 640, + "cost": 157249, + "max_score": 0.17106779096990765, + "took": 15349178, + "facets": null +} \ No newline at end of file diff --git a/modules/search/examples/geospatial-sqlpp-query.sqlpp b/modules/search/examples/geospatial-sqlpp-query.sqlpp new file mode 100644 index 000000000..57642e8ae --- /dev/null +++ b/modules/search/examples/geospatial-sqlpp-query.sqlpp @@ -0,0 +1,3 @@ +UPDATE `travel-sample`.inventory.landmark + SET geojson = { "type": "Point", "coordinates": [geo.lon, geo.lat] } + WHERE geo IS NOT null; \ No newline at end of file diff --git a/modules/search/examples/query-sample-results-unindexed.jsonc b/modules/search/examples/query-sample-results-unindexed.jsonc new file mode 100644 index 000000000..06e146641 --- /dev/null +++ b/modules/search/examples/query-sample-results-unindexed.jsonc @@ -0,0 +1,42 @@ +{ + "status": { + "total": 1, + "failed": 0, + "successful": 1 + }, + "request": { + "query": { + "location": [ + -2.235143, + 53.482358 + ], + "distance": "100mi", + "field": "geo" + }, + "size": 10, + "from": 0, + "highlight": { + "style": null, + "fields": null + }, + "fields": [ + "content" + ], + "facets": null, + "explain": true, + "sort": [ + "-_score" + ], + "includeLocations": false, + "search_after": null, + "search_before": null, + "knn": null, + "knn_operator": "" + }, + "hits": [], + "total_hits": 0, + 
"cost": 0, + "max_score": 0, + "took": 4150150, + "facets": null +} \ No newline at end of file diff --git a/modules/search/examples/query-sample-results-validate.jsonc b/modules/search/examples/query-sample-results-validate.jsonc new file mode 100644 index 000000000..00a141eb4 --- /dev/null +++ b/modules/search/examples/query-sample-results-validate.jsonc @@ -0,0 +1,24 @@ +{ + "error": "rest_index: Query, indexName: travel-sample.inventory.landmark-content-index, err: bleve: QueryBleve query validation failed against index, err: query_validate: field not indexed, name: geo, type: geopoint", + "request": { + "ctl": { + "validate": true + }, + "explain": true, + "fields": [ + "content" + ], + "from": 0, + "highlight": {}, + "query": { + "distance": "100mi", + "field": "geo", + "location": { + "lat": 53.482358, + "lon": -2.235143 + } + }, + "size": 10 + }, + "status": "fail" +} \ No newline at end of file diff --git a/modules/search/examples/query-sample-results.jsonc b/modules/search/examples/query-sample-results.jsonc new file mode 100644 index 000000000..396591ee6 --- /dev/null +++ b/modules/search/examples/query-sample-results.jsonc @@ -0,0 +1,740 @@ +{ + // tag::query_summary[] + "status": { + "total": 1, + "failed": 0, + "successful": 1 + }, + "request": { + "query": { + "query": "+view +food +beach" + }, + "size": 10, + "from": 0, + "highlight": { + "style": null, + "fields": null + }, + "fields": [ + "*" + ], + "facets": null, + "explain": true, + "sort": [ + "-_score" + ], + "includeLocations": false, + "search_after": null, + "search_before": null, + "knn": null, + "knn_operator": "" + }, + // end::query_summary[] + "hits": [ + // tag::first_hit[] + { + "index": "travel-sample.inventory.landmark-content-index_6369e656b9eec849_4c1c5584", + "id": "landmark_4428", + "score": 2.425509689250102, + "explanation": { + "value": 2.425509689250102, + "message": "sum of:", + "children": [ + { + "value": 2.425509689250102, + "message": "sum of:", + "children": [ + { + 
"value": 1.0124422206433388, + "message": "product of:", + "children": [ + { + "value": 1.0124422206433388, + "message": "sum of:", + "children": [ + { + "value": 1.0124422206433388, + "message": "weight(_all:view^1.000000 in \u0000\u0000\u0000\u0000\u0000\u0000\nN), product of:", + "children": [ + { + "value": 0.6460760126054528, + "message": "queryWeight(_all:view^1.000000), product of:", + "children": [ + { + "value": 1, + "message": "boost" + }, + { + "value": 4.701190745593387, + "message": "idf(docFreq=110, maxDocs=4495)" + }, + { + "value": 0.1374281639627249, + "message": "queryNorm" + } + ] + }, + { + "value": 1.5670636285665962, + "message": "fieldWeight(_all:view in \u0000\u0000\u0000\u0000\u0000\u0000\nN), product of:", + "children": [ + { + "value": 1, + "message": "tf(termFreq(_all:view)=1" + }, + { + "value": 0.3333333432674408, + "message": "fieldNorm(field=_all, doc=\u0000\u0000\u0000\u0000\u0000\u0000\nN)" + }, + { + "value": 4.701190745593387, + "message": "idf(docFreq=110, maxDocs=4495)" + } + ] + } + ] + } + ] + }, + { + "value": 1, + "message": "coord(1/1)" + } + ] + }, + { + "value": 0.9006237048061059, + "message": "product of:", + "children": [ + { + "value": 0.9006237048061059, + "message": "sum of:", + "children": [ + { + "value": 0.9006237048061059, + "message": "weight(_all:beach^1.000000 in \u0000\u0000\u0000\u0000\u0000\u0000\nN), product of:", + "children": [ + { + "value": 0.6093547205466089, + "message": "queryWeight(_all:beach^1.000000), product of:", + "children": [ + { + "value": 1, + "message": "boost" + }, + { + "value": 4.433987204485146, + "message": "idf(docFreq=144, maxDocs=4495)" + }, + { + "value": 0.1374281639627249, + "message": "queryNorm" + } + ] + }, + { + "value": 1.4779957788760876, + "message": "fieldWeight(_all:beach in \u0000\u0000\u0000\u0000\u0000\u0000\nN), product of:", + "children": [ + { + "value": 1, + "message": "tf(termFreq(_all:beach)=1" + }, + { + "value": 0.3333333432674408, + "message": 
"fieldNorm(field=_all, doc=\u0000\u0000\u0000\u0000\u0000\u0000\nN)" + }, + { + "value": 4.433987204485146, + "message": "idf(docFreq=144, maxDocs=4495)" + } + ] + } + ] + } + ] + }, + { + "value": 1, + "message": "coord(1/1)" + } + ] + }, + { + "value": 0.5124437638006571, + "message": "product of:", + "children": [ + { + "value": 0.5124437638006571, + "message": "sum of:", + "children": [ + { + "value": 0.5124437638006571, + "message": "weight(_all:food^1.000000 in \u0000\u0000\u0000\u0000\u0000\u0000\nN), product of:", + "children": [ + { + "value": 0.4596440040764192, + "message": "queryWeight(_all:food^1.000000), product of:", + "children": [ + { + "value": 1, + "message": "boost" + }, + { + "value": 3.3446128568019726, + "message": "idf(docFreq=430, maxDocs=4495)" + }, + { + "value": 0.1374281639627249, + "message": "queryNorm" + } + ] + }, + { + "value": 1.1148709854930678, + "message": "fieldWeight(_all:food in \u0000\u0000\u0000\u0000\u0000\u0000\nN), product of:", + "children": [ + { + "value": 1, + "message": "tf(termFreq(_all:food)=1" + }, + { + "value": 0.3333333432674408, + "message": "fieldNorm(field=_all, doc=\u0000\u0000\u0000\u0000\u0000\u0000\nN)" + }, + { + "value": 3.3446128568019726, + "message": "idf(docFreq=430, maxDocs=4495)" + } + ] + } + ] + } + ] + }, + { + "value": 1, + "message": "coord(1/1)" + } + ] + } + ] + } + ] + }, + // tag::locations[] + "locations": { + "content": { + "beach": [ + { + "pos": 11, + "start": 61, + "end": 66, + "array_positions": null + } + ], + "food": [ + { + "pos": 3, + "start": 13, + "end": 17, + "array_positions": null + } + ], + "view": [ + { + "pos": 8, + "start": 46, + "end": 50, + "array_positions": null + } + ] + } + }, + // end::locations[] + // tag::highlight[] + "fragments": { + "content": [ + "serves fresh food at very reasonable prices - view of stoney beach with herons" + ] + }, + // end::highlight[] + "sort": [ + "_score" + ], + // tag::field_content[] + "fields": { + "content": "serves fresh food 
at very reasonable prices - view of stoney beach with herons" + } + // end::field_content[] + }, + // end::first_hit[] + { + "index": "travel-sample.inventory.landmark-content-index_6369e656b9eec849_4c1c5584", + "id": "landmark_26385", + "score": 1.6270812956011347, + "explanation": { + "value": 1.6270812956011347, + "message": "sum of:", + "children": [ + { + "value": 1.6270812956011347, + "message": "sum of:", + "children": [ + { + "value": 0.6791668602218446, + "message": "product of:", + "children": [ + { + "value": 0.6791668602218446, + "message": "sum of:", + "children": [ + { + "value": 0.6791668602218446, + "message": "weight(_all:view^1.000000 in \u0000\u0000\u0000\u0000\u0000\u0000\u0006~), product of:", + "children": [ + { + "value": 0.6460760126054528, + "message": "queryWeight(_all:view^1.000000), product of:", + "children": [ + { + "value": 1, + "message": "boost" + }, + { + "value": 4.701190745593387, + "message": "idf(docFreq=110, maxDocs=4495)" + }, + { + "value": 0.1374281639627249, + "message": "queryNorm" + } + ] + }, + { + "value": 1.051218195646895, + "message": "fieldWeight(_all:view in \u0000\u0000\u0000\u0000\u0000\u0000\u0006~), product of:", + "children": [ + { + "value": 1, + "message": "tf(termFreq(_all:view)=1" + }, + { + "value": 0.22360679507255554, + "message": "fieldNorm(field=_all, doc=\u0000\u0000\u0000\u0000\u0000\u0000\u0006~)" + }, + { + "value": 4.701190745593387, + "message": "idf(docFreq=110, maxDocs=4495)" + } + ] + } + ] + } + ] + }, + { + "value": 1, + "message": "coord(1/1)" + } + ] + }, + { + "value": 0.6041567225889205, + "message": "product of:", + "children": [ + { + "value": 0.6041567225889205, + "message": "sum of:", + "children": [ + { + "value": 0.6041567225889205, + "message": "weight(_all:beach^1.000000 in \u0000\u0000\u0000\u0000\u0000\u0000\u0006~), product of:", + "children": [ + { + "value": 0.6093547205466089, + "message": "queryWeight(_all:beach^1.000000), product of:", + "children": [ + { + "value": 1, 
+ "message": "boost" + }, + { + "value": 4.433987204485146, + "message": "idf(docFreq=144, maxDocs=4495)" + }, + { + "value": 0.1374281639627249, + "message": "queryNorm" + } + ] + }, + { + "value": 0.9914696681876435, + "message": "fieldWeight(_all:beach in \u0000\u0000\u0000\u0000\u0000\u0000\u0006~), product of:", + "children": [ + { + "value": 1, + "message": "tf(termFreq(_all:beach)=1" + }, + { + "value": 0.22360679507255554, + "message": "fieldNorm(field=_all, doc=\u0000\u0000\u0000\u0000\u0000\u0000\u0006~)" + }, + { + "value": 4.433987204485146, + "message": "idf(docFreq=144, maxDocs=4495)" + } + ] + } + ] + } + ] + }, + { + "value": 1, + "message": "coord(1/1)" + } + ] + }, + { + "value": 0.3437577127903696, + "message": "product of:", + "children": [ + { + "value": 0.3437577127903696, + "message": "sum of:", + "children": [ + { + "value": 0.3437577127903696, + "message": "weight(_all:food^1.000000 in \u0000\u0000\u0000\u0000\u0000\u0000\u0006~), product of:", + "children": [ + { + "value": 0.4596440040764192, + "message": "queryWeight(_all:food^1.000000), product of:", + "children": [ + { + "value": 1, + "message": "boost" + }, + { + "value": 3.3446128568019726, + "message": "idf(docFreq=430, maxDocs=4495)" + }, + { + "value": 0.1374281639627249, + "message": "queryNorm" + } + ] + }, + { + "value": 0.7478781616679533, + "message": "fieldWeight(_all:food in \u0000\u0000\u0000\u0000\u0000\u0000\u0006~), product of:", + "children": [ + { + "value": 1, + "message": "tf(termFreq(_all:food)=1" + }, + { + "value": 0.22360679507255554, + "message": "fieldNorm(field=_all, doc=\u0000\u0000\u0000\u0000\u0000\u0000\u0006~)" + }, + { + "value": 3.3446128568019726, + "message": "idf(docFreq=430, maxDocs=4495)" + } + ] + } + ] + } + ] + }, + { + "value": 1, + "message": "coord(1/1)" + } + ] + } + ] + } + ] + }, + "locations": { + "content": { + "beach": [ + { + "pos": 25, + "start": 127, + "end": 132, + "array_positions": null + } + ], + "food": [ + { + "pos": 17, + 
"start": 90, + "end": 94, + "array_positions": null + } + ], + "view": [ + { + "pos": 34, + "start": 169, + "end": 173, + "array_positions": null + } + ] + } + }, + "fragments": { + "content": [ + "Burgers, seafood, and other simple but tasty meals right at the harbor. You can take your food around the corner to sit on the beach or the sea wall and enjoy the ocean view while you eat." + ] + }, + "sort": [ + "_score" + ], + "fields": { + "content": "Burgers, seafood, and other simple but tasty meals right at the harbor. You can take your food around the corner to sit on the beach or the sea wall and enjoy the ocean view while you eat." + } + }, + { + "index": "travel-sample.inventory.landmark-content-index_6369e656b9eec849_4c1c5584", + "id": "landmark_38035", + "score": 1.1962539437368078, + "explanation": { + "value": 1.1962539437368078, + "message": "sum of:", + "children": [ + { + "value": 1.1962539437368078, + "message": "sum of:", + "children": [ + { + "value": 0.4993333997460529, + "message": "product of:", + "children": [ + { + "value": 0.4993333997460529, + "message": "sum of:", + "children": [ + { + "value": 0.4993333997460529, + "message": "weight(_all:view^1.000000 in \u0000\u0000\u0000\u0000\u0000\u0000\u0006\ufffd), product of:", + "children": [ + { + "value": 0.6460760126054528, + "message": "queryWeight(_all:view^1.000000), product of:", + "children": [ + { + "value": 1, + "message": "boost" + }, + { + "value": 4.701190745593387, + "message": "idf(docFreq=110, maxDocs=4495)" + }, + { + "value": 0.1374281639627249, + "message": "queryNorm" + } + ] + }, + { + "value": 0.7728709780330244, + "message": "fieldWeight(_all:view in \u0000\u0000\u0000\u0000\u0000\u0000\u0006\ufffd), product of:", + "children": [ + { + "value": 1, + "message": "tf(termFreq(_all:view)=1" + }, + { + "value": 0.16439898312091827, + "message": "fieldNorm(field=_all, doc=\u0000\u0000\u0000\u0000\u0000\u0000\u0006\ufffd)" + }, + { + "value": 4.701190745593387, + "message": 
"idf(docFreq=110, maxDocs=4495)" + } + ] + } + ] + } + ] + }, + { + "value": 1, + "message": "coord(1/1)" + } + ] + }, + { + "value": 0.4441848504964135, + "message": "product of:", + "children": [ + { + "value": 0.4441848504964135, + "message": "sum of:", + "children": [ + { + "value": 0.4441848504964135, + "message": "weight(_all:beach^1.000000 in \u0000\u0000\u0000\u0000\u0000\u0000\u0006\ufffd), product of:", + "children": [ + { + "value": 0.6093547205466089, + "message": "queryWeight(_all:beach^1.000000), product of:", + "children": [ + { + "value": 1, + "message": "boost" + }, + { + "value": 4.433987204485146, + "message": "idf(docFreq=144, maxDocs=4495)" + }, + { + "value": 0.1374281639627249, + "message": "queryNorm" + } + ] + }, + { + "value": 0.7289429875885212, + "message": "fieldWeight(_all:beach in \u0000\u0000\u0000\u0000\u0000\u0000\u0006\ufffd), product of:", + "children": [ + { + "value": 1, + "message": "tf(termFreq(_all:beach)=1" + }, + { + "value": 0.16439898312091827, + "message": "fieldNorm(field=_all, doc=\u0000\u0000\u0000\u0000\u0000\u0000\u0006\ufffd)" + }, + { + "value": 4.433987204485146, + "message": "idf(docFreq=144, maxDocs=4495)" + } + ] + } + ] + } + ] + }, + { + "value": 1, + "message": "coord(1/1)" + } + ] + }, + { + "value": 0.25273569349434155, + "message": "product of:", + "children": [ + { + "value": 0.25273569349434155, + "message": "sum of:", + "children": [ + { + "value": 0.25273569349434155, + "message": "weight(_all:food^1.000000 in \u0000\u0000\u0000\u0000\u0000\u0000\u0006\ufffd), product of:", + "children": [ + { + "value": 0.4596440040764192, + "message": "queryWeight(_all:food^1.000000), product of:", + "children": [ + { + "value": 1, + "message": "boost" + }, + { + "value": 3.3446128568019726, + "message": "idf(docFreq=430, maxDocs=4495)" + }, + { + "value": 0.1374281639627249, + "message": "queryNorm" + } + ] + }, + { + "value": 0.5498509525913937, + "message": "fieldWeight(_all:food in 
\u0000\u0000\u0000\u0000\u0000\u0000\u0006\ufffd), product of:", + "children": [ + { + "value": 1, + "message": "tf(termFreq(_all:food)=1" + }, + { + "value": 0.16439898312091827, + "message": "fieldNorm(field=_all, doc=\u0000\u0000\u0000\u0000\u0000\u0000\u0006\ufffd)" + }, + { + "value": 3.3446128568019726, + "message": "idf(docFreq=430, maxDocs=4495)" + } + ] + } + ] + } + ] + }, + { + "value": 1, + "message": "coord(1/1)" + } + ] + } + ] + } + ] + }, + "locations": { + "content": { + "beach": [ + { + "pos": 17, + "start": 86, + "end": 91, + "array_positions": null + } + ], + "food": [ + { + "pos": 50, + "start": 280, + "end": 284, + "array_positions": null + } + ], + "view": [ + { + "pos": 30, + "start": 169, + "end": 173, + "array_positions": null + } + ] + } + }, + "fragments": { + "content": [ + "… Beach distillery offers a full menu, Sunday brunch, drinks, and a tremendous ocean view with comfortable fire pits. Happy hour Mon-Fri from 5PM to 7PM offers half-priced drinks and a discounted food …" + ] + }, + "sort": [ + "_score" + ], + "fields": { + "content": "Famous for \"the Blue Lady\", a ghost rumored to haunt the premises, the Moss Beach distillery offers a full menu, Sunday brunch, drinks, and a tremendous ocean view with comfortable fire pits. Happy hour Mon-Fri from 5PM to 7PM offers half-priced drinks and a discounted food menu."
+ } + } + ], + // tag::end_summary[] + "total_hits": 3, + "cost": 123616, + "max_score": 2.425509689250102, + "took": 636964, + "facets": null + // end::end_summary[] +} \ No newline at end of file diff --git a/modules/search/examples/run-search-full-request.jsonc b/modules/search/examples/run-search-full-request.jsonc index 5d04c3143..ce73d684a 100644 --- a/modules/search/examples/run-search-full-request.jsonc +++ b/modules/search/examples/run-search-full-request.jsonc @@ -9,7 +9,7 @@ "bool": false }, { - "field": "ratings.Cleanliness", + "field": "reviews.ratings.Cleanliness", "min": 1, "max": 3, "inclusive_min": true, @@ -47,6 +47,10 @@ "knn": [ { "k": 10, + "params": { + "ivf_nprobe_pct": 1, + "ivf_max_codes_pct": 0.2 + }, "field": "vector_field", "vector": [ 0.707106781186548, 0, 0.707106781186548 ] } @@ -67,8 +71,9 @@ // end::vectors[] "level": "at_plus", "results": "complete" - } + }, // end::consistency[] + "validate": true }, // end::ctl[] "size": 10, @@ -143,9 +148,9 @@ "by": "field", "field": "field2", "desc": false, - "mode": "max", + "mode": "default", "missing": "last", - "type": "number" + "type": "string" }, "-_score", "-_id" @@ -154,8 +159,8 @@ // end::sort[] "includeLocations": false, "score": "none", - "search_after": ["field1Value", "5", "10.033205341869529", "1234"], - "search_before": ["field1Value", "5", "10.033205341869529", "1234"], + "search_after": ["field1String", "field2String", "10.033205341869529", "hotel_1234"], + "search_before": ["field1String", "field2String", "10.033205341869529", "hotel_1234"], "limit": 10, "offset": 0, "collections": ["collection1", "collection2"] diff --git a/modules/search/examples/run-search-response.json b/modules/search/examples/run-search-response.json new file mode 100644 index 000000000..deeed06b2 --- /dev/null +++ b/modules/search/examples/run-search-response.json @@ -0,0 +1,47 @@ +{ + "status": { + "total": 1, + "failed": 0, + "successful": 1 + }, + "hits": [ + { + "index": 
"travel-sample.inventory.landmark-content-index_49563a96ea6d3686_4c1c5584", + "id": "landmark_4428", + "score": 2.425509689250102, + "sort": [ + "_score" + ], + "fields": { + "content": "serves fresh food at very reasonable prices - view of stoney beach with herons" + } + }, + { + "index": "travel-sample.inventory.landmark-content-index_49563a96ea6d3686_4c1c5584", + "id": "landmark_26385", + "score": 1.6270812956011347, + "sort": [ + "_score" + ], + "fields": { + "content": "Burgers, seafood, and other simple but tasty meals right at the harbor. You can take your food around the corner to sit on the beach or the sea wall and enjoy the ocean view while you eat." + } + }, + { + "index": "travel-sample.inventory.landmark-content-index_49563a96ea6d3686_4c1c5584", + "id": "landmark_38035", + "score": 1.1962539437368078, + "sort": [ + "_score" + ], + "fields": { + "content": "Famous for \"the Blue Lady\", a ghost rumored to haunt the premises, the Moss Beach distillery offers a full menu, Sunday brunch, drinks, and a tremendous ocean view with comfortable fire pits. Happy hour Mon-Fri from 5PM to 7PM offers half-priced drinks and a discounted food menu."
+ } + } + ], + "total_hits": 3, + "cost": 150479, + "max_score": 2.425509689250102, + "took": 1441203, + "facets": null +} \ No newline at end of file diff --git a/modules/search/examples/run-search-unindexed-field.sh b/modules/search/examples/run-search-unindexed-field.sh new file mode 100644 index 000000000..417c1d8fa --- /dev/null +++ b/modules/search/examples/run-search-unindexed-field.sh @@ -0,0 +1,19 @@ +curl -XPOST -H "Content-Type: application/json" \ + -u ${CB_USERNAME}:${CB_PASSWORD} http://${CB_HOSTNAME}:8094/api/bucket/travel-sample/scope/inventory/index/landmark-content-index/query \ + -d '{ + "explain": true, + "fields": [ + "content" + ], + "highlight": {}, + "query": { + "location": { + "lon": -2.235143, + "lat": 53.482358 + }, + "distance": "100mi", + "field": "geo" + }, + "size": 10, + "from": 0 + }' \ No newline at end of file diff --git a/modules/search/examples/run-search-validate-ui.jsonc b/modules/search/examples/run-search-validate-ui.jsonc new file mode 100644 index 000000000..127ce382f --- /dev/null +++ b/modules/search/examples/run-search-validate-ui.jsonc @@ -0,0 +1,20 @@ +{ + "explain": true, + "fields": [ + "content" + ], + "highlight": {}, + "query": { + "location": { + "lon": -2.235143, + "lat": 53.482358 + }, + "distance": "100mi", + "field": "geo" + }, + "ctl": { + "validate": true + }, + "size": 10, + "from": 0 +} \ No newline at end of file diff --git a/modules/search/examples/run-search-validate.sh b/modules/search/examples/run-search-validate.sh new file mode 100644 index 000000000..550d2c7d3 --- /dev/null +++ b/modules/search/examples/run-search-validate.sh @@ -0,0 +1,22 @@ +curl -XPOST -H "Content-Type: application/json" \ + -u ${CB_USERNAME}:${CB_PASSWORD} http://${CB_HOSTNAME}:8094/api/bucket/travel-sample/scope/inventory/index/landmark-content-index/query \ + -d '{ + "explain": true, + "fields": [ + "content" + ], + "highlight": {}, + "query": { + "location": { + "lon": -2.235143, + "lat": 53.482358 + }, + "distance": 
"100mi", + "field": "geo" + }, + "ctl": { + "validate": true + }, + "size": 10, + "from": 0 + }' \ No newline at end of file diff --git a/modules/search/pages/create-search-index-rest-api.adoc b/modules/search/pages/create-search-index-rest-api.adoc index cf52e6b81..244bb4af7 100644 --- a/modules/search/pages/create-search-index-rest-api.adoc +++ b/modules/search/pages/create-search-index-rest-api.adoc @@ -69,6 +69,16 @@ IMPORTANT: XATTRs mappings are only available in Couchbase Server version 7.6.2 For more information about the available JSON properties for a Search index, see xref:search-index-params.adoc[]. +If the REST API call is successful, the Search Service returns a `200 OK` and the following JSON response: + +[source,json] +---- +include::example$create-search-index-response.json[] +---- + +The `"uuid"` is randomly generated for each Search index you create. +Your own UUID might not match the value shown in the example. + == Next Steps After you create a Search index, you can xref:simple-search-rest-api.adoc[] to test your Search index. diff --git a/modules/search/pages/geo-search-rest-api.adoc b/modules/search/pages/geo-search-rest-api.adoc index dcd9f3a40..a551dc640 100644 --- a/modules/search/pages/geo-search-rest-api.adoc +++ b/modules/search/pages/geo-search-rest-api.adoc @@ -75,8 +75,26 @@ For example, the following query searches a geospatial field, `geo`, for any loc include::example$geospatial-search-query.sh[] ---- +If the REST API call is successful, the Search Service returns a `200 OK`. 
+Using the `landmark` collection, the query can return the following JSON response: + +[source,json] +---- +include::example$geospatial-search-response.json[] +---- + === Example: GeoJSON Query +[TIP] +==== +To run the following example against the `landmark` collection in the `travel-sample` dataset, run the following {sqlpp} query from the xref:tools:query-workbench.adoc[Query Workbench]: + +[source,sqlpp] +---- +include::example$geospatial-sqlpp-query.sqlpp[] +---- +==== + For example, the following query searches a geospatial field, `geojson`, for any locations within a defined shape with a xref:search-request-params.adoc#geojson-queries-polygon[Polygon GeoJSON Query]: [source,console] @@ -84,6 +102,14 @@ For example, the following query searches a geospatial field, `geojson`, for any include::example$geospatial-search-query-geojson.sh[] ---- +If the REST API call is successful, the Search Service returns a `200 OK`. +Using the `landmark` collection, the query can return the following JSON response: + +[source,json] +---- +include::example$geojson-search-response.json[] +---- + == Next Steps You can xref:customize-index.adoc[customize your Search index] to improve search results and performance. diff --git a/modules/search/pages/search-index-params.adoc b/modules/search/pages/search-index-params.adoc index 32e51279f..ae17c044f 100644 --- a/modules/search/pages/search-index-params.adoc +++ b/modules/search/pages/search-index-params.adoc @@ -1077,7 +1077,7 @@ The child field's type. Can be one of: For more information about the available field data types, see xref:field-data-types-reference.adoc[]. 
-|vector_index_optimized_for |String |Vector Only a| +|[#vector-index-optimized-param]#vector_index_optimized_for# |String |Vector Only a| include::partial$vector-search-field-descriptions.adoc[tag=optimized_for] diff --git a/modules/search/pages/search-request-params.adoc b/modules/search/pages/search-request-params.adoc index 25ced5b3e..24e62c678 100644 --- a/modules/search/pages/search-request-params.adoc +++ b/modules/search/pages/search-request-params.adoc @@ -49,6 +49,14 @@ Set the total number of results to return for a single page of search results. If you provide both the `size` and `limit` properties, the Search Service uses the `size` value. +The Search Service returns the `size` number of results: + +* Starting at the offset in the `from` or `offset` property. +* Starting from the key specified in the <<search_after,`search_after` property>>. +* Starting backward from the key specified in the <<search_before,`search_before` property>>. + +The `size` property is added by default to all Search requests, if not otherwise specified, with a value of `10`. + If you do not provide a `from`, `size`, or other pagination settings in your query, the Search Service defaults to a `size` value of `10` and a `from` value of `0`. This means the Search Service does not offset results, and returns the first `10` matches to your query. @@ -60,6 +68,8 @@ For example, if you set a `size` value of `5` and a `from` value of `10`, the Se If you provide both the `from` and `offset` properties, the Search Service uses the `from` value. +The `from` property is added by default to all Search requests, if not otherwise specified, with a value of `0`. + If you do not provide a `from`, `size`, or other pagination settings in your query, the Search Service defaults to a `size` value of `10` and a `from` value of `0`. This means the Search Service does not offset results, and returns the first `10` matches to your query.
@@ -93,7 +103,7 @@ To create an explanation for a search result's score in search results, set `exp To turn off explanations for search result scoring, set `explain` to `false`. -|[[sort-array]]sort |Array |No a| +|[[sort_arr]]sort |Array |No a| Contains an array of strings or JSON objects to set how to sort search results. By default, the Search Service sorts results based on score values, from highest to lowest. @@ -121,36 +131,50 @@ To turn off document relevancy scoring in search results, set `score` to `none`. To turn on document relevancy scoring in search results, remove the `score` property. -|search_after |Array |No a| +|[[search_after]]search_after |Array |No a| -NOTE: If you use `search_after` in a search request, you cannot use `search_before`. Both properties are included in the example code to show the correct syntax. +NOTE: If you use `search_after` in a search request, you can't use `search_before`. +Both properties are included in the example code to show the correct syntax. Use `search_after` with `from/offset` and `sort` to control pagination in search results. -For example, if you had a set of 10 documents to sort based on `_id` values of 1-10, with `from` set to `2` and `search_after` set to `8`, documents 9-10 appear on the same page. +Give a value for each string or JSON object in the <<sort_arr,`sort` array>> to the `search_after` array. +You must provide the values in the same order that they appear in the <<sort_arr,`sort` array>>. +Your `sort` array must force a total order on your search results. +Values in the `search_after` array must be strings. +You cannot use `search_after` with numbers or other field data types - if your `sort` array includes fields with a `date` or `number` type, you cannot use `search_after`. +Only result relevancy score values can be entered as strings in the array. -You must give a value for each string or JSON object in the `sort` array to the `search_after` array. -The Search Service starts search result pagination after the document with those values.
+The Search Service starts search result pagination after the document with the values you provide in the array. -You must provide the values in the same order that they appear in the `sort` array. -Your `sort` array must force a total order on your search results. +For example, if you had a set of 10 documents to sort based on `_id` values of 1-10, with `from` set to `2` and `search_after` set to `8`, documents 9-10 appear on the same page. + +To reduce the resource costs of deeper pagination on your Search queries, try to always include your document ID values as the final sort criteria in your <<sort_arr,`sort` array>>. +Set the `search_after` property to include the values from the last result on your previous page of search results to effectively paginate. Use `search_after` to make the memory requirements of deeper page searches more manageable, when compared to using only `from/offset`. `search_after` lets you start your search results from a specific result, rather than needing to process a number of search results to skip. -|search_before |Array |No a| +|[[search_before]]search_before |Array |No a| -NOTE: If you use `search_before` in a search request, you cannot use `search_after`. Both properties are included in the example code to show the correct syntax. +NOTE: If you use `search_before` in a search request, you can't use `search_after`. +Both properties are included in the example code to show the correct syntax. Use `search_before` with `from/offset` and `sort` to control pagination in search results. -For example, if you had a set of 10 documents to sort based on `_id` values of 1-10, with `from` set to `2` and `search_before` set to `8`, documents 2-6 appear on the same page. - -You must give a value for each string or JSON object in the `sort` array to the `search_before` array. -The Search Service starts search result pagination before the document with those values. - +Give a value for each string or JSON object in the `sort` array to the `search_before` array.
You must provide the values in the same order that they appear in the `sort` array. Your `sort` array must force a total order on your search results. +Values in the `search_before` array must be strings. +You cannot use `search_before` with numbers or other field data types - if your `sort` array includes fields with a `date` or `number` type, you cannot use `search_before`. +Only result relevancy score values can be entered as strings in the array. + +The Search Service starts search result pagination before the document with the values you provide in the array. + +For example, if you had a set of 10 documents to sort based on `_id` values of 1-10, with `from` set to `2` and `search_before` set to `8`, documents 2-6 appear on the same page. + +To reduce the resource costs of deeper pagination on your Search queries, try to always include your document ID values as the final sort criteria in your <<sort_arr,`sort` array>>. +Set the `search_before` property to include the values from the last result on your previous page of search results to effectively paginate. Use `search_before` to make the memory requirements of deeper page searches more manageable, when compared to using only `from/offset`. `search_before` lets you start your search results back from a specific result, rather than needing to process a number of search results to skip. @@ -188,6 +212,12 @@ The Search Service returns the `k` closest vectors to the vector given in `vecto NOTE: The <> overrides any value set in `k`. +|params |Object |No a| + +Enter additional parameters to control how the Search Service compares vectors when running a Vector Search request. + +For more information about the `params` object, see <<knn-params>>. + |field |String |Yes a| The name of the field that contains the vector data you want to search.
@@ -214,6 +244,40 @@ For more information about the dimension value, see the xref:search-index-params |==== +[#knn-params] +=== Knn params Object + +Use the `params` object inside a `knn` object to fine-tune the probes and centroids the Search Service uses and searches while running a Vector Search request. + +The `params` object can contain the following properties: + +[cols="1,1,1,4"] +|==== +|Property |Type |Required? |Description + +|ivf_nprobe_pct |Number (percentage) |No a| + +Set the `ivf_nprobe_pct` value to control the percentage of probes, or the percentage of clusters, that the Search Service searches during a single Vector Search query. + +The Search Service automatically calculates a default `nprobe` percentage based on the vectors in a given partition of your Vector Search index. +For more information about this calculation, see xref:vector-search:fine-tune-vector-search.adoc[]. + +If you set the value of `ivf_nprobe_pct` higher than this default calculated value, the Search Service will search a higher percentage of clusters in your processed vectors. +This can increase your accuracy and recall for Vector Search, but requires more compute time for each query. + +In the example, the Search Service searches only `1%` of the total available clusters. + +|ivf_max_codes_pct |Number (percentage out of 100) |No a| + +Set the `ivf_max_codes_pct` value to control the maximum number of centroids that the Search Service accesses during a single Vector Search query. + +By default, this value is always 100%. + +If you reduce your `ivf_max_codes_pct` value, the Search Service accesses fewer centroids, which reduces your Vector Search accuracy and recall, but gives faster compute times for your search. + +In the example, the Search Service searches only `0.2%` of the available centroids in your vector data.
+|==== + [#query-object] == Query Object @@ -1864,7 +1928,9 @@ The Search Service uses a consistency vector to synchronize the last document wr include::example$run-search-full-request.jsonc[tag=ctl] ---- -The `ctl` object contains the following properties: +In Couchbase Server 7.6.4 and later, you can also use the `ctl` object with the `validate` property to add an extra check to your queries and get help troubleshooting when a query does not return results. + +The `ctl` object can contain the following properties: [cols="1,1,1,4"] |==== @@ -1883,6 +1949,18 @@ An object that contains a `vectors` object and the `level` and `results` propert For more information, see <>. +|[[validate]]validate |Boolean |No a| + +[.status]#Couchbase Server 7.6.4# + +Add the `validate` property with a value of `true` to add extra validation checks to your Search query. + +For example, the Search Service can tell you through the Web Console or the REST API that a field in your Search query is not in your Search index: + +---- +err: query_validate: field not indexed, field: ratings.Cleanliness, type: number +---- + |==== [#consistency] @@ -2141,7 +2219,8 @@ The following `sort` object orders search results by the values in `field1`, the include::example$run-search-full-request.jsonc[tag=sort] ---- -TIP: For the best results with sorting and page navigation in search results, always include your document ID values (`_id` or `-_id`) as the final sort criteria in your `sort` object. +This means that if 2 documents have the same value in `field1`, then the Search Service will sort them again based on their `field2` values. +If they have the same value in `field2`, then sorting will happen again based on each document's score, and then finally the documents' ID values. 
The `sort` object can contain the following string values: diff --git a/modules/search/pages/simple-search-rest-api.adoc b/modules/search/pages/simple-search-rest-api.adoc index 5e9b30628..1df8dff5c 100644 --- a/modules/search/pages/simple-search-rest-api.adoc +++ b/modules/search/pages/simple-search-rest-api.adoc @@ -50,7 +50,7 @@ You can choose to copy a full command-line curl example, or copy just the xref:s For more information about how to perform a search with the UI, see xref:simple-search-ui.adoc[]. ==== -=== Example +=== Example: Simple Text Search In the following example, the JSON payload queries an index named `landmark-content-index` for the strings `view`, `food`, and `beach`: @@ -59,8 +59,93 @@ In the following example, the JSON payload queries an index named `landmark-cont include::example$run-search-payload.sh[] ---- +If the request is successful, the Search Service API starts by returning a status and a summary of the query request: + +[source,json] +---- +include::example$query-sample-results.jsonc[tag=query_summary] +---- + +The Search Service returns an array, called `hits`, which contains objects that describe the matches found in each document in the Search index: + +[source,json] +---- +include::example$query-sample-results.jsonc[tag=first_hit] +---- + +The Search query set score explanations to `true`, so the Search Service shows how it calculated the score for each document. 
+ +It also includes the locations of each match, under the `locations` object: + +[source,json] +---- +include::example$query-sample-results.jsonc[tag=locations] +---- + +The Search query enabled highlighting, so the Search Service uses the `<mark>` HTML tag to mark each match it found inside the Search index, inside a `fragments` object: + +[source,json] +---- +include::example$query-sample-results.jsonc[tag=highlight] +---- + +Since the target Search index was configured to store field values and return them in results, the Search Service returns the original field content in the `fields` object: + +[source,json] +---- +include::example$query-sample-results.jsonc[tag=field_content] +---- + +After the `hits` array, the Search Service returns a quick summary of the total number of hits, the maximum score, how long the query took, and other information: + +[source,json] +---- +include::example$query-sample-results.jsonc[tag=end_summary] +---- + For more information about the available properties for a Search query JSON payload, see xref:search-request-params.adoc[].
+If the REST API call is successful, the Search Service returns a `200 OK` and the following JSON response: + +[source,json] +---- +include::example$run-search-response.json[] +---- + +=== Example: Validate a Search Request + +[.status]#Couchbase Server 7.6.4# + +In the following example, the JSON payload queries a Search index, `landmark-content-index`, using a xref:search-request-params.adoc#geopoint-queries-distance[Distance/Radius-Based Geopoint Query] on the `geo` field: + +[source,console] +---- +include::example$run-search-unindexed-field.sh[] +---- + +The query runs, but does not return any results: + +[source,json] +---- +include::example$query-sample-results-unindexed.jsonc[] +---- + +To find out why the query did not return any results, you can include the xref:search-request-params.adoc#ctl[ctl object] with the `validate` property: + +[source,console] +---- +include::example$run-search-validate.sh[] +---- + +With the `validate` property included, the Search query now returns a `400 Bad Request` error and the following JSON response: + +[source,json] +---- +include::example$query-sample-results-validate.jsonc[] +---- + +The Search Service validates the query and identifies that the `geo` field used in the query is not included in the Search index. + == Next Steps If you do not get the search results you were expecting, you can change the JSON payload xref:search-index-params.adoc[for your Search index] or xref:search-request-params.adoc[for your Search query]. diff --git a/modules/search/pages/simple-search-ui.adoc b/modules/search/pages/simple-search-ui.adoc index 7ff6a0643..aa6a3a2c7 100644 --- a/modules/search/pages/simple-search-ui.adoc +++ b/modules/search/pages/simple-search-ui.adoc @@ -43,7 +43,7 @@ To run a simple search with the Couchbase {page-ui-name}: For more information about the available parameters, see xref:search-request-params.adoc[]. .. Click btn:[Execute].
-=== Example +=== Example: Simple Text Search For example, the following query searches for the strings `view`, `food`, and `beach`: @@ -58,6 +58,24 @@ It also returns all available fields in the index, and returns 10 results per pa TIP: Use a xref:index-aliases.adoc[Search index alias] to search multiple Search indexes in a single search query. Use the xref:search-request-params.adoc#collections[`collections` parameter] in your request to specify an array of collections to search from the Search index. +=== Example: Validate a Search Query + +[.status]#Couchbase Server 7.6.4# + +For example, the following query searches a Search index, `landmark-content-index`, using a xref:search-request-params.adoc#geopoint-queries-distance[Distance/Radius-Based Geopoint Query] on the `geo` field. +The query includes the xref:search-request-params.adoc#ctl[ctl object] with the `validate` property to validate the query: + +[source,json] +---- +include::example$run-search-validate-ui.jsonc[] +---- + +Since the `landmark-content-index` does not include a mapping for the `geo` field and the `validate` property is included in the query, the Web Console returns the following error: + +---- +query_validate: field not indexed, name: geo, type: geopoint +---- + == Next Steps If you do not get the search results you were expecting, you can change the xref:search-request-params.adoc[JSON payload for your Search query]. 
diff --git a/modules/search/partials/vector-search-field-descriptions.adoc b/modules/search/partials/vector-search-field-descriptions.adoc index fbe64610d..d1dc1aaf6 100644 --- a/modules/search/partials/vector-search-field-descriptions.adoc +++ b/modules/search/partials/vector-search-field-descriptions.adoc @@ -1,5 +1,5 @@ // tag::optimized_for[] -For a `vector` child field, choose whether the Search Service should prioritize recall or latency when returning similar vectors in search results: +For a `vector` child field, choose whether the Search Service should prioritize recall, latency, or memory efficiency when returning similar vectors in search results: * *recall*: The Search Service prioritizes returning the most accurate result. This may increase resource usage for Search queries. @@ -12,6 +12,11 @@ This may reduce the accuracy of results. + The Search Service uses half the `nprobe` value calculated for *recall* priority. +* *memory-efficient*: The Search Service prioritizes reducing memory usage and optimizes search operations to use fewer resources. +This may reduce both accuracy (recall) and latency. ++ +The Search Service uses either an inverted file index with scalar quantization, or a directly mapped index with exact vector comparisons, depending on the number of vectors in your data. + For more information about Vector Search indexes, see xref:vector-search:vector-search.adoc[] or xref:vector-search:create-vector-search-index-ui.adoc[]. // end::optimized_for[] // tag::similarity_metric[] @@ -31,6 +36,16 @@ Smaller euclidean distances mean that the values of each coordinate in the vecto + It's best to use *l2_norm* similarity when your embeddings contain information about the count or measure of specific things, and your embedding model uses the same similarity metric. +* *cosine*: Calculated by dividing the dot product of two vectors by the product of their magnitudes, which gives the cosine of the angle between them. 
+This metric is not affected by the size of the vectors being measured. ++ +Use *cosine* similarity to get the best results with an embedding model that uses cosine similarity. +Cosine similarity works well for semantic search, document classification, and recommendation systems. ++ +The Search Service will normalize any vectors in your documents before indexing when using cosine similarity. +It will also normalize any vectors in your queries if the field for those queries uses cosine similarity. +Use *dot_product* similarity if your vectors are already normalized. + For more information about Vector Search indexes, see xref:vector-search:vector-search.adoc[] or xref:vector-search:create-vector-search-index-ui.adoc[]. // end::similarity_metric[] // tag::dimension[] diff --git a/modules/vector-search/examples/create-vector-search-index-response.json b/modules/vector-search/examples/create-vector-search-index-response.json new file mode 100644 index 000000000..1db73edb0 --- /dev/null +++ b/modules/vector-search/examples/create-vector-search-index-response.json @@ -0,0 +1,5 @@ +{ + "status": "ok", + "name": "vector-sample.color.color-index", + "uuid": "629266a5f4e09384" +} \ No newline at end of file diff --git a/modules/vector-search/examples/run-pre-filtered-vector-search-rest-api.sh b/modules/vector-search/examples/run-pre-filtered-vector-search-rest-api.sh new file mode 100644 index 000000000..5ad973e62 --- /dev/null +++ b/modules/vector-search/examples/run-pre-filtered-vector-search-rest-api.sh @@ -0,0 +1,23 @@ +curl -XPOST -H "Content-Type: application/json" \ + -u ${CB_USERNAME}:${CB_PASSWORD} http://${CB_HOSTNAME}:8094/api/bucket/vector-sample/scope/color/index/color-index/query \ +-d '{ + "fields": ["*"], + "query": { + "min": 70, + "max": 80, + "inclusive_min": false, + "inclusive_max": true, + "field": "brightness" + }, + "knn": [ + { + "k": 10, + "field": "colorvect_l2", + "vector": [ 176, 0, 176 ], + "filter": { + "field": "color", + "match": "navy" + } + 
} + ] + }' diff --git a/modules/vector-search/examples/run-vector-search-payload.sh b/modules/vector-search/examples/run-vector-search-payload.sh index a0711019f..67409bc40 100644 --- a/modules/vector-search/examples/run-vector-search-payload.sh +++ b/modules/vector-search/examples/run-vector-search-payload.sh @@ -11,7 +11,7 @@ curl -XPOST -H "Content-Type: application/json" \ }, "knn": [ { - "k": 10, + "k": 3, "field": "colorvect_l2", "vector": [ 176, 0, 176 ] } diff --git a/modules/vector-search/examples/run-vector-search-response.json b/modules/vector-search/examples/run-vector-search-response.json new file mode 100644 index 000000000..033f513dd --- /dev/null +++ b/modules/vector-search/examples/run-vector-search-response.json @@ -0,0 +1,125 @@ +{ + "status": { + "total": 1, + "failed": 0, + "successful": 1 + }, + "hits": [ + { + "index": "vector-sample.color.color-index_629266a5f4e09384_4c1c5584", + "id": "#B000B0", + "score": 3.4028234663852886e+38, + "sort": [ + "_score" + ], + "fields": { + "brightness": 72.688, + "color": "dark lavender", + "description": "Dark lavender is a deep, rich color that exudes a sense of mystery and calmness. It envelopes the viewer in its alluring hue, drawing them in with its soothing presence. This color is perfect for creating a sense of depth and intrigue in any space." + }, + "partial_match": true + }, + { + "index": "vector-sample.color.color-index_629266a5f4e09384_4c1c5584", + "id": "#008000", + "score": 0.42046520427629075, + "sort": [ + "_score" + ], + "fields": { + "brightness": 75.136, + "color": "green", + "description": "Green is a color that evokes feelings of freshness and vitality. It is often associated with nature and growth, as it is the color of many plants and trees. The color green can also represent balance and harmony, as it is a combination of the calming blue and energizing yellow. It is a versatile color that can range from a soft pastel to a bold and vibrant hue. 
Whether it's the lush green of a forest or the crisp green of a freshly cut lawn, this color has a way of invigorating and rejuvenating the senses." + }, + "partial_match": true + }, + { + "index": "vector-sample.color.color-index_629266a5f4e09384_4c1c5584", + "id": "#483D8B", + "score": 0.42046520427629075, + "sort": [ + "_score" + ], + "fields": { + "brightness": 73.181, + "color": "dark slate blue", + "description": "Dark slate blue is a rich and deep color that evokes a sense of mystery and calmness. It envelopes the viewer in its deep hue, creating a soothing and tranquil atmosphere. This color is perfect for creating a sense of depth and intrigue in any space." + }, + "partial_match": true + }, + { + "index": "vector-sample.color.color-index_629266a5f4e09384_4c1c5584", + "id": "#C000C0", + "score": 0.3829836951163303, + "sort": [ + "_score" + ], + "fields": { + "brightness": 79.296, + "color": "magenta", + "description": "Magenta is a vibrant and bold color that is often described as a deep purplish-red. It is a highly saturated color that is eye-catching and demands attention. Magenta is often associated with creativity, passion, and energy. It is a color that exudes confidence and can add a pop of excitement to any design or outfit." + }, + "partial_match": true + }, + { + "index": "vector-sample.color.color-index_629266a5f4e09384_4c1c5584", + "id": "#FF0000", + "score": 0.3810305701163303, + "sort": [ + "_score" + ], + "fields": { + "brightness": 76.245, + "color": "red", + "description": "Red is a vibrant color that evokes feelings of passion and intensity. It is a bold and attention-grabbing color that symbolizes love, energy, and power. Red is often associated with strong emotions and can also represent danger or warning. It is a color that demands attention and can make a statement in any setting." 
+ }, + "partial_match": true + }, + { + "index": "vector-sample.color.color-index_629266a5f4e09384_4c1c5584", + "id": "#A52A2A", + "score": 0.3810305701163303, + "sort": [ + "_score" + ], + "fields": { + "brightness": 78.777, + "color": "brown", + "description": "Brown is a warm and earthy color that often evokes feelings of comfort and stability. It is a rich color that can range from light tan to dark chocolate. Brown is often associated with nature and can be found in the colors of trees, soil, and animals. It is a versatile color that can be used in both casual and formal settings, making it a popular choice in fashion and interior design. Overall, brown is a comforting and grounding color that adds a sense of warmth and coziness to any environment." + }, + "partial_match": true + }, + { + "index": "vector-sample.color.color-index_629266a5f4e09384_4c1c5584", + "id": "#B22222", + "score": 0.3810305701163303, + "sort": [ + "_score" + ], + "fields": { + "brightness": 77.056, + "color": "firebrick", + "description": "Firebrick is a deep, rich red color that evokes images of a blazing fire. It is a warm and intense hue, reminiscent of the glowing embers of a fire. The color is bold and eye-catching, yet also has a sense of warmth and comfort. Firebrick is a powerful and passionate color that demands attention and exudes energy and vitality." + }, + "partial_match": true + }, + { + "index": "vector-sample.color.color-index_629266a5f4e09384_4c1c5584", + "id": "#9400D3", + "score": 0.0004977600796416127, + "sort": [ + "_score" + ], + "fields": { + "brightness": 68.306, + "color": "dark violet", + "description": "Dark violet is a rich and deep color that can be described as enveloping, mysterious, and intense. It is a shade of purple that is darker and more intense than traditional violet. It exudes a sense of mystery and depth, making it a popular choice for creating a dramatic and moody atmosphere. The color is often associated with luxury, royalty, and spirituality. 
Its deep and intense hue can evoke a sense of power and sophistication. Dark violet is a versatile color that can be used to add depth and drama to any space or design." + } + } + ], + "total_hits": 8, + "cost": 2621, + "max_score": 3.4028234663852886e+38, + "took": 6628491, + "facets": null +} \ No newline at end of file diff --git a/modules/vector-search/pages/create-vector-search-index-rest-api.adoc b/modules/vector-search/pages/create-vector-search-index-rest-api.adoc index 3c1000ce8..131fd3887 100644 --- a/modules/vector-search/pages/create-vector-search-index-rest-api.adoc +++ b/modules/vector-search/pages/create-vector-search-index-rest-api.adoc @@ -70,6 +70,16 @@ NOTE: This sample JSON Vector Search index is the same as the one provided in xr For more information about all the available JSON properties for a Search index, see xref:search:search-index-params.adoc[]. +If the REST API call is successful, the Search Service returns a `200 OK` and the following JSON response: + +[source,json] +---- +include::example$create-vector-search-index-response.json[] +---- + +The `"uuid"` is randomly generated for each Search index you create. +Your own UUID might not match the value shown in the example. + == Next Steps After you create a Search index, you can xref:run-vector-search-rest-api.adoc[] to test your Search index. diff --git a/modules/vector-search/pages/fine-tune-vector-search.adoc b/modules/vector-search/pages/fine-tune-vector-search.adoc new file mode 100644 index 000000000..a277f218b --- /dev/null +++ b/modules/vector-search/pages/fine-tune-vector-search.adoc @@ -0,0 +1,150 @@ += Fine-Tuning a Vector Search Query +:stem: asciimath +:page-ui-name: {ui-name} +:page-product-name: {product-name} +:description: Add additional parameters to a Vector Search REST API call to tune the search for recall or accuracy. 
+ +[abstract] +{description} + + + +The Search Service automatically tunes your Vector Search indexes to achieve a balance between: + +* Recall, or the quality of your search results +* Latency, or your search response time +* Memory efficiency + +This tuning occurs during indexing and querying. +You do not need to adjust these parameters manually. + +Specifically, the Search Service dynamically adjusts two critical vector parameters: + +`nlist`, also known as `Centroid` count:: +The number of clusters used for indexing. +Centroids are used to quickly find the surrounding closest matches in the Vector Search index. +Increasing the number of centroids will increase accuracy but will decrease the speed of the search. ++ +The `nlist` is determined dynamically based on the size of the dataset, or the number of vectors in a partition: ++ +[%header, cols="3*a"] +|=== +| Number of vectors in partition (`nvec`) +| `Centroid count` (`nlist` calculation) +| Notes + +| stem:["nvec " ge " 200,000"] +| stem:[4 xx sqrt("nvec")] +| This formula is designed to handle larger datasets + where increasing the number of centroids does not yield significant improvements in recall. + +| stem:["1000" le "nvec" lt "200,000" ] +| stem:["nvec" / 100] +| This formula targets approximately 100 vectors per cluster, +which balances between too few and too many clusters, ensuring efficient indexing. + +| stem:["nvec" lt 1000] +| N/A +| For a number of vectors less than 1000, the Search Service will carry out a straightforward one-to-one mapping between IDs and vectors with an exact vector comparison. +Vectors are directly stored without the need for additional processing for the `nlist` calculation. + +|=== + +`nprobe` (also called `nprobes` or `probes`):: +This is the number of `centroids` that a Search query will check for similar vectors. +The `nprobe` value is only set when the Search Service is using an Inverted File Index. 
The Search Service will select the best index type and comparison method depending on the size of the dataset and your `vector_index_optimized_for` setting. ++ +For more information on the `vector_index_optimized_for` setting, see xref:search:search-index-params.adoc#vector-index-optimized-param[Search Index JSON properties]. ++ ++ +[%header, cols="3*a"] +|=== +| Query optimization +| `nprobe` calculation +| Notes + + +| Default calculation +| stem:[sqrt("nlist")] +| This provides a balanced tradeoff between recall and latency by adjusting the number of clusters probed during queries. + +| Latency-optimized calculation (`vector_index_optimized_for: latency`) +| stem:[sqrt("nlist") / 2] +| A minimum value of 1 is enforced to avoid setting `nprobe` too low. + +|=== + +== Default `nlist` and `nprobe` Calculations on a Vector Search Index + + +The cluster maintains two dynamically adjusted parameters that will affect the speed, accuracy, and resources used during a search: + +`ivf_nprobe_pct`:: +The percentage of clusters searched during queries, allowing for fine-tuning of the balance between recall and performance. +If the value of `nprobe` is 5% of `nlist` (the centroid count), then setting the value of `ivf_nprobe_pct` higher than 5% will have the search cover a higher percentage of clusters, which will improve the accuracy of the search. + +`ivf_max_codes_pct`:: +The value represents the percentage of `centroids` that will be visited during a search. +Reducing the value reduces the number of centroids visited, which will decrease accuracy and recall, but will result in faster compute times. The default value is 100 (that is, 100% of the centroids will be visited during the search). + + +.Default calculation +==== +If you have a Vector Search index with `vector_index_optimized_for` set to `"recall"` and `indexPartitions` set to `5`, then the `centroid count` (`nlist`) and `nprobe` are determined based on the current vector count in a given partition. 
+ +[options="noheader", frame="none", grid="none", cols="1,1"] +|=== +| Total vectors in index (optimization = recall) +| 10,000,000 + +| Average vectors in a partition for 5 partitions total +| 2,000,000 + +| centroid count (`nlist`) = stem:[4 times sqrt("average vectors in a partition")] +| 5657 + +| nprobes = stem:[sqrt(nlist)] +| 75 + +| Calculated default: `ivf_nprobe_pct` +| 1.325% + +| Calculated default: `ivf_max_codes_pct` +| 100% (default value) + +|=== + +==== + +== Fine-Tuning Query Parameters + + +You can set the values of `ivf_nprobe_pct` and `ivf_max_codes_pct` in your Vector Search queries to tune the recall or accuracy of your search. + +You can add the following parameters to your query: + +[source, json] +.Using tuning parameters +---- +{ + "fields": ["*"], + "knn": [{ + "k": 10, + "params": { + "ivf_nprobe_pct": 1, + "ivf_max_codes_pct": 0.2 + }, + "field": "embedding", + "vector": [0.024901132253900747, . . .] + }] +} +---- + +In the preceding example, the Search request returns results from any available fields in the index, but specifically searches the `embedding` field for the `10` closest matches to the vector `[0.024901132253900747, . . .]`. +The vector in the Search request has been truncated to reduce the display size of the example. +The parameters have been set to search 1% of the clusters, and 0.2% of the centroids. + + + + + diff --git a/modules/vector-search/pages/pre-filtering-vector-search.adoc b/modules/vector-search/pages/pre-filtering-vector-search.adoc new file mode 100644 index 000000000..0d133e918 --- /dev/null +++ b/modules/vector-search/pages/pre-filtering-vector-search.adoc @@ -0,0 +1,73 @@ += Pre-filtering Vector Searches +:page-topic-type: guide +:page-ui-name: {ui-name} +:description: You can specify filters as part of a vector search statement which will restrict the documents searched during the query. 
+ +[abstract] +{description} + +== About Pre-filtering + +Using pre-filtering as part of your vector search offers two key advantages: + +. *Enhanced precision and relevance:* +Narrow your search results based on specific criteria, such as organization, date/time ranges, or geospatial locations. + +. *Performance optimization:* +Reduce the search space before executing queries to improve query execution time and reduce computational overhead. + +== Prerequisites + +* You have the Search Service enabled on a node in your database. +For more information about how to deploy a new node and Services on your database, see xref:server:manage:manage-nodes/node-management-overview.adoc[]. + +* You have a bucket with scopes and collections in your database. +For more information about how to create a bucket, see xref:server:manage:manage-buckets/create-bucket.adoc[]. + +* Your user account has the *Search Admin* or *Search Reader* role. + +* You installed the Couchbase command-line tool (CLI). + +* You have the hostname or IP address for the node in your database where you're running the Search Service. +For more information about where to find the IP address for your node, see xref:server:manage:manage-nodes/list-cluster-nodes.adoc[]. + +* You have created a Vector Search index. ++ +For more information about how to create a Vector Search index, see xref:create-vector-search-index-ui.adoc[] or xref:create-vector-search-index-rest-api.adoc[]. ++ +[TIP] +-- +include::partial$download-sample-partial.adoc[] + +For the best results, consider using the sample Vector Search index from xref:create-vector-search-index-ui.adoc#example[Create a Vector Search Index with the {page-ui-name}] or xref:create-vector-search-index-rest-api.adoc#example[Create a Vector Search Index with the REST API and curl/HTTP]. +-- + +== Procedure + +To run a pre-filtered vector search with the REST API: + +. In your command-line tool, enter a `curl` command with the `XPOST` verb. +. 
Set your header content to include `"Content-Type: application/json"`. +. Add your `username`, `password`, and the Search Service endpoint on port `8094`. +. Add the `index name` you want to query to the endpoint. + +[source, console] +---- +curl -XPOST -H "Content-Type: application/json" \ + -u ${CB_USERNAME}:${CB_PASSWORD} http://${CB_HOSTNAME}:8094/api/bucket/vector-sample/scope/color/index/{INDEX_NAME}/query \ +-d \ +---- + +=== Example + +In the following example, you will extend a search query +to find matches in `color-index`. + A pre-filter on the query will narrow the documents in the index searched to those with a `color` field value + that closely matches `navy`. + +[source, console] +---- +include::example$run-pre-filtered-vector-search-rest-api.sh[] +---- + + diff --git a/modules/vector-search/pages/run-vector-search-rest-api.adoc b/modules/vector-search/pages/run-vector-search-rest-api.adoc index aa14917d5..dabd29b7d 100644 --- a/modules/vector-search/pages/run-vector-search-rest-api.adoc +++ b/modules/vector-search/pages/run-vector-search-rest-api.adoc @@ -72,6 +72,13 @@ TIP: For a more complex query, you can copy the `query` object from the example For more information about the available properties for a Search query JSON payload, see xref:search:search-request-params.adoc[]. +If the REST API call is successful, the Search Service returns a `200 OK` and the following JSON response: + +[source,json] +---- +include::example$run-vector-search-response.json[] +---- + == Next Steps If you do not get the search results you were expecting, you can change the JSON definition xref:search:search-index-params.adoc[for your Search index] or change the parameters xref:search:search-request-params.adoc[for your Search query]. 
diff --git a/modules/vector-search/partials/nav.adoc b/modules/vector-search/partials/nav.adoc index 43d3ac6c0..cbfb273a5 100644 --- a/modules/vector-search/partials/nav.adoc +++ b/modules/vector-search/partials/nav.adoc @@ -1,6 +1,9 @@ * xref:7.6@server:vector-search:vector-search.adoc[] ** xref:7.6@server:vector-search:create-vector-search-index-ui.adoc[] ** xref:7.6@server:vector-search:create-vector-search-index-rest-api.adoc[] +** xref:7.6@server:vector-search:pre-filtering-vector-search.adoc[] ** xref:7.6@server:vector-search:run-vector-search-ui.adoc[] ** xref:7.6@server:vector-search:run-vector-search-rest-api.adoc[] -** xref:7.6@server:vector-search:run-vector-search-sdk.adoc[] \ No newline at end of file +** xref:7.6@server:vector-search:run-vector-search-sdk.adoc[] +** xref:7.6@server:vector-search:fine-tune-vector-search.adoc[] +