diff --git a/x-pack/platform/packages/shared/kbn-ai-tools/src/tools/esql/esql_task_description.text b/x-pack/platform/packages/shared/kbn-ai-tools/src/tools/esql/esql_task_description.text index ed3134c26353c..ed1d375db05b7 100644 --- a/x-pack/platform/packages/shared/kbn-ai-tools/src/tools/esql/esql_task_description.text +++ b/x-pack/platform/packages/shared/kbn-ai-tools/src/tools/esql/esql_task_description.text @@ -170,9 +170,9 @@ LEAST: returns the smallest value from multiple columns ### Search functions Search functions perform full-text search against the data - -MATCH: execute a match query on a specified field (tech preview) -QSTR: performs a Lucene query string query (tech preview) +MATCH: execute a match query on a specified field - equivalent to the match query in Elasticsearch Query DSL +QSTR: perform a Lucene query string query. It can also be used to search for patterns across the complete event. +KQL: perform a KQL query. KQL is a simple text-based query language for filtering data. It can also be used to search for patterns across the complete event. ### Date-time functions diff --git a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/.file-cache.json b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/.file-cache.json new file mode 100644 index 0000000000000..4a65d33db024d --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/.file-cache.json @@ -0,0 +1,980 @@ +{ + "reference/query-languages/esql/commands/change-point.md": { + "hash": "4a7237a13331be19d90acf33178e2241a176bf8068a6b8878d8b250b91cccfd3", + "outputFiles": [ + "esql-change-point.txt" + ] + }, + "reference/query-languages/esql/commands/completion.md": { + "hash": "f152594fbea7017b3a2212e969c7d3d02df580eb1ad9c79393eea8adf3c92310", + "outputFiles": [ + "esql-completion.txt" + ] + }, + "reference/query-languages/esql/commands/dissect.md": { + "hash": "3b5cc29cfd116169c740944e5c99996e070c0bacb70146851905015389acdc9a", + "outputFiles": [ + "esql-dissect.txt" + ] + }, + "reference/query-languages/esql/commands/drop.md": { + "hash": "ff48d8f4922883065da25eb61508e799e44a734565f9662ae1eff9fe84ca41d5", + "outputFiles": [ + "esql-drop.txt" + ] + }, + "reference/query-languages/esql/commands/enrich.md": { + "hash": "c2df5c20077572cb9a7aaedd498da22381d56d7f14e64248cc2f769a1c4f9a4f", + "outputFiles": [ + "esql-enrich.txt" + ] + }, + "reference/query-languages/esql/commands/eval.md": { + "hash": "ad199b0fb20bb4f758c1a96fdf953ef749d6d60be3260e36cf4eeca0e05ce494", + "outputFiles": [ + "esql-eval.txt" + ] + }, + "reference/query-languages/esql/commands/fork.md": { + "hash": "ca0d355e98351581a4254feadc43731ae792993df9f90fd953240aff067e0011", + "outputFiles": [ + "esql-fork.txt" + ] + }, + "reference/query-languages/esql/commands/from.md": { + "hash": "bfb32e1166c447af787bfa817780a2839b2bdb349e9461919a9680d237106555", + "outputFiles": [ + "esql-from.txt" + ] + }, + "reference/query-languages/esql/commands/fuse.md": { + "hash": "13f8ddfdfc8924ed72e23d36ac9e88b33dd3bc9b893a19dae4b0ec389f7640e0", + "outputFiles": [ + "esql-fuse.txt" + ] + }, + "reference/query-languages/esql/commands/grok.md": { + "hash": "6a2437ba920a8fd92d4d02e4b5579016a7365343ff2b6c179327137c2eb1fc18", + "outputFiles": [ + "esql-grok.txt" + ] + }, + "reference/query-languages/esql/commands/inlinestats-by.md": { + "hash": "ec7c4c13f45025b1df65e929f1343f9ae371525bf155f1d5e1459532d1e6210c", + "outputFiles": [ + "esql-inlinestats-by.txt" + ] + }, + "reference/query-languages/esql/commands/keep.md": { + "hash":
"3c51a210f2a3b8b5b5343f1c8d7365bfd5f88d4640302169d0cd246d9fc6a973", + "outputFiles": [ + "esql-keep.txt" + ] + }, + "reference/query-languages/esql/commands/limit.md": { + "hash": "b012be5cd7a08bf4f5c71449db2b3b04021854d0d0a60d06cc7e40bfbe2456a0", + "outputFiles": [ + "esql-limit.txt" + ] + }, + "reference/query-languages/esql/commands/lookup-join.md": { + "hash": "27afb8c0fb310ee5fb5fc0bdd82ef7819af1d12bd45dfebe53c15228e715e81a", + "outputFiles": [ + "esql-lookup-join.txt" + ] + }, + "reference/query-languages/esql/commands/mv_expand.md": { + "hash": "2bbd5ab9eda464a6c6f66b4972744d79db665741cb4776ba59a1b5b4031609b1", + "outputFiles": [ + "esql-mv_expand.txt" + ] + }, + "reference/query-languages/esql/commands/rename.md": { + "hash": "b0d8944225b190becb69d54ac4ba99bd557aa4048b68b96e4212083af31c0bc0", + "outputFiles": [ + "esql-rename.txt" + ] + }, + "reference/query-languages/esql/commands/rerank.md": { + "hash": "fdcdcaf21ed4e2d74127d42c03f82abcdf5a7783012322fc24e6d354418f1b4b", + "outputFiles": [ + "esql-rerank.txt" + ] + }, + "reference/query-languages/esql/commands/row.md": { + "hash": "da1989e5ec8fe3926c95100a5d3bea1d206e3a70997171e1a61e3c81918fbab7", + "outputFiles": [ + "esql-row.txt" + ] + }, + "reference/query-languages/esql/commands/sample.md": { + "hash": "f25978957feb4ca18a9e809fe6fade065d224712b79249c0d046a053ecba3e12", + "outputFiles": [ + "esql-sample.txt" + ] + }, + "reference/query-languages/esql/commands/show.md": { + "hash": "7a2537c9889f642e2313f784ac750d64400f41ceff148d15323c9a1e8a172603", + "outputFiles": [ + "esql-show.txt" + ] + }, + "reference/query-languages/esql/commands/sort.md": { + "hash": "727fdc967f247eff3907ecbdb6f8ef0c3e9113a16cbd6d22c6d734386fa440fc", + "outputFiles": [ + "esql-sort.txt" + ] + }, + "reference/query-languages/esql/commands/stats-by.md": { + "hash": "23950849b85e6cfdcd7c033e1824c9d46388c90d84fb4e9096401c54159740eb", + "outputFiles": [ + "esql-stats-by.txt" + ] + }, + "reference/query-languages/esql/commands/ts.md": { + "hash": "12dd414dbc515ca408cc280a73e6e8293fc12c1eb3d86f6728edb0149b18521f", + "outputFiles": [ + "esql-ts.txt" + ] + }, + "reference/query-languages/esql/commands/where.md": { + "hash": "8c44f37502efcf9819f5dfe7296450da18d03c85c60fb4c99861008e60fc0c20", + "outputFiles": [ + "esql-where.txt" + ] + }, + "reference/query-languages/esql/functions-operators/aggregation-functions.md": { + "hash": "c091035c363743c049db403e5ec87e9bcf145d8c97e8473a4b172976692f4a74", + "outputFiles": [ + "esql-absent.txt", + "esql-avg.txt", + "esql-count.txt", + "esql-count_distinct.txt", + "esql-max.txt", + "esql-median.txt", + "esql-median_absolute_deviation.txt", + "esql-min.txt", + "esql-percentile.txt", + "esql-present.txt", + "esql-sample.txt", + "esql-st_centroid_agg.txt", + "esql-st_extent_agg.txt", + "esql-std_dev.txt", + "esql-sum.txt", + "esql-top.txt", + "esql-values.txt", + "esql-variance.txt", + "esql-weighted_avg.txt" + ], + "sections": { + "esql-absent.txt": { + "hash": "f493b69a41aebb72c2fd4e41eef3e3b1ed3c8ad5b0fc3e290a7805191a1c559e" + }, + "esql-avg.txt": { + "hash": "a97be5f6fcf76d1d0cf303d233b369d7b05dc4969c439a21deb23aaafab529f8" + }, + "esql-count.txt": { + "hash": "bd993de5844edcb0ef388e5dfd364a90fed2dece90bf8fcb9193540e2b2ac941" + }, + "esql-count_distinct.txt": { + "hash": "653801fc855f285d47c0e5d6ca642f8a1dc3f4f682b2c7d998888d72b671dfa7" + }, + "esql-max.txt": { + "hash": "88561a5ba33554ca6641f7e38837efdc1080ba255f940e55274b58f5ff46b7a6" + }, + "esql-median.txt": { + "hash": 
"c5c064340a81a70eba0afa3fb48f9fe0a12f42945fee9d5c165b6f19ab770315" + }, + "esql-median_absolute_deviation.txt": { + "hash": "7362768123d2803d64521a3cecea20cf86426b5297b99534dad63888aa067b4f" + }, + "esql-min.txt": { + "hash": "49be028ee8bbcafcf8cb1ee40b7a269411d7479f90f62914112da3c1f543c430" + }, + "esql-percentile.txt": { + "hash": "ea5e7354918dfbef4e4d74c6f8332b6bf3d956c41bb7507883af9b82bf9f56ff" + }, + "esql-present.txt": { + "hash": "73d184e1e67b0d11a9475367d1c49e3be854fa648aace2ab8466d7422fcd78a3" + }, + "esql-sample.txt": { + "hash": "25be207019af4b352ee26504a7fa33395eabec8e958e3d6ca6d38022d0c15959" + }, + "esql-st_centroid_agg.txt": { + "hash": "6429990665fec86f6185787f9738a82784b3acb1ebe3820d8deb10bb7f315bf7" + }, + "esql-st_extent_agg.txt": { + "hash": "b71c56324c4527f6fcea7fb9cae5515902754e8b8a3d3ed1bb75b17e193c1782" + }, + "esql-std_dev.txt": { + "hash": "99b138bcebe148665a1cd9504c768b9ae7fc2bcdd007d8e7e25611fad74bd596" + }, + "esql-sum.txt": { + "hash": "f2be8da83f336fa9a13b70f190d04c416098a498a7eb010c0536d91eef40ed43" + }, + "esql-top.txt": { + "hash": "3ced401db4123f63964b7de77dd4c8a3eb6e3d5f9042ab95d1cbef5677921853" + }, + "esql-values.txt": { + "hash": "cedd1bdc899331c0a2fdf5393b795a67d77da7cfd8d635f65b1d12701b554ffd" + }, + "esql-variance.txt": { + "hash": "66ca2c9770590ad14f107e7b11c653df5ae2c756e745578e9af1c66902a90406" + }, + "esql-weighted_avg.txt": { + "hash": "9aae5f18c064b7acd8ffe62c16981082fd233c19659a9ae55cf1115a8cac9d84" + } + } + }, + "reference/query-languages/esql/functions-operators/conditional-functions-and-expressions.md": { + "hash": "19bd6af514722f4cec1dcdb6a63354d2b23e3132ef6579563db29b5aeffd822c", + "outputFiles": [ + "esql-case.txt", + "esql-coalesce.txt", + "esql-greatest.txt", + "esql-least.txt", + "esql-clamp.txt", + "esql-clamp_min.txt", + "esql-clamp_max.txt" + ], + "sections": { + "esql-case.txt": { + "hash": "33977c4a5233dd339dc907a0903b3624ea41e96705aa71ca0f86b438d72c1e6f" + }, + "esql-coalesce.txt": { + "hash": "17f5551c526ab042827e3e8a24c4c0106dfa9f4ff8269b6ab37a6316f55ed299" + }, + "esql-greatest.txt": { + "hash": "cdb82b9b98c633be80fb0dac7ee4f9017372d147102eb2077dbe11e3e8fb19b1" + }, + "esql-least.txt": { + "hash": "6b849878503eb1b316393c0659399f5f5d065c8579b748d1dcf714800bcca945" + }, + "esql-clamp.txt": { + "hash": "86b2360d33ce7e51628231a21df7b6057a1d96773328648e923c3b879a031b32" + }, + "esql-clamp_min.txt": { + "hash": "08ad301cab2e4b55655a057e1f2065aae05464f29701438a68f1637a1aa3409d" + }, + "esql-clamp_max.txt": { + "hash": "cd95f0bce8beaa99b09df28d954253d5b868a011e80394107dfe06d113221cd2" + } + } + }, + "reference/query-languages/esql/functions-operators/date-time-functions.md": { + "hash": "38bb8145425f830f17bf20a9d937e9a99c46ae1d00464a34413956533471765b", + "outputFiles": [ + "esql-date_diff.txt", + "esql-date_extract.txt", + "esql-date_format.txt", + "esql-date_parse.txt", + "esql-date_trunc.txt", + "esql-day_name.txt", + "esql-month_name.txt", + "esql-now.txt", + "esql-trange.txt" + ], + "sections": { + "esql-date_diff.txt": { + "hash": "f93b75b24598ebc9a005cdefda2d74b982e06f9f03017b69561a509c20406474" + }, + "esql-date_extract.txt": { + "hash": "4ebd8f00949b68f87e7e343ff5199721b35523d7f01ce5d68374fb0e78509266" + }, + "esql-date_format.txt": { + "hash": "7a1bc1b98ab36590367be5efd29b4af1ae10bd61a214388a1993ee84568c53ad" + }, + "esql-date_parse.txt": { + "hash": "4367c41578dc946f3a9c7049b10c7f56dd306330278826f72fbef47184094d7f" + }, + "esql-date_trunc.txt": { + "hash": 
"222470423ecf866c575b27c7cae31bb4ed75bcb32da5f0fee8cb4f58bb8ad111" + }, + "esql-day_name.txt": { + "hash": "cf0bd5063d03dd023c1b5a738ddfb88fdfc817ccbbb9597e850f7dafd2e906eb" + }, + "esql-month_name.txt": { + "hash": "286f287fc0258c1dbefb5dc8b5128a9f9452f05697a23069d539eb31a02f9108" + }, + "esql-now.txt": { + "hash": "67e3039949e345669007c396a7febd4cf4296461fde5a45abc371f840274e5ad" + }, + "esql-trange.txt": { + "hash": "8a18588624772c684411cad67a377c42fc5228a16fc3ac0ceae12a6622e51f86" + } + } + }, + "reference/query-languages/esql/functions-operators/dense-vector-functions.md": { + "hash": "af7d21e4833bdc0d4eddd3f5af84c1671e231b5edf7108a13bc108c19cf2ef15", + "outputFiles": [ + "esql-knn.txt", + "esql-text_embedding.txt", + "esql-v_cosine.txt", + "esql-v_dot_product.txt", + "esql-v_hamming.txt", + "esql-v_l1_norm.txt", + "esql-v_l2_norm.txt" + ], + "sections": { + "esql-knn.txt": { + "hash": "cd23ec4706988f72cdb870e47fc06993beb5b5ca6e78092aa1f8a93eac69877f" + }, + "esql-text_embedding.txt": { + "hash": "65088e8bec1af79b429f3ededd02efb020886e1e9a7ef1bdcca5af01ab713fdf" + }, + "esql-v_cosine.txt": { + "hash": "95250bd1f7a0b022049e2b90e4017cb94b4bc7730c071e6fe4b743e8844dd26e" + }, + "esql-v_dot_product.txt": { + "hash": "e90f7da824c6e1c563f75563574e2e9d137811ded4ba8759ad1469f74530e5e1" + }, + "esql-v_hamming.txt": { + "hash": "1efb06f4f4aafed89a8cd4502824262edd70d88b48bbd091e1dabc6b4a321ae8" + }, + "esql-v_l1_norm.txt": { + "hash": "03e2286f9a435a47eb12f52f2d522048acdfa658f641fdd57610569ab51bc628" + }, + "esql-v_l2_norm.txt": { + "hash": "549794558a2b011fccf9bdc17a720cb5e7eafe77f564507372e930214d0e39c3" + } + } + }, + "reference/query-languages/esql/functions-operators/grouping-functions.md": { + "hash": "4de2e8ace30534a6ca337ca3ea321c666a4e7d52ad4e3da59d1a67c69d111045", + "outputFiles": [ + "esql-bucket.txt", + "esql-tbucket.txt", + "esql-categorize.txt" + ], + "sections": { + "esql-bucket.txt": { + "hash": "34299ed11abef141f144e16cc9ecb1e8f20f6da09639a1bb36bab777a1850d09" + }, + "esql-tbucket.txt": { + "hash": "bbab734f6b3cbdb1e8db3e84a69a73a637de489e5d3cf6632ef37847afaf73b2" + }, + "esql-categorize.txt": { + "hash": "3325503b838e7450b292a85b0c7206ea0c9e148af3b98b49121b75363c576b94" + } + } + }, + "reference/query-languages/esql/functions-operators/ip-functions.md": { + "hash": "b37d1e926e63bc2e5d8494eacb6076092bb0ea453f9011074f65c566247f6544", + "outputFiles": [ + "esql-cidr_match.txt", + "esql-ip_prefix.txt" + ], + "sections": { + "esql-cidr_match.txt": { + "hash": "1e4186327411bbfd84af5a73ae23627de695262edb41389f252e9db0eb0df659" + }, + "esql-ip_prefix.txt": { + "hash": "cde5e107f72df69f32bd85c1bf589c821182a00a9c11c0adbc597f56a93360dc" + } + } + }, + "reference/query-languages/esql/functions-operators/math-functions.md": { + "hash": "b4b5fcb268d59a72996e21eaaaa9c24251a2a4ad3dac7535701e5b11b5133904", + "outputFiles": [ + "esql-abs.txt", + "esql-acos.txt", + "esql-asin.txt", + "esql-atan.txt", + "esql-atan2.txt", + "esql-cbrt.txt", + "esql-ceil.txt", + "esql-copy_sign.txt", + "esql-cos.txt", + "esql-cosh.txt", + "esql-e.txt", + "esql-exp.txt", + "esql-floor.txt", + "esql-hypot.txt", + "esql-log.txt", + "esql-log10.txt", + "esql-pi.txt", + "esql-pow.txt", + "esql-round.txt", + "esql-round_to.txt", + "esql-scalb.txt", + "esql-signum.txt", + "esql-sin.txt", + "esql-sinh.txt", + "esql-sqrt.txt", + "esql-tan.txt", + "esql-tanh.txt", + "esql-tau.txt" + ], + "sections": { + "esql-abs.txt": { + "hash": "29781d6ff9f49ebf773a3388e5f69d7c337513b544da2bd42964234f8e2f7865" + }, + "esql-acos.txt": { + 
"hash": "f5e5dc70d565e8f47b78cc4e89bbc98caee0ed7af917c900f8c9f1fe656f5e8f" + }, + "esql-asin.txt": { + "hash": "78023c10c8f393c3285c6337a3466a0a710b138eef796f3d495e0266ac4ad605" + }, + "esql-atan.txt": { + "hash": "5bd219d9d134a7bc4a1f810ada2abe948ffbba7df924569a7c4bd8952604c21c" + }, + "esql-atan2.txt": { + "hash": "b7a0e830a9d3af6ff2a62dd65cf280924eafd4060dff09d08600afad3d1f52f6" + }, + "esql-cbrt.txt": { + "hash": "c2e390170b6227cdedce65964ca6bc74d3231141b069758363e93bf0ee79f88a" + }, + "esql-ceil.txt": { + "hash": "aa969ab47643f8dc4fa9e5255c02de4abee82422c2563f92031366c284303c97" + }, + "esql-copy_sign.txt": { + "hash": "c69ff40c64d2ad1fa4c8b1f8409a20c97c9cb93bf3b71ee0c10ab17161621851" + }, + "esql-cos.txt": { + "hash": "b51f7a405715516a807776582454d82e58d596e5d9dac34127341848d480f9e2" + }, + "esql-cosh.txt": { + "hash": "41217aeac98421e27a0c511bbbb29ac0db50a1c35d304994ff8d35647f78bd44" + }, + "esql-e.txt": { + "hash": "b0442599006fa48b9e013dcaf9c85ff9302f819d4c94a649b715284fa97c7a6f" + }, + "esql-exp.txt": { + "hash": "6a6d380f26c2e73f549e09bfd56cd10c6639d4bc873861ef6a23f84fd8e85e35" + }, + "esql-floor.txt": { + "hash": "388f02787e0ff093a65a1e2d402e8efc67accf617054aa828f6758c58f54c0af" + }, + "esql-hypot.txt": { + "hash": "b4eea62954f286d92b85eab4308284a4beb89e80da5d332590df477739f5fa72" + }, + "esql-log.txt": { + "hash": "bdddf74d99c64c3b2bf3864dba06898472f964ef6d9a0f73fc90e64ec10cad3d" + }, + "esql-log10.txt": { + "hash": "f84794f5f55c5f1b25e4fc727318c05e8c983b4ef6b57f383e80f148dab56816" + }, + "esql-pi.txt": { + "hash": "9aaf90798f2c03522c41cd1f9cfeb9e6eb09f7313589b1bf3324f62be73b3c70" + }, + "esql-pow.txt": { + "hash": "db06946a8749862c7b3aaf2f412a047be2babb38e397e1d2502a0f4fa1204095" + }, + "esql-round.txt": { + "hash": "ca747148965d71346d4cc76edb93ae265e38a765b6ba6e8c57c9986e7db179a6" + }, + "esql-round_to.txt": { + "hash": "cfb4551ccda75ce7f37e25c9a2f0031c20995445248a07b3a944192fc92ea774" + }, + "esql-scalb.txt": { + "hash": "a2f46db02236ded94ef714e373b30f25fb673ba14c88f6e25c1d53c029bd907e" + }, + "esql-signum.txt": { + "hash": "e608b3e26f11ce0b65c02a5f563aeb3293439553cd391e36d457ac6dbcc26f74" + }, + "esql-sin.txt": { + "hash": "4d64d4753b0a42e91c77a42473485e2ec652bc06e4e00d26d6d4f6fd56a2cf86" + }, + "esql-sinh.txt": { + "hash": "1fd0a327abb3033e4a3c00512462f2a3ca578a0e5d0fafc30b84b1471b9bb0f4" + }, + "esql-sqrt.txt": { + "hash": "d7b1aef782ecdaef6af4a49ba2bdc7153356afb2495c522184c605f879a5b116" + }, + "esql-tan.txt": { + "hash": "205959855c114d3116ee425a54467adb2d4d09d0c2e7446388279fe3d9ae08ca" + }, + "esql-tanh.txt": { + "hash": "fa7fec6c646b3d7dd79b07b58f0690d8ff389693eb89f24c363974058fff036b" + }, + "esql-tau.txt": { + "hash": "68e8ef79f3217475419c60ea1bdacefaab8dec8d86d92d34ac1f980b1954c0ff" + } + } + }, + "reference/query-languages/esql/functions-operators/mv-functions.md": { + "hash": "34a73d5889d14b5b1c2669a3c7eda4f55f017b516ea5d2061e64dd4b25eee00b", + "outputFiles": [ + "esql-mv_append.txt", + "esql-mv_avg.txt", + "esql-mv_concat.txt", + "esql-mv_contains.txt", + "esql-mv_count.txt", + "esql-mv_dedupe.txt", + "esql-mv_first.txt", + "esql-mv_intersection.txt", + "esql-mv_last.txt", + "esql-mv_max.txt", + "esql-mv_median.txt", + "esql-mv_median_absolute_deviation.txt", + "esql-mv_min.txt", + "esql-mv_percentile.txt", + "esql-mv_pseries_weighted_sum.txt", + "esql-mv_slice.txt", + "esql-mv_sort.txt", + "esql-mv_sum.txt", + "esql-mv_union.txt", + "esql-mv_zip.txt" + ], + "sections": { + "esql-mv_append.txt": { + "hash": 
"dd29554b45701be06a6929faab6cd66202abea68e271e66aabf1acfb39b2cea6" + }, + "esql-mv_avg.txt": { + "hash": "19e2c9fd820e3586f24b7b450d8ecc84479f5f562f9ea7c3071c1582e059bd80" + }, + "esql-mv_concat.txt": { + "hash": "eea3fa70c1069c95a4f924baa4b60d14ca2a669595052ffbd21f2b666e63285a" + }, + "esql-mv_contains.txt": { + "hash": "7352694bf7d14ade0549b83a9b41893b32552f53073010f7f3c420b7091ef74f" + }, + "esql-mv_count.txt": { + "hash": "b5e53a5c80707244aaeed6e5ce734ea8630bb5e389d63f4d94f4e7b57ef7e915" + }, + "esql-mv_dedupe.txt": { + "hash": "85e39f656f09f2cc6258591b629b20bebeb54d822f5cef791049ffa6428ce794" + }, + "esql-mv_first.txt": { + "hash": "7e69a5d8d9ca25c3ec37e65e8b31768b627f00bfbc9014a3405f9e10cfc186df" + }, + "esql-mv_intersection.txt": { + "hash": "13f935f039d45ae9fa22f2452a1c90435bafa554ccf2b849fc3e8c7936f857ac" + }, + "esql-mv_last.txt": { + "hash": "b49a1244f297fad384fdaa356da64f9f76ac952ca720d6e8086d16d3c95a7ab8" + }, + "esql-mv_max.txt": { + "hash": "96abde8b5f79836bbbbc2b2a4dc35c0983593c79215d087798a5ca8b1d157ab4" + }, + "esql-mv_median.txt": { + "hash": "f688e659d1efdfa87ba243287a897b95cb8d978c9c32a065bcf465b1f4b50357" + }, + "esql-mv_median_absolute_deviation.txt": { + "hash": "bfa94064134e7fc63796638c5fdf17efc12062898fb085aba32e7770523f218e" + }, + "esql-mv_min.txt": { + "hash": "ea1d32adb92ed4356a33ade6c5907af40aaa525b093ec4fb97fcdcf2661dab22" + }, + "esql-mv_percentile.txt": { + "hash": "62769285d8b0e0ab3d0adf6d1b96e4eef721f0496401e87740123c6637ba032c" + }, + "esql-mv_pseries_weighted_sum.txt": { + "hash": "f607f6a3b2ae78a7e862c5cf4b00232602b43a5854f24035ad8975f3bfd774a2" + }, + "esql-mv_slice.txt": { + "hash": "0766987d953d74d12df762160a9c8c17d1aa6ff4a398341e856c39d51794bdab" + }, + "esql-mv_sort.txt": { + "hash": "83d1cae3fc826570f24c2f0dafd61bcffb6da5dde254a2615d5541d1de25f45c" + }, + "esql-mv_sum.txt": { + "hash": "6818a9e14d74cc76686308ca33b17ac3d19ee57c40ebee9170bc635eda347b5a" + }, + "esql-mv_union.txt": { + "hash": "91fbde62b168ead1d1b30c6e11a7da838d2cdc6849b41cdaeab1cd9c252ee45d" + }, + "esql-mv_zip.txt": { + "hash": "2f4c4d989efa4409144db034987f24a890dfed489e907a202941cc227a813018" + } + } + }, + "reference/query-languages/esql/functions-operators/operators.md": { + "hash": "1a58fa1f1a2470f5bad0e702683bbf7b7700c98ae444d1cc0537c9e096544d2a", + "outputFiles": [ + "esql-binary operators.txt", + "esql-unary operators.txt", + "esql-logical operators.txt", + "esql-suffix operators.txt", + "esql-infix operators.txt" + ], + "sections": { + "esql-binary operators.txt": { + "hash": "c24fc594b509b9460b45cd7703b5c33d892b2f25aaa19802bc3227bdeab8deb6" + }, + "esql-unary operators.txt": { + "hash": "61fdbf0a7875d3b2ed832566e6d33b2cca6dddaf26745aa19d1fa67d77c61187" + }, + "esql-logical operators.txt": { + "hash": "81d675bfb6541de0f5671572dc625d930bd638257dcf3aa5ab960339152fbf68" + }, + "esql-suffix operators.txt": { + "hash": "4455744ec0b64c2fc1f3539fdc7946dccbf3a1ed74e1bcf0719183505b4b11da" + }, + "esql-infix operators.txt": { + "hash": "c945b4ce944f723066df76b2fe48af0609a9d25fe6649d527f0b634a4e27a582" + } + } + }, + "reference/query-languages/esql/functions-operators/search-functions.md": { + "hash": "f61f2407fcbc454040ff4195334acace3bb0538ea1870795eb6c623234a5b53a", + "outputFiles": [ + "esql-decay.txt", + "esql-kql.txt", + "esql-match.txt", + "esql-match_phrase.txt", + "esql-qstr.txt", + "esql-score.txt", + "esql-top_snippets.txt" + ], + "sections": { + "esql-decay.txt": { + "hash": "8b1c1e3448941dfaf2fe5a59a7ca66490651d65ab5439d23ff1780e9dacb7a20" + }, + "esql-kql.txt": { + 
"hash": "30d85b3386b27ab6939b6550a895dd9380eb6de7e36b8bf2425958d1e7d1efa0" + }, + "esql-match.txt": { + "hash": "6395f8106cb823f66c94700f5f435e99bb45a978a418c8362a50aef59b53d03d" + }, + "esql-match_phrase.txt": { + "hash": "4c6cb5d4b329c44c1b56272c72066fdca40de7ac3d427f073e6d47bf49b671c3" + }, + "esql-qstr.txt": { + "hash": "1cecc1c39c3d5f0f946ad85fd8fdd306560c5f73e9c86ba0284e0cecd83f9354" + }, + "esql-score.txt": { + "hash": "6f658ac301dacdb04c098e4549e65698b386945c3b5c34907b8527adfa3aefd9" + }, + "esql-top_snippets.txt": { + "hash": "c983b08f3bcee678017d495173594e5e1a81d93065122cb6c0fe212f72b58e46" + } + } + }, + "reference/query-languages/esql/functions-operators/spatial-functions.md": { + "hash": "b725cb9f744f4543150b1af31047dce8292e73da371bd2e5eaac8780d15781b7", + "outputFiles": [ + "esql-geospatial predicates.txt", + "esql-geometry functions.txt", + "esql-st_envelope.txt", + "esql-grid encoding functions.txt" + ], + "sections": { + "esql-geospatial predicates.txt": { + "hash": "3aabc8308f5b17de478c565ed3d345c1d170e548c340aa8c4fec85f277be8b06" + }, + "esql-geometry functions.txt": { + "hash": "76bc7c0f1c4b61144f62080634c4ed753f4540da908101804d571000d45e72ff" + }, + "esql-st_envelope.txt": { + "hash": "be74bdb54920647bb8a07c6aa89bab321192be9b86102086b6b05cf23d568ec2" + }, + "esql-grid encoding functions.txt": { + "hash": "8b9432b7de4cce29bd0c0a24dfe7219bae602fc16bfe75c7746737c21e883c01" + } + } + }, + "reference/query-languages/esql/functions-operators/string-functions.md": { + "hash": "7e7d40fea043909d3892536e68ae8e4d9bd7ae53f5195fa6998c6894afdd34cd", + "outputFiles": [ + "esql-bit_length.txt", + "esql-byte_length.txt", + "esql-chunk.txt", + "esql-concat.txt", + "esql-contains.txt", + "esql-ends_with.txt", + "esql-from_base64.txt", + "esql-hash.txt", + "esql-left.txt", + "esql-length.txt", + "esql-locate.txt", + "esql-ltrim.txt", + "esql-md5.txt", + "esql-repeat.txt", + "esql-replace.txt", + "esql-reverse.txt", + "esql-right.txt", + "esql-rtrim.txt", + "esql-sha1.txt", + "esql-sha256.txt", + "esql-space.txt", + "esql-split.txt", + "esql-starts_with.txt", + "esql-substring.txt", + "esql-to_base64.txt", + "esql-to_lower.txt", + "esql-to_upper.txt", + "esql-trim.txt", + "esql-url_encode.txt", + "esql-url_encode_component.txt", + "esql-url_decode.txt" + ], + "sections": { + "esql-bit_length.txt": { + "hash": "d05c8cdfb645d931fd6fbba73b771091acce13f8b180109f1f0f912180d36393" + }, + "esql-byte_length.txt": { + "hash": "354171999a1495331d76d6d38c78ca97a348dffd8ba0023b670b9b61aebe7590" + }, + "esql-chunk.txt": { + "hash": "368c44e8121b1fa42204b8af312a42e77bbe736652831b0bb1ba442398020907" + }, + "esql-concat.txt": { + "hash": "66695220a739589dd064ee961f5bd8452af560a60d66e965f349931f75a29347" + }, + "esql-contains.txt": { + "hash": "618e36d133bb49997b57b69824e3756e3d171756a680efff1bba377baeda51dc" + }, + "esql-ends_with.txt": { + "hash": "15cb73a91915a056785c13307cbbf0ae68883d5c6f9c38d94144d93d2306e1e9" + }, + "esql-from_base64.txt": { + "hash": "787550ce43b22c93c3b92da0f7e384d67a0be892b2d28343bd793660d74e83fa" + }, + "esql-hash.txt": { + "hash": "4e7792c0b205addac7822b91ebb92e8dce5fe967c1374796e51bc33f9cbbaf92" + }, + "esql-left.txt": { + "hash": "c4f35f53255fad4b470b986d1071c503a73137f5444eb1f2c39a786ea8e5888e" + }, + "esql-length.txt": { + "hash": "6ded6f633686aa0767459605a7f6dfcc00b6610d90c744f6c880ba5d8ed7f264" + }, + "esql-locate.txt": { + "hash": "380c141f7c06a1ba3f15f2f3e21d09aa5ed6a12a7a4fdbf0ce535999042a2735" + }, + "esql-ltrim.txt": { + "hash": 
"b266aa59de4d2202818d108bf2758d915e3bce2222fae76b34098640d6894fbe" + }, + "esql-md5.txt": { + "hash": "69d4edf2e9e6c65d02561909fe4b084918c9854855485cd787db3795e3e28be7" + }, + "esql-repeat.txt": { + "hash": "9ff8cfa3c002eaa91c8872caaee9817863781fdde6cb5cfa31375b5d25a7d50f" + }, + "esql-replace.txt": { + "hash": "6fef0972ab6853c4e05e2544959304bba9dc49699c4304efdd7f4864b323bb6f" + }, + "esql-reverse.txt": { + "hash": "3245fba02e333783055a481bc59ea66eff86a9d8619021ddd7cf4d3e61bf5efe" + }, + "esql-right.txt": { + "hash": "f3a6ce824c06794c1cf413e18d35964aa54a3dfd426091589dedd877ea43e559" + }, + "esql-rtrim.txt": { + "hash": "dd1aabbf4cde58cba51a7196c9dffbc21575fdccf2f507ebe913adf60622b233" + }, + "esql-sha1.txt": { + "hash": "d018242570a48e83e2363f6655d0ccd785d1b74e6749708504533ced3958f45b" + }, + "esql-sha256.txt": { + "hash": "a40cd5fe65d9ee8dc0cf098ebc10d3bcc7e8581c96d070dd1b730a79243a60f1" + }, + "esql-space.txt": { + "hash": "372cb139cca2d44927b1be7e69636a15a7cf3e9b9af1f8b965fc2de2d7137f87" + }, + "esql-split.txt": { + "hash": "bb2290f5e71dd726a8072db8060936098cc350f8778109aa99aafed7acff5549" + }, + "esql-starts_with.txt": { + "hash": "660f9183d852bbbf61a18dd992c4187db49ec0ed91cc4687da12ced85063c64c" + }, + "esql-substring.txt": { + "hash": "7a4e0f495ee0257173fc750c584fce8d4ab22c5fdb1bd1d72c62a4c020c16b40" + }, + "esql-to_base64.txt": { + "hash": "7d318053f817640eac6fc986f03cb7589f730e3c9355e40031af0deeea47582d" + }, + "esql-to_lower.txt": { + "hash": "d0aed8568331ab50bff1fe593b0d32c2b8f26c72783248095e5c6feff25b40da" + }, + "esql-to_upper.txt": { + "hash": "ae6b314f8bc0423117b70b173453979d0e4b3a2a86928ff7dbc82e4a24694aeb" + }, + "esql-trim.txt": { + "hash": "cad125d14428832c58165fd115e00224c42c055a72becc933bba7df0bf00cf7b" + }, + "esql-url_encode.txt": { + "hash": "93ea7e2946f61cbae7c2a151dab2e25a1865a5a121f5a8ced5156cf176be7f59" + }, + "esql-url_encode_component.txt": { + "hash": "d9c424c468d26c75193abcca82784e81651cecedf21518a0d135a9aaabacaabb" + }, + "esql-url_decode.txt": { + "hash": "9c87a0a2040d7162330e5ad9ad4cb4f2f7afa46cf45633da95563dc58a485929" + } + } + }, + "reference/query-languages/esql/functions-operators/time-series-aggregation-functions.md": { + "hash": "f877604569d7f15010a91aafec8f56a75fb6e690cf02f7731b1ce90d2f029efc", + "outputFiles": [ + "esql-absent_over_time.txt", + "esql-avg_over_time.txt", + "esql-count_over_time.txt", + "esql-count_distinct_over_time.txt", + "esql-delta.txt", + "esql-deriv.txt", + "esql-first_over_time.txt", + "esql-idelta.txt", + "esql-increase.txt", + "esql-irate.txt", + "esql-last_over_time.txt", + "esql-max_over_time.txt", + "esql-min_over_time.txt", + "esql-percentile_over_time.txt", + "esql-present_over_time.txt", + "esql-rate.txt", + "esql-stddev_over_time.txt", + "esql-variance_over_time.txt", + "esql-sum_over_time.txt" + ], + "sections": { + "esql-absent_over_time.txt": { + "hash": "ddcbf42c773e5810a25f1fe4a97b083d0254666c9f7b689e615f03f5ab318825" + }, + "esql-avg_over_time.txt": { + "hash": "45ac8bb30121d5ae6097b9a3c071ef29412e74509369ffc69ca249d0160996fe" + }, + "esql-count_over_time.txt": { + "hash": "595e20c72d00978070adbf38f85e0a16c31cb49dfdaf45bcfe5cec390373a116" + }, + "esql-count_distinct_over_time.txt": { + "hash": "68c4ed75111eafcb5ba64895b41b35273b91154385878865ab1baa457b179b3a" + }, + "esql-delta.txt": { + "hash": "22f9f7aed90629b63c4ebe31e7b778bd51abb7c746e0d2aa409a71dfba0957b5" + }, + "esql-deriv.txt": { + "hash": "7bf3061d65f8f229694fb8af9233e9165c991795e0c1e1ff724d61f56940d925" + }, + "esql-first_over_time.txt": { + "hash": 
"cb5f4af4e6daa317fadb3485fb7cac7e876fed6e57e7982a50e2828b680ecc2a" + }, + "esql-idelta.txt": { + "hash": "95be4189c5d7e41e30bad52409a22614ba344fc7004941ca45c42999be79d626" + }, + "esql-increase.txt": { + "hash": "6b6182bb75c305233d5ece6a50d71f82edc4d74866b00a0a0f7fe37b07e03c60" + }, + "esql-irate.txt": { + "hash": "d2a16bc1e222ad4eabcda9039c13dda34c6663add67348beba5b85462d3f63dc" + }, + "esql-last_over_time.txt": { + "hash": "b6d595fbdc5399e9eaa857932e51e78f3027a9e770ce8ae6c217a10cb874cf79" + }, + "esql-max_over_time.txt": { + "hash": "38e02c9749985e1f9bff1a6e770f10a3618227c7d99a790d1fbe61bee18a4acb" + }, + "esql-min_over_time.txt": { + "hash": "ad9a1bd21d86dccad413e8553fbf1cc8223a3a4789e70e8c138a556bbd6b2704" + }, + "esql-percentile_over_time.txt": { + "hash": "2f6b3773e512d3949aba4c59024927f8841250d2ea39105e42df3e3a3bdebd9b" + }, + "esql-present_over_time.txt": { + "hash": "eb60c9ac1c1c15038654bd9974e92cdfed020d17847810c2477b69b0c1ec3518" + }, + "esql-rate.txt": { + "hash": "89270bb637a66119c7e3a1383b178234d7d398cde8bec3697ceaed84376ce4f7" + }, + "esql-stddev_over_time.txt": { + "hash": "a391dac40829a19af8fcbd5e2d301affc4d005fd766b8719a5090909e1272ec0" + }, + "esql-variance_over_time.txt": { + "hash": "0814a870f172e3e9cd65b5a4c60b2913d5f0c125056612155b489f3e2894390a" + }, + "esql-sum_over_time.txt": { + "hash": "7db08238f5fd83f675fb522c755ab75051d35b8c6626be59887941906be9aff9" + } + } + }, + "reference/query-languages/esql/functions-operators/type-conversion-functions.md": { + "hash": "23092fa787e6d34a4baabecf9d235b71b45a2df3bc0bfbfafe049de0b03a9642", + "outputFiles": [ + "esql-to_aggregate_metric_double.txt", + "esql-to_boolean.txt", + "esql-to_cartesianpoint.txt", + "esql-to_cartesianshape.txt", + "esql-to_dateperiod.txt", + "esql-to_datetime.txt", + "esql-to_date_nanos.txt", + "esql-to_degrees.txt", + "esql-to_dense_vector.txt", + "esql-to_double.txt", + "esql-to_geohash.txt", + "esql-to_geohex.txt", + "esql-to_geopoint.txt", + "esql-to_geoshape.txt", + "esql-to_geotile.txt", + "esql-to_integer.txt", + "esql-to_ip.txt", + "esql-to_long.txt", + "esql-to_radians.txt", + "esql-to_string.txt", + "esql-to_timeduration.txt", + "esql-to_unsigned_long.txt", + "esql-to_version.txt" + ], + "sections": { + "esql-to_aggregate_metric_double.txt": { + "hash": "bc2108210a230cfbbabf39e101af35fda876d35ea1f792d8171f5e2f86e506c5" + }, + "esql-to_boolean.txt": { + "hash": "abe1c6f672c674a9ca175efaf5dc2f7d65cb8d10a759d40b05dd3c551a052762" + }, + "esql-to_cartesianpoint.txt": { + "hash": "30007a03ac5c5b70c8ea5fff2810f065d523c8f7ee8c3705bb2df205ec1d5a07" + }, + "esql-to_cartesianshape.txt": { + "hash": "6113d9b683ecf606ea3dbbbe032bbd9bb3f1ed2b18613cd3c585cf03d38ba342" + }, + "esql-to_dateperiod.txt": { + "hash": "55a97bd8b0f5aa8a618a842c2cb06b5e215c7dffdad3f8b4e3a3258d9c1274dd" + }, + "esql-to_datetime.txt": { + "hash": "c402efac7a69f0d5a72207a4de47e033531a5df59301a0345351298324f1bbe1" + }, + "esql-to_date_nanos.txt": { + "hash": "af76bc25ca0366c1c4ffaea492cf7d2f74660f173ed3796f5090ddfdd69987c3" + }, + "esql-to_degrees.txt": { + "hash": "ad080ebe66593d8dc71ec6a251ed697cca9cdc59b29ed40209d3e2306bb94c1c" + }, + "esql-to_dense_vector.txt": { + "hash": "59c0b4e451860efc1e71f63a9fa21c1b188efbc64bb3d575f18f508d2bfb0d94" + }, + "esql-to_double.txt": { + "hash": "7acfe5cc9be3d366f544d4b21bbd9f559057e4fe9bb572191e174fa127279703" + }, + "esql-to_geohash.txt": { + "hash": "dd7c9b2987f3688858dcc5d58681a23ffac8d1f32a432d495e4974e16c531376" + }, + "esql-to_geohex.txt": { + "hash": 
"161384d606510f634c4b778ab9e258ffaef8d43d9f1a563a739891fb5427496e" + }, + "esql-to_geopoint.txt": { + "hash": "2361020141b6b97deae5af8c90a5ecd46a1f9f02ec719f7d9cc620d30a8444d2" + }, + "esql-to_geoshape.txt": { + "hash": "cabab13af7fcc63c4403e9ac0d581e280b0c5ad5f0be11f6119838d72f2f2804" + }, + "esql-to_geotile.txt": { + "hash": "c351115e2ce92eb2058b2cacd29337942a1a554f05a9397dc88405b2986fe7c8" + }, + "esql-to_integer.txt": { + "hash": "2ed923ccabacf64e7f8f327aa02c4242af204c8f61de65c4fdbfa0e070522658" + }, + "esql-to_ip.txt": { + "hash": "7117d9c4e3bad583b505be5eb31126a4628190e7ae356eef3099bc48037e3cde" + }, + "esql-to_long.txt": { + "hash": "cf6634033c037872454e530b08ce85775e86c1201aa68053c4a38e174ddd8b24" + }, + "esql-to_radians.txt": { + "hash": "38ab34e50240dbe0e7033db7d8d32d3e78f50e0736a8356200737272ef50d07e" + }, + "esql-to_string.txt": { + "hash": "c2b08bad3244b535370dab6eba16289dd88d286e494dc29d68d0f49a0609ce19" + }, + "esql-to_timeduration.txt": { + "hash": "4e06974ec76bf88e5b55b627579c4d70ad49d7aa4344062fc6af222e28f4ece8" + }, + "esql-to_unsigned_long.txt": { + "hash": "5d2259f75985e5daf9c9fdac5c879cfe3fdd9d9d748ee9591859c860fa881e56" + }, + "esql-to_version.txt": { + "hash": "1837356a110115f060a2899800758305383b3d5765ddeed6194699d4c39b88b9" + } + } + } +} \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/README.md b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/README.md index d44c19d5b0262..2a685d2e8d22e 100644 --- a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/README.md +++ b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/README.md @@ -5,18 +5,50 @@ The generated documentation is validated and will emit warnings when invalid que ## Requirements -- checked out `built-docs` repo in the same folder as the `kibana` repository - a running Kibana instance - an installed Generative AI connector +### Run +yarn es snapshot --license trial +node scripts/kibana --dev --no-base-path + +## Incremental Updates + +The script uses a hash-based caching mechanism to optimize performance. This means: +- Unchanged source files are skipped completely +- Only changed sections within a file are reprocessed +- The cache significantly reduces processing time when only a few files have changed + +### Force Update All Files + +To force the script to regenerate all files regardless of hash matches, use the `--force` flag: + +``` +node x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/index.js --force +``` + +This is useful for: +- Testing the full generation pipeline +- Regenerating all files after changes to the processing logic +- Ensuring all files are up-to-date after cache corruption + ### Run script to generate ES|QL docs and verify syntax +To deterministically get the ES|QL docs from the Elastic's documentation markdown files, without modification from LLMs, you can run: ``` node x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/index.js ``` -The script will also generate a report of syntax errors found during the generation process, located at -`x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/__tmp__/syntax-errors.json`. This file will not be checked into git. +To connect to a connector/LLM to read the built docs and then enrich the extracted docs, you must first have an installed Generative AI connector. Then, pass in the connectorId. Enrichment involves explaining in natural language what the ES|QL examples are doing. 
+ +``` +node x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/index.js --connectorId example-connector-id +``` + +You can also combine flags: +``` +node x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/index.js --connectorId example-connector-id --force +``` ### Checking syntax errors for generated files diff --git a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/enrich_documentation.ts b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/enrich_documentation.ts new file mode 100644 index 0000000000000..10352839dec9d --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/enrich_documentation.ts @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { ScriptInferenceClient } from '../util/kibana_client'; +import { enrichDocumentationPrompt } from './prompts'; +import { bindOutput } from './utils/output_executor'; + +/** + * Enriches documentation by adding natural language descriptions for each ES|QL query example. + * Uses the connectorId from the inferenceClient to connect and enrich the extracted content. + * + * @param content - The markdown content to enrich + * @param inferenceClient - The inference client with connectorId and output API + * @returns The enriched content with natural language descriptions for ES|QL queries + */ +export async function enrichDocumentation({ + content, + inferenceClient, +}: { + content: string; + inferenceClient: ScriptInferenceClient; +}): Promise { + const callOutput = bindOutput({ + connectorId: inferenceClient.getConnectorId(), + output: inferenceClient.output, + }); + + const enrichedContent = await callOutput( + enrichDocumentationPrompt({ + content, + }) + ); + + return enrichedContent; +} diff --git a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/generate_doc.ts b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/generate_doc.ts index 572efc1b5456d..74ce03bd78a76 100644 --- a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/generate_doc.ts +++ b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/generate_doc.ts @@ -33,11 +33,21 @@ export const generateDoc = async ({ }) => { const filesToWrite: FileToWrite[] = []; - const limiter = pLimit(10); + // Reduce concurrency to avoid hitting rate limits (429 errors) + // Lower concurrency = fewer simultaneous requests = less chance of rate limits + const limiter = pLimit(3); + // Configure retry logic to handle 429 (Too Many Requests) errors + // 429 errors are retryable and will be handled with exponential backoff const callOutput = bindOutput({ connectorId: inferenceClient.getConnectorId(), output: inferenceClient.output, + maxRetries: 5, // Retry up to 5 times for rate limit errors + retryConfiguration: { + retryOn: 'auto', // Will retry 429 errors (not in STATUS_NO_RETRY list) + initialDelay: 2000, // Start with 2 second delay + backoffMultiplier: 2, // Double the delay on each retry + }, }); const documentation = documentationForFunctionRewrite(extraction); diff --git a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/generate_esql_docs.ts b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/generate_esql_docs.ts new file mode 100644 index 0000000000000..27483f7e5bf4c --- /dev/null +++ 
b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/generate_esql_docs.ts @@ -0,0 +1,1091 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { run } from '@kbn/dev-cli-runner'; +import Fs from 'fs/promises'; +import Path from 'path'; +import { createWriteStream } from 'fs'; +import { mkdtemp } from 'fs/promises'; +import { pipeline } from 'stream/promises'; +import { execSync } from 'child_process'; +import type { Argv } from 'yargs'; +import yargs from 'yargs'; +import fetch from 'node-fetch'; +import pLimit from 'p-limit'; +import { createHash } from 'crypto'; +import { connectorIdOption, elasticsearchOption, kibanaOption } from '../util/cli_options'; +import { getServiceUrls } from '../util/get_service_urls'; +import { KibanaClient } from '../util/kibana_client'; +import { selectConnector } from '../util/select_connector'; +import { rewriteFunctionPagePrompt } from './prompts'; +import { bindOutput } from './utils/output_executor'; +import { enrichDocumentation } from './enrich_documentation'; + +async function downloadFile(url: string, filePath: string): Promise { + const dirPath = Path.dirname(filePath); + await Fs.mkdir(dirPath, { recursive: true }); + const writeStream = createWriteStream(filePath); + + const res = await fetch(url); + if (!res.ok) { + throw new Error(`Failed to download file: ${res.status} ${res.statusText}`); + } + + if (!res.body) { + throw new Error('Response body is null'); + } + + await pipeline(res.body, writeStream); +} + +function extractYamlCodeBlocks(content: string): string { + // Match ```yaml ... ``` code blocks and everything after + const yamlBlockRegex = /```yaml\n([\s\S]*?)```([\s\S]*)/; + const match = content.match(yamlBlockRegex); + + if (!match) { + return ''; + } + + const yamlContent = match[1]?.trim() || ''; + const contentAfterYaml = match[2]?.trim() || ''; + + // Combine YAML content and everything after the YAML block + let combined = ''; + if (yamlContent && contentAfterYaml) { + combined = `${yamlContent}\n\n${contentAfterYaml}`; + } else if (yamlContent) { + combined = yamlContent; + } else if (contentAfterYaml) { + combined = contentAfterYaml; + } + + // Remove the first 3 lines, which mark GA or Preview + if (combined) { + const lines = combined.split('\n'); + if (lines.length > 3) { + return lines.slice(3).join('\n'); + } else { + return ''; + } + } + + return ''; +} + +function getCommandName(fileName: string): string { + // Extract command name from filename (e.g., "match.md" -> "match") + const baseName = Path.basename(fileName, '.md'); + return baseName; +} + +interface FileCache { + [sourceFilePath: string]: { + hash: string; // Hash of entire source file (for quick check) + sections?: { + [outputFileName: string]: { + hash: string; // Hash of the raw section content before processing + }; + }; + outputFiles?: string[]; // Array of file names for mapping, no content needed + }; +} + +/** + * Normalize a file path to use only the path after /extracted for cache keys. + * This ensures cache keys are stable across runs with different temporary directories. 
+ */ +function normalizeCacheKey(filePath: string, extractDir: string): string { + // Get the relative path from extractDir + const relativePath = Path.relative(extractDir, filePath); + // Normalize to use forward slashes (consistent across platforms) + return relativePath.split(Path.sep).join('/'); +} + +async function loadCache(cachePath: string): Promise { + try { + const cacheContent = await Fs.readFile(cachePath, 'utf-8'); + return JSON.parse(cacheContent); + } catch (error) { + // Cache file doesn't exist or is invalid, return empty cache + return {}; + } +} + +async function saveCache(cachePath: string, cache: FileCache): Promise { + await Fs.writeFile(cachePath, JSON.stringify(cache, null, 2), 'utf-8'); +} + +function hashContent(content: string): string { + return createHash('sha256').update(content).digest('hex'); +} +/** + * Rewrite the Syntax section to replace ![Embedded](...) with functionName(param1, param2, ...) + * Extract parameter names from the Parameters section + * @param content + * @param functionName + * @returns + */ +function rewriteSyntaxSection(content: string, functionName: string): string { + const parameterRegex = /####\s+`([^`]+)`/g; + const parameters: string[] = []; + let paramMatch: RegExpExecArray | null; + + while ((paramMatch = parameterRegex.exec(content)) !== null) { + parameters.push(paramMatch[1]); + } + + // Build function signature + const functionSignature = + parameters.length > 0 ? `${functionName}(${parameters.join(', ')})` : `${functionName}()`; + + // Replace the Syntax section + // Pattern: **Syntax**\n![Embedded](...)\n + // Replace with: **Syntax**\n`functionName(param1, param2, ...)` + const syntaxRegex = /\*\*Syntax\*\*\s*\n\s*!\[Embedded\]\([^\)]+\)\s*\n/g; + + return content.replace(syntaxRegex, (match) => { + return `**Syntax**\n\`${functionSignature}\`\n\n`; + }); +} + +function stripMarkdownTables(content: string): string { + // Strip markdown tables: rows with pipes and separator rows with dashes + // Pattern: | col1 | col2 |\n|------|------|\n| val1 | val2 | + const lines = content.split('\n'); + const result: string[] = []; + let inTable = false; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const trimmedLine = line.trim(); + + const isTableRow = trimmedLine.startsWith('|') && trimmedLine.endsWith('|'); + + // Check if this is a separator row (contains | and dashes like |---|---|) + const isSeparatorRow = isTableRow && /^[\|\s\-:]+$/.test(trimmedLine); + + if (isTableRow || isSeparatorRow) { + inTable = true; + continue; + } else { + if (inTable) { + inTable = false; + } + // Add the line (it's not part of a table) + result.push(line); + } + } + + return result.join('\n'); +} + +function stripEmbeddedImages(content: string): string { + // Strip all ![Embedded](...) 
markdown image syntax + // Pattern: ![Embedded](url) or ![Embedded](url "title") + // Handle URLs with parentheses by matching until the closing ) + return content.replace(/!\[Embedded\]\([^\)]*(?:\([^\)]*\))*[^\)]*\)/g, ''); +} + +function stripMarkdownLinks(content: string): string { + // Strip markdown links: [text](url) -> text + // Pattern: [link text](url) or [link text](url "title") + // Handle URLs with parentheses by processing from right to left + let result = content; + // Find all link patterns and replace them + // Match [text] followed by (url) where we need to find the correct closing ) + // Strategy: find [text]( and then match until the last ) before space/punctuation/end + const linkRegex = /\[([^\]]+)\]\(/g; + let match; + const replacements: Array<{ start: number; end: number; text: string }> = []; + + // Find all link starts + while ((match = linkRegex.exec(result)) !== null) { + const linkStart = match.index; + const text = match[1]; + const urlStart = match.index + match[0].length; + + // Find the matching closing parenthesis + // Look for ) that's followed by space, punctuation, or end of string + let parenCount = 1; // We already have the opening ( + let pos = urlStart; + let urlEnd = -1; + + while (pos < result.length && parenCount > 0) { + if (result[pos] === '(') parenCount++; + else if (result[pos] === ')') { + parenCount--; + if (parenCount === 0) { + // Check if this ) is followed by space, punctuation, or end + const nextChar = pos + 1 < result.length ? result[pos + 1] : ''; + if (nextChar === '' || /[\s.,;:!?)\]}]/.test(nextChar)) { + urlEnd = pos; + break; + } else { + // This ) is part of the URL, continue + parenCount++; + } + } + } + pos++; + } + + if (urlEnd > 0) { + replacements.push({ + start: linkStart, + end: urlEnd + 1, + text, + }); + } + } + + // Apply replacements from right to left to maintain indices + replacements.reverse(); + for (const repl of replacements) { + result = result.substring(0, repl.start) + repl.text + result.substring(repl.end); + } + + // Fallback: handle any remaining simple links + result = result.replace(/\[([^\]]+)\]\([^\)]+\)/g, '$1'); + + return result; +} + +function reorganizeContent(content: string, functionName: string): string { + // Extract description section + const descriptionRegex = /\*\*Description\*\*\s*\n([\s\S]*?)(?=\*\*|\n\n\n|$)/; + const descriptionMatch = content.match(descriptionRegex); + + let descriptionText = ''; + if (descriptionMatch && descriptionMatch[1]) { + descriptionText = descriptionMatch[1].trim(); + } + + // Remove the original Description section + let reorganizedContent = content.replace(/\*\*Description\*\*\s*\n[\s\S]*?(?=\*\*|\n\n\n|$)/, ''); + + // If we found a description, prepend it to the top with function name as heading + if (descriptionText) { + // Strip embedded images and markdown links from description + descriptionText = stripEmbeddedImages(descriptionText); + descriptionText = stripMarkdownLinks(descriptionText); + const descriptionWithHeading = `# ${functionName}\n\n${descriptionText}`; + reorganizedContent = `${descriptionWithHeading}\n\n${reorganizedContent}`; + } else { + // Even if no description, add the heading + reorganizedContent = `# ${functionName}\n\n${reorganizedContent}`; + } + + reorganizedContent = stripMarkdownTables( + stripMarkdownLinks(stripEmbeddedImages(reorganizedContent)) + ); + + // Clean up multiple consecutive newlines + reorganizedContent = reorganizedContent.replace(/\n{3,}/g, '\n\n'); + + return reorganizedContent; +} + +function 
convertDefinitionsToMarkdown(content: string): string { + // Convert XML to markdown format optimized for LLM understanding + // Pattern: content + // Should become: ### Parameters\n#### `paramName`\ncontent + // Also remove the **Parameters** line if it exists before the definitions + + // First, remove **Parameters** lines that appear before definitions blocks + content = content.replace(/\*\*Parameters\*\*\s*\n\s*/g, ''); + + const definitionsRegex = /([\s\S]*?)<\/definitions>/g; + + return content.replace(definitionsRegex, (match, definitionsContent) => { + // Extract individual definition elements + const definitionRegex = /([\s\S]*?)<\/definition>/g; + const definitions: Array<{ term: string; content: string }> = []; + + let defMatch: RegExpExecArray | null; + while ((defMatch = definitionRegex.exec(definitionsContent)) !== null) { + const term = defMatch[1]; + const rawContent = defMatch[2].trim(); + + // Strip embedded images, markdown links, and markdown tables + let defContent = stripEmbeddedImages(rawContent); + defContent = stripMarkdownLinks(defContent); + defContent = stripMarkdownTables(defContent); + + // Normalize whitespace - replace multiple spaces/newlines with single newline + defContent = defContent.replace(/\n\s*\n\s*\n+/g, '\n\n'); + defContent = defContent.replace(/[ \t]+/g, ' '); + + definitions.push({ term, content: defContent }); + } + + if (definitions.length === 0) { + return match; // Return original if no definitions found + } + + // Build markdown format optimized for LLM parsing + // Clear structure: heading, parameter name, description + const markdown = ['### Parameters', '']; + for (const def of definitions) { + // Parameter name as clear heading + markdown.push(`#### \`${def.term}\``); + markdown.push(''); + // Clean description text + markdown.push(def.content.trim()); + markdown.push(''); + } + + return markdown.join('\n'); + }); +} + +/** + * Extract raw function sections from markdown (before processing) + * Returns sections with raw content for hashing + */ +function extractRawFunctionSections(content: string): Array<{ name: string; rawContent: string }> { + // Extract YAML block if present and get content after it + const yamlBlockRegex = /```yaml\n([\s\S]*?)```([\s\S]*)/; + const yamlMatch = content.match(yamlBlockRegex); + + let contentToProcess = content; + if (yamlMatch) { + // Use content after YAML block + contentToProcess = yamlMatch[2] || content; + } + + // Remove YAML frontmatter if present + contentToProcess = contentToProcess.replace(/^---[\s\S]*?---\n/, ''); + + // Split by ## headings (function sections) + // Match ## `FUNCTION_NAME` or ## FUNCTION_NAME + const functionSectionRegex = /^##\s+(?:`)?([^`\n]+)(?:`)?$/gm; + const sections: Array<{ name: string; rawContent: string }> = []; + + let match: RegExpExecArray | null; + const functionMatches: Array<{ name: string; startIndex: number }> = []; + + // Find all function section headers + while ((match = functionSectionRegex.exec(contentToProcess)) !== null) { + const functionName = match[1].trim(); + const startIndex = match.index; + functionMatches.push({ name: functionName, startIndex }); + } + + // Extract raw content for each function section (before processing) + for (let i = 0; i < functionMatches.length; i++) { + const currentMatch = functionMatches[i]; + const nextMatch = functionMatches[i + 1]; + + const sectionStart = currentMatch.startIndex; + const sectionEnd = nextMatch ? 
nextMatch.startIndex : contentToProcess.length; + + let rawSectionContent = contentToProcess.substring(sectionStart, sectionEnd).trim(); + + // Remove the ## heading line and keep the rest + const lines = rawSectionContent.split('\n'); + if (lines.length > 0 && lines[0].startsWith('##')) { + rawSectionContent = lines.slice(1).join('\n').trim(); + } + + if (rawSectionContent) { + sections.push({ + name: currentMatch.name.toLowerCase(), + rawContent: rawSectionContent, + }); + } + } + + return sections; +} + +/** + * Process a raw section into final content + */ +function processSection(rawContent: string, functionName: string): string { + let sectionContent = rawContent; + + // Convert definitions to markdown format + sectionContent = convertDefinitionsToMarkdown(sectionContent); + + // Rewrite syntax section with function signature + const functionNameUpper = functionName.toUpperCase().replace(/[`'"]/g, ''); + sectionContent = rewriteSyntaxSection(sectionContent, functionNameUpper); + + // Reorganize content: move description to top with function name + sectionContent = reorganizeContent(sectionContent, functionNameUpper); + + return sectionContent; +} + +/** + * Extract brief description from raw section content + */ +function extractBriefDescription(rawContent: string): string { + // Extract description section + const descriptionRegex = /\*\*Description\*\*\s*\n([\s\S]*?)(?=\*\*|\n\n\n|$)/; + const descriptionMatch = rawContent.match(descriptionRegex); + + if (descriptionMatch && descriptionMatch[1]) { + let description = descriptionMatch[1].trim(); + // Take first sentence or first line, whichever is shorter + const firstSentence = description.split(/[.!?]\s+/)[0]; + const firstLine = description.split('\n')[0]; + description = firstSentence.length < firstLine.length ? 
firstSentence : firstLine; + // Clean up: remove markdown formatting, limit length + description = description + .replace(/\*\*/g, '') + .replace(/\[([^\]]+)\]\([^\)]+\)/g, '$1') + .replace(/`([^`]+)`/g, '$1') + .trim(); + // Limit to reasonable length (first 200 chars) + if (description.length > 200) { + description = description.substring(0, 197) + '...'; + } + return description; + } + return ''; +} + +/** + * Map markdown file name to syntax.txt section name + */ +function getSyntaxSectionName(mdFileName: string): string { + // Remove .md extension and convert to section format + const baseName = mdFileName.replace(/\.md$/, ''); + // Map specific file names to section names + const sectionMap: { [key: string]: string } = { + 'aggregation-functions': 'aggregation-functions', + 'time-series-aggregation-functions': 'time-series-aggregation-functions', + 'conditional-functions-and-expressions': 'conditional-functions', + 'date-time-functions': 'date-time-functions', + 'dense-vector-functions': 'dense-vector-functions', + 'grouping-functions': 'grouping-functions', + 'ip-functions': 'ip-functions', + 'math-functions': 'math-functions', + 'mv-functions': 'mv-functions', + operators: 'operators', + 'search-functions': 'search-functions', + 'spatial-functions': 'spatial-functions', + 'string-functions': 'string-functions', + 'type-conversion-functions': 'type-conversion-functions', + }; + return sectionMap[baseName] || baseName; +} + +/** + * Update syntax.txt file with function descriptions + */ +async function updateSyntaxFile( + syntaxFilePath: string, + mdFileName: string, + functionDescriptions: Array<{ name: string; description: string }>, + log: any +): Promise { + try { + // Read current syntax.txt + let syntaxContent = await Fs.readFile(syntaxFilePath, 'utf-8'); + const sectionName = getSyntaxSectionName(mdFileName); + const sectionTag = `<${sectionName}>`; + const closingTag = ``; + + // Find the section + const sectionStart = syntaxContent.indexOf(sectionTag); + + if (sectionStart === -1) { + log.warning( + `Section ${sectionTag} not found in syntax.txt, skipping update for ${mdFileName}` + ); + return; + } + + // Find the LAST occurrence of the closing tag (in case there are duplicates) + const sectionEnd = syntaxContent.lastIndexOf(closingTag); + if (sectionEnd === -1) { + log.warning( + `Closing tag ${closingTag} not found in syntax.txt, skipping update for ${mdFileName}` + ); + return; + } + + // Extract content before and after the section + const beforeSection = syntaxContent.substring(0, sectionStart + sectionTag.length); + // Find the position after the LAST closing tag + const afterSectionStart = sectionEnd + closingTag.length; + let afterSection = syntaxContent.substring(afterSectionStart); + + // Strip any leading closing tags from afterSection (in case of duplicates) + while (afterSection.trim().startsWith(closingTag)) { + afterSection = afterSection.substring(afterSection.indexOf(closingTag) + closingTag.length); + } + + // Build new section content + const sectionEntries = functionDescriptions + .filter((f) => f.description) // Only include functions with descriptions + .map((f) => { + const functionName = f.name.toUpperCase(); + return ` ${functionName}: ${f.description}`; + }) + .join('\n'); + + // Reconstruct syntax.txt with updated section + syntaxContent = `${beforeSection}\n${sectionEntries}\n ${closingTag}${afterSection}`; + + // Write updated content + await Fs.writeFile(syntaxFilePath, syntaxContent, 'utf-8'); + } catch (error) { + log.warning( + `Failed to 
update syntax.txt for ${mdFileName}: ${ + error instanceof Error ? error.message : String(error) + }` + ); + } +} + +interface FileToWrite { + name: string; + content: string; +} + +async function generateDoc({ + docFiles, + inferenceClient, + log, +}: { + docFiles: Array<{ name: string; content: string }>; + inferenceClient: ReturnType; + log: any; +}): Promise { + const filesToWrite: FileToWrite[] = []; + const limiter = pLimit(10); + + const callOutput = bindOutput({ + connectorId: inferenceClient.getConnectorId(), + output: inferenceClient.output, + }); + + // Create a minimal documentation object for context + // This helps the LLM understand the context when rewriting + const documentation = JSON.stringify( + { + note: 'This documentation is being generated from extracted ES|QL command and function documentation.', + }, + undefined, + 2 + ); + + await Promise.all( + docFiles.map(async (docFile) => { + return limiter(async () => { + // Determine if it's a command or function based on the content + // Commands typically start with # and have specific patterns + const isCommand = + docFile.content.includes('**Syntax**') && !docFile.content.match(/^# [A-Z_]+$/m); + + const rewrittenContent = await callOutput( + rewriteFunctionPagePrompt({ + content: docFile.content, + documentation, + command: isCommand, + }) + ); + filesToWrite.push({ + name: docFile.name, + content: rewrittenContent, + }); + }); + }) + ); + + return filesToWrite; +} + +yargs(process.argv.slice(2)) + .command( + '*', + 'Extract ES|QL documentation from zip file', + (y: Argv) => + y + .option('logLevel', { + describe: 'Log level', + string: true, + default: process.env.LOG_LEVEL || 'info', + choices: ['info', 'debug', 'silent', 'verbose'], + }) + .option('dryRun', { + describe: 'Do not write or delete any files', + boolean: true, + default: false, + }) + .option('force', { + describe: 'Force update all files even if content hash matches', + boolean: true, + default: false, + }) + .option('kibana', kibanaOption) + .option('elasticsearch', elasticsearchOption) + .option('connectorId', connectorIdOption), + (argv) => { + run( + async ({ log }) => { + // Set up inference client if connectorId is provided + let inferenceClient: ReturnType | undefined; + + if (argv.connectorId) { + const serviceUrls = await getServiceUrls({ + log, + elasticsearch: argv.elasticsearch, + kibana: argv.kibana, + }); + + const kibanaClient = new KibanaClient(log, serviceUrls.kibanaUrl); + + const connectors = await kibanaClient.getConnectors(); + if (!connectors.length) { + throw new Error('No connectors found'); + } + const connector = await selectConnector({ + connectors, + preferredId: argv.connectorId, + log, + }); + log.info(`Using connector ${connector.connectorId}`); + + inferenceClient = kibanaClient.createInferenceClient({ + connectorId: connector.connectorId, + }); + + try { + const callOutput = bindOutput({ + connectorId: inferenceClient.getConnectorId(), + output: inferenceClient.output, + }); + const resp = await callOutput({ + input: 'Test', + system: + 'You are a helpful assistant. 
Respond with "OK" to confirm you are working.', + }); + log.success(`✅ Connected to connector ${connector.connectorId} ${resp}`); + } catch (error) { + log.error(`❌ Unable to connect to connector ${connector.connectorId}: ${error}`); + throw error; + } + } + + const zipUrl = 'http://elastic.co/docs/llm.zip'; + const tempDir = Path.join(__dirname, '__tmp__'); + const zipPath = Path.join(tempDir, 'llm.zip'); + const extractTempDir = await mkdtemp(Path.join(Path.sep, 'tmp', 'esql-docs-')); + const extractDir = Path.join(extractTempDir, 'extracted'); + const commandsDir = Path.join( + extractDir, + 'reference', + 'query-languages', + 'esql', + 'commands' + ); + const functionsOperatorsDir = Path.join( + extractDir, + 'reference', + 'query-languages', + 'esql', + 'functions-operators' + ); + const outDir = Path.join(__dirname, '../../server/tasks/nl_to_esql/esql_docs'); + const syntaxFilePath = Path.join( + __dirname, + '../../server/tasks/nl_to_esql/prompts/syntax.txt' + ); + + try { + // Check if zip file already exists + const zipExists = await Fs.access(zipPath) + .then(() => true) + .catch(() => false); + + if (zipExists) { + log.info(`Zip file already exists at ${zipPath}, skipping download`); + } else { + log.info(`Downloading zip file from ${zipUrl}...`); + await Fs.mkdir(tempDir, { recursive: true }); + await downloadFile(zipUrl, zipPath); + log.info(`Downloaded to ${zipPath}`); + } + + log.info(`Extracting zip file to ${extractDir}...`); + try { + // Use native unzip command which is more robust + execSync(`unzip -q -o "${zipPath}" -d "${extractDir}"`, { + stdio: 'inherit', + }); + log.info(`Extracted to ${extractDir}`); + } catch (error) { + // Try with extract function as fallback + log.warning( + `Native unzip failed, trying alternative extraction method: ${ + error instanceof Error ? error.message : String(error) + }` + ); + const { extract } = await import('@kbn/dev-utils'); + try { + await extract({ + archivePath: zipPath, + targetDir: extractDir, + }); + } catch (extractError) { + log.warning( + `Extraction encountered errors: ${ + extractError instanceof Error ? extractError.message : String(extractError) + }` + ); + } + } + + const commandsPathExists = await Fs.access(commandsDir) + .then(() => true) + .catch(() => false); + + if (!commandsPathExists) { + throw new Error( + `Commands directory not found at ${commandsDir}. 
Please verify the zip file structure.` + ); + } + + const files = await Fs.readdir(commandsDir); + const mdFiles = files.filter((file) => file.endsWith('.md')); + + if (mdFiles.length === 0) { + throw new Error(`No .md files found in ${commandsDir}`); + } + + log.info(`Found ${mdFiles.length} markdown files in commands directory`); + + // Initialize cache + const cachePath = Path.join(__dirname, '.file-cache.json'); + const cache = await loadCache(cachePath); + let cacheUpdated = false; + + const docFiles: Array<{ name: string; content: string }> = []; + // Map output file names to source file paths for cache lookup + const outputToSourceMap = new Map(); + + // Process commands + for (const mdFile of mdFiles) { + const filePath = Path.join(commandsDir, mdFile); + const content = await Fs.readFile(filePath, 'utf-8'); + const contentHash = hashContent(content); + const cacheKey = normalizeCacheKey(filePath, extractDir); + + // Check if file is cached and unchanged (unless force flag is set) + const cached = cache[cacheKey]; + if (!argv.force && cached && cached.hash === contentHash) { + // File hash matches, verify output files exist on disk + if (cached.outputFiles && cached.outputFiles.length > 0) { + const allOutputFilesExist = await Promise.all( + cached.outputFiles.map(async (outputFileName) => { + const outputFilePath = Path.join(outDir, outputFileName); + try { + await Fs.access(outputFilePath); + return true; + } catch { + return false; + } + }) + ).then((results) => results.every((exists) => exists)); + + if (allOutputFilesExist) { + // All output files exist, skip processing + for (const outputFileName of cached.outputFiles) { + outputToSourceMap.set(outputFileName, cacheKey); + } + log.debug( + `Skipping extraction for ${mdFile} (hash: ${contentHash.substring( + 0, + 8 + )}...) 
- all output files exist` + ); + continue; + } else { + // Some output files are missing, process the file + } + } else { + // No output files in cache, skip + continue; + } + } + + // File changed or not in cache, process it + log.info(`Processing ${mdFile} (hash: ${contentHash.substring(0, 8)}...)`); + let yamlContent = extractYamlCodeBlocks(content); + + if (yamlContent) { + // Convert definitions to markdown format + yamlContent = convertDefinitionsToMarkdown(yamlContent); + + // Rewrite syntax section with command signature + const commandName = getCommandName(mdFile); + const commandNameUpper = commandName.toUpperCase(); + yamlContent = rewriteSyntaxSection(yamlContent, commandNameUpper); + + // Reorganize content: move description to top with command name + yamlContent = reorganizeContent(yamlContent, commandNameUpper); + + const outputFileName = `esql-${commandName}.txt`; + const outputFile = { + name: outputFileName, + content: yamlContent, + }; + docFiles.push(outputFile); + outputToSourceMap.set(outputFileName, cacheKey); + + // Update cache - only store file names, not content + cache[cacheKey] = { + hash: contentHash, + outputFiles: [outputFileName], + }; + cacheUpdated = true; + } else { + log.warning(`No YAML code blocks found in ${mdFile}, skipping`); + } + } + if (docFiles.length > 0) { + log.info(`✅ Found ${docFiles.length} new documents to process`); + } else { + log.info(`⏰ No new content detected compared to previous run`); + } + + // Process functions-operators + const functionsOperatorsPathExists = await Fs.access(functionsOperatorsDir) + .then(() => true) + .catch(() => false); + + if (functionsOperatorsPathExists) { + const functionFiles = await Fs.readdir(functionsOperatorsDir); + const functionMdFiles = functionFiles.filter((file) => file.endsWith('.md')); + + if (functionMdFiles.length > 0) { + for (const mdFile of functionMdFiles) { + const filePath = Path.join(functionsOperatorsDir, mdFile); + const content = await Fs.readFile(filePath, 'utf-8'); + const contentHash = hashContent(content); + const cacheKey = normalizeCacheKey(filePath, extractDir); + + // Check if file is cached and unchanged (unless force flag is set) + const cached = cache[cacheKey]; + if (!argv.force && cached && cached.hash === contentHash) { + // Source file hash matches, verify output files exist on disk + if (cached.outputFiles && cached.outputFiles.length > 0) { + const allOutputFilesExist = await Promise.all( + cached.outputFiles.map(async (outputFileName) => { + const outputFilePath = Path.join(outDir, outputFileName); + try { + await Fs.access(outputFilePath); + return true; + } catch { + return false; + } + }) + ).then((results) => results.every((exists) => exists)); + + if (allOutputFilesExist) { + // All output files exist, skip processing + log.debug( + `Skipping ${mdFile} (hash: ${contentHash.substring(0, 8)}... 
unchanged)` + ); + for (const outputFileName of cached.outputFiles) { + outputToSourceMap.set(outputFileName, cacheKey); + } + continue; + } else { + // Some output files are missing, process the file + } + } else { + // No output files in cache, skip + continue; + } + } + + // Extract raw sections and check which ones have changed + const rawSections = extractRawFunctionSections(content); + const outputFiles: Array<{ name: string; content: string }> = []; + const outputFileNames: string[] = []; + const sectionHashes: { [outputFileName: string]: string } = {}; + + if (rawSections.length > 0) { + // Initialize cache entry if it doesn't exist + if (!cache[cacheKey]) { + cache[cacheKey] = { + hash: contentHash, + sections: {}, + outputFiles: [], + }; + } + + for (const section of rawSections) { + const outputFileName = `esql-${section.name}.txt`; + const sectionHash = hashContent(section.rawContent); + sectionHashes[outputFileName] = sectionHash; + + // Check if this section has changed (unless force flag is set) + const cachedSectionHash = cache[cacheKey].sections?.[outputFileName]?.hash; + const sectionHashMatches = cachedSectionHash === sectionHash; + + // Also check if output file exists on disk + let outputFileExists = false; + if (sectionHashMatches && !argv.force) { + const outputFilePath = Path.join(outDir, outputFileName); + try { + await Fs.access(outputFilePath); + outputFileExists = true; + } catch { + outputFileExists = false; + } + } + + const sectionChanged = + argv.force || + !cachedSectionHash || + cachedSectionHash !== sectionHash || + !outputFileExists; + + if (sectionChanged) { + // Process the section + const processedContent = processSection(section.rawContent, section.name); + const outputFile = { + name: outputFileName, + content: processedContent, + }; + outputFiles.push(outputFile); + docFiles.push(outputFile); + outputToSourceMap.set(outputFileName, cacheKey); + } else { + // Section unchanged, skip processing + log.debug( + `Skipping unchanged section ${section.name} from ${mdFile} -> ${outputFileName}` + ); + } + + outputFileNames.push(outputFileName); + outputToSourceMap.set(outputFileName, cacheKey); + } + + // Update cache with section hashes and file list + cache[cacheKey].hash = contentHash; + if (!cache[cacheKey].sections) { + cache[cacheKey].sections = {}; + } + for (const [outputFileName, sectionHash] of Object.entries(sectionHashes)) { + if (!cache[cacheKey].sections![outputFileName]) { + cache[cacheKey].sections![outputFileName] = { hash: sectionHash }; + } else { + cache[cacheKey].sections![outputFileName].hash = sectionHash; + } + } + cache[cacheKey].outputFiles = outputFileNames; + cacheUpdated = true; + + // Extract descriptions and update syntax.txt + const functionDescriptions = rawSections.map((section) => ({ + name: section.name, + description: extractBriefDescription(section.rawContent), + })); + + await updateSyntaxFile(syntaxFilePath, mdFile, functionDescriptions, log); + } else { + log.warning(`No function sections found in ${mdFile}, skipping`); + } + } + } else { + log.warning(`No .md files found in ${functionsOperatorsDir}`); + } + } else { + log.warning(`Functions-operators directory not found at ${functionsOperatorsDir}`); + } + + // Use LLM to rewrite documentation if connectorId is provided + let finalDocFiles = docFiles; + if (inferenceClient) { + // Capture inferenceClient in a const for TypeScript narrowing + const client = inferenceClient; + // Process all files that made it through (unchanged files were already skipped) + const 
filesToProcess: Array<{ + name: string; + content: string; + sourcePath?: string; + }> = docFiles.map((file) => { + const sourcePath = outputToSourceMap.get(file.name); + return { ...file, sourcePath }; + }); + + if (filesToProcess.length > 0) { + // log.info(`Rewriting ${filesToProcess.length} documents using LLM...`); + const rewrittenFiles = await generateDoc({ + docFiles: filesToProcess, + inferenceClient: client, + log, + }); + log.info(`Successfully rewritten ${rewrittenFiles.length} documents`); + + // Enrich documentation with natural language descriptions for ES|QL queries + log.info( + `Enriching ${rewrittenFiles.length} documents with ES|QL query descriptions...` + ); + const limiter = pLimit(10); + finalDocFiles = await Promise.all( + rewrittenFiles.map(async (file) => { + return limiter(async () => { + const enrichedContent = await enrichDocumentation({ + content: file.content, + inferenceClient: client, + }); + // log.info(`Enriched ${file.name} with ES|QL query descriptions`); + return { + name: file.name, + content: enrichedContent, + }; + }); + }) + ); + log.info(`Successfully enriched ${finalDocFiles.length} documents`); + } + } + + if (!argv.dryRun) { + log.info(`Writing ${finalDocFiles.length} documents to disk to ${outDir}`); + + await Fs.mkdir(outDir, { recursive: true }); + + await Promise.all( + finalDocFiles.map(async (file) => { + const fileName = Path.join(outDir, file.name); + await Fs.writeFile(fileName, file.content); + }) + ); + + // log.info(`Successfully wrote ${finalDocFiles.length} files to ${outDir}`); + } else { + // log.info(`Dry run: Would write ${finalDocFiles.length} files to ${outDir}`); + } + + // Save cache if it was updated + if (cacheUpdated && !argv.dryRun) { + await saveCache(cachePath, cache); + log.info(`Cache updated and saved to ${cachePath}`); + } + } finally { + // Clean up extraction temp directory (but keep the zip file in _temp_) + log.info(`Cleaning up temporary extraction directory ${extractTempDir}...`); + await Fs.rm(extractTempDir, { recursive: true, force: true }).catch((err) => { + log.warning(`Failed to clean up temp directory: ${err.message}`); + }); + } + }, + { log: { defaultLevel: argv.logLevel as any }, flags: { allowUnexpected: true } } + ); + } + ) + .parse(); diff --git a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/index.js b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/index.js index b9f96574c6e61..6363103d3f20b 100644 --- a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/index.js +++ b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/index.js @@ -7,4 +7,4 @@ require('@kbn/babel-register').install(); -require('./load_esql_docs'); +require('./generate_esql_docs'); diff --git a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/prompts/enrich_documentation.ts b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/prompts/enrich_documentation.ts new file mode 100644 index 0000000000000..2cd30123a69e5 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/prompts/enrich_documentation.ts @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { PromptTemplate } from '../utils/output_executor'; + +/** + * Prompt used to enrich documentation by adding natural language descriptions + * for each ES|QL query example + */ +export const enrichDocumentationPrompt: PromptTemplate<{ + content: string; +}> = ({ content }) => { + return { + system: ` + You are a helpful assistant specialized in enriching technical documentation + about ES|QL, the new Query language from Elasticsearch, written in Markdown format. + + Your job is to enrich documentation by adding natural language descriptions + for each ES|QL query example found in the content. + + An ES|QL query is composed of a source command followed by an optional + series of processing commands, separated by a pipe character: |. For + example: + + <source-command> + | <processing-command1> + | <processing-command2> + + An example of what an ES|QL query looks like: + + \`\`\`esql + FROM employees + | WHERE still_hired == true + | EVAL hired = DATE_FORMAT("YYYY", hire_date) + | STATS avg_salary = AVG(salary) BY languages + \`\`\` + + Instructions: + + - Remove any <note> tags from the content. For example: <note>{content}</note> should be rewritten as Note: {content}. + + - Before each ES|QL example (surrounded by \`\`\`esql code blocks), if there's no description, add a short, concise + description explaining what the query is doing in a single sentence. + + - The description should be clear and succinct, and should explain the purpose and behavior + of the query in plain language. Do not prefix with 'This query'. + + - Place the description immediately before the code block. + + - Keep the rest of the content unchanged - only add descriptions for ES|QL queries. + + - If a query already has a description, you may enhance it or leave it as is if + it's already clear. + + - Do not modify the ES|QL queries themselves - only add descriptions. + + - Write descriptions in a way that helps readers understand what the query does + without needing to parse the ES|QL syntax themselves. + + - Please answer exclusively with the enriched content, without any additional messages, + information, thoughts or reasoning. DO NOT wrap the output with \`\`\`markdown.
+ `, + input: ` + Enrich this documentation by adding natural language descriptions for each ES|QL query example: + + \`\`\`markdown + ${content} + \`\`\` + `, + }; +}; diff --git a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/prompts/index.ts b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/prompts/index.ts index f5b54643fb3cb..411bce4307c76 100644 --- a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/prompts/index.ts +++ b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/prompts/index.ts @@ -8,3 +8,4 @@ export { createDocumentationPagePrompt } from './create_documentation_page'; export { rewriteFunctionPagePrompt } from './rewrite_function_page'; export { convertToMarkdownPrompt } from './convert_to_markdown'; +export { enrichDocumentationPrompt } from './enrich_documentation'; diff --git a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/utils/output_executor.ts b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/utils/output_executor.ts index f4014db0e6e8d..9be87bc650980 100644 --- a/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/utils/output_executor.ts +++ b/x-pack/platform/plugins/shared/inference/scripts/load_esql_docs/utils/output_executor.ts @@ -19,20 +19,96 @@ export type PromptCaller = (prompt: Prompt) => Promise; export type PromptCallerFactory = ({ connectorId, output, + maxRetries, + retryConfiguration, }: { connectorId: string; output: OutputAPI; + maxRetries?: number; + retryConfiguration?: { + retryOn?: 'all' | 'auto'; + initialDelay?: number; + backoffMultiplier?: number; + }; }) => PromptCaller; -export const bindOutput: PromptCallerFactory = ({ connectorId, output }) => { +export const bindOutput: PromptCallerFactory = ({ + connectorId, + output, + maxRetries = 5, + retryConfiguration, +}) => { return async ({ input, system }) => { - const response = await output({ - id: 'output', - connectorId, - input, - system, - }); - - return response.content ?? ''; + let lastError: Error | null = null; + const initialDelay = retryConfiguration?.initialDelay ?? 2000; + const backoffMultiplier = retryConfiguration?.backoffMultiplier ?? 2; + const totalRetries = maxRetries; + + for (let attempt = 0; attempt <= totalRetries; attempt++) { + try { + const response = await output({ + id: 'output', + connectorId, + input, + system, + // Pass through retry config to internal retry mechanism + maxRetries: retryConfiguration ? 3 : 0, // Use internal retries for non-429 errors + retryConfiguration: retryConfiguration + ? { + retryOn: retryConfiguration.retryOn ?? 'auto', + initialDelay: retryConfiguration.initialDelay, + backoffMultiplier: retryConfiguration.backoffMultiplier, + } + : undefined, + }); + + return response.content ?? ''; + } catch (error: any) { + lastError = error; + + // Check if it's a 429 error (rate limit) - handle AxiosError format + const statusCode = + error?.response?.status || + error?.status || + (error?.code === 'ECONNABORTED' ? 
undefined : error?.response?.statusCode); + + const isRateLimitError = + statusCode === 429 || + (error?.message && error.message.includes('429')) || + (error?.message && error.message.includes('Request failed with status code 429')); + + // For 429 errors, we always retry with backoff + // For other errors, check retry configuration + const shouldRetry = + isRateLimitError || + retryConfiguration?.retryOn === 'all' || + (retryConfiguration?.retryOn === 'auto' && statusCode >= 500 && statusCode < 600); + + if (!shouldRetry || attempt >= totalRetries) { + throw error; + } + + // Calculate delay with exponential backoff + const delay = initialDelay * Math.pow(backoffMultiplier, attempt); + + // Wait before retrying (longer wait for 429 errors - at least 5 seconds) + const waitTime = isRateLimitError ? Math.max(delay, 5000) : delay; + + // Log retry attempt for debugging + if (isRateLimitError) { + // eslint-disable-next-line no-console + console.warn( + `Rate limit error (429) on attempt ${attempt + 1}/${ + totalRetries + 1 + }. Retrying in ${waitTime}ms...` + ); + } + + await new Promise((resolve) => setTimeout(resolve, waitTime)); + } + } + + // If we get here, all retries failed + throw lastError || new Error('Failed after all retries'); }; }; diff --git a/x-pack/platform/plugins/shared/inference/scripts/util/cli_options.ts b/x-pack/platform/plugins/shared/inference/scripts/util/cli_options.ts index 8bbb6dabe406e..e8f00cedb9882 100644 --- a/x-pack/platform/plugins/shared/inference/scripts/util/cli_options.ts +++ b/x-pack/platform/plugins/shared/inference/scripts/util/cli_options.ts @@ -20,7 +20,11 @@ export const elasticsearchOption = { describe: 'Where Elasticsearch is running', string: true as const, default: format({ - ...parse(config['elasticsearch.hosts']), + ...parse( + Array.isArray(config['elasticsearch.hosts']) + ? config['elasticsearch.hosts'][0] + : config['elasticsearch.hosts'] + ), auth: `${config['elasticsearch.username']}:${config['elasticsearch.password']}`, }), }; diff --git a/x-pack/platform/plugins/shared/inference/scripts/util/get_service_urls.ts b/x-pack/platform/plugins/shared/inference/scripts/util/get_service_urls.ts index 89af89a80fe43..09c7c18be691c 100644 --- a/x-pack/platform/plugins/shared/inference/scripts/util/get_service_urls.ts +++ b/x-pack/platform/plugins/shared/inference/scripts/util/get_service_urls.ts @@ -45,15 +45,20 @@ async function getKibanaUrl({ kibana, log }: { kibana: string; log: ToolingLog } log.debug(`Checking Kibana URL ${kibanaUrlWithoutAuth} for a redirect`); - const unredirectedResponse = await fetch(kibanaUrlWithoutAuth, { - headers: { - ...(parsedKibanaUrl.auth - ? { Authorization: `Basic ${Buffer.from(parsedKibanaUrl.auth).toString('base64')}` } - : {}), - }, - method: 'HEAD', - redirect: 'manual', - }); + let unredirectedResponse; + try { + unredirectedResponse = await fetch(kibanaUrlWithoutAuth, { + headers: { + ...(parsedKibanaUrl.auth + ? 
{ Authorization: `Basic ${Buffer.from(parsedKibanaUrl.auth).toString('base64')}` } + : {}), + }, + method: 'HEAD', + redirect: 'manual', + }); + } catch (fetchError: any) { + throw fetchError; + } log.debug('Unredirected response', unredirectedResponse.headers.get('location')); @@ -74,21 +79,32 @@ async function getKibanaUrl({ kibana, log }: { kibana: string; log: ToolingLog } auth: parsedTarget.auth, }); - const redirectedResponse = await fetch(discoveredKibanaUrlWithAuth, { - method: 'HEAD', + // Strip credentials from URL for fetch (Node.js fetch doesn't support credentials in URLs) + const discoveredKibanaUrlWithoutAuth = format({ + ...parsedDiscoveredUrl, + auth: undefined, }); + let redirectedResponse; + try { + redirectedResponse = await fetch(discoveredKibanaUrlWithoutAuth, { + method: 'HEAD', + headers: { + ...(parsedTarget.auth + ? { Authorization: `Basic ${Buffer.from(parsedTarget.auth).toString('base64')}` } + : {}), + }, + }); + } catch (fetchError: any) { + throw fetchError; + } + if (redirectedResponse.status !== 200) { throw new Error( - `Expected HTTP 200 from ${discoveredKibanaUrlWithAuth}, got ${redirectedResponse.status}` + `Expected HTTP 200 from ${discoveredKibanaUrlWithoutAuth}, got ${redirectedResponse.status}` ); } - const discoveredKibanaUrlWithoutAuth = format({ - ...parsedDiscoveredUrl, - auth: undefined, - }); - log.info( `Discovered kibana running at: ${ isCI ? discoveredKibanaUrlWithoutAuth : discoveredKibanaUrlWithAuth @@ -96,8 +112,26 @@ async function getKibanaUrl({ kibana, log }: { kibana: string; log: ToolingLog } ); return discoveredKibanaUrlWithAuth.replace(/\/$/, ''); - } catch (error) { - throw new Error(`Could not connect to Kibana: ` + error.message); + } catch (error: any) { + const parsedKibanaUrl = parse(kibana); + const kibanaUrlWithoutAuth = format(omit(parsedKibanaUrl, 'auth')); + const errorCode = error?.code || error?.cause?.code; + const isConnectionError = + errorCode === 'ECONNREFUSED' || + errorCode === 'ENOTFOUND' || + errorCode === 'ETIMEDOUT' || + error?.message?.includes('fetch failed') || + error?.message?.includes('ECONNREFUSED'); + + if (isConnectionError) { + throw new Error( + `Could not connect to Kibana at ${kibanaUrlWithoutAuth}. ` + + `Please ensure Kibana is running and accessible at this URL. 
` + + `Error: ${error.message || errorCode || 'Unknown connection error'}` + ); + } + + throw new Error(`Could not connect to Kibana at ${kibanaUrlWithoutAuth}: ${error.message}`); } } diff --git a/x-pack/platform/plugins/shared/inference/scripts/util/kibana_client.ts b/x-pack/platform/plugins/shared/inference/scripts/util/kibana_client.ts index 3ffabfd570907..30b411137752d 100644 --- a/x-pack/platform/plugins/shared/inference/scripts/util/kibana_client.ts +++ b/x-pack/platform/plugins/shared/inference/scripts/util/kibana_client.ts @@ -205,7 +205,14 @@ export class KibanaClient { pathname: `/internal/inference/chat_complete/stream`, }), body, - { responseType: 'stream', timeout: 0 } + { + responseType: 'stream', + timeout: 0, + headers: { + 'kbn-xsrf': 'true', + 'x-elastic-internal-origin': 'foo', + }, + } ) ) as ChatCompleteAPIResponse; } @@ -216,7 +223,13 @@ export class KibanaClient { pathname: `/internal/inference/chat_complete`, }), body, - { timeout: 0 } + { + timeout: 0, + headers: { + 'kbn-xsrf': 'true', + 'x-elastic-internal-origin': 'foo', + }, + } ) .then((response) => { return response.data; @@ -244,12 +257,10 @@ export class KibanaClient { } async getConnectors() { - const connectors: AxiosResponse<{ connectors: InferenceConnector[] }> = await axios.get( - this.getUrl({ - pathname: '/internal/inference/connectors', - }) - ); + const response = await this.callKibana<{ connectors: InferenceConnector[] }>('GET', { + pathname: '/internal/inference/connectors', + }); - return connectors.data.connectors; + return response.data.connectors; } } diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-abs.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-abs.txt index 05781981d1cf2..5a48536c3296a 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-abs.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-abs.txt @@ -1,6 +1,6 @@ # ABS -The `ABS` function returns the absolute value of a numeric expression. +Returns the absolute value of a numeric expression. If the input is `null`, the function returns `null`. ## Syntax @@ -8,23 +8,21 @@ The `ABS` function returns the absolute value of a numeric expression. ### Parameters -#### `number` +#### number -A numeric expression. If the value is `null`, the function returns `null`. +Numeric expression to calculate the absolute value for. If the value is `null`, the function returns `null`. ## Examples +Calculates the absolute value of -1.0 and stores it in a new column. ```esql ROW number = -1.0 | EVAL abs_number = ABS(number) ``` -Calculate the absolute value of a negative number. - +Calculates the absolute value of the difference between 0.0 and the height column for each employee, and adds it as a new column. ```esql FROM employees | KEEP first_name, last_name, height | EVAL abs_height = ABS(0.0 - height) ``` - -Calculate the absolute value of the difference between `0.0` and the `height` column. 
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-absent.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-absent.txt new file mode 100644 index 0000000000000..08e25a164972d --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-absent.txt @@ -0,0 +1,38 @@ +# ABSENT + +The ABSENT function returns true if the input expression yields no non-null values within the current aggregation context; otherwise, it returns false. + +## Syntax + +`ABSENT(field)` + +### Parameters + +#### field + +Expression that outputs values to be checked for absence. + +## Examples + +Determines whether the `languages` field is missing for employee number 10020. + +```esql +FROM employees +| WHERE emp_no == 10020 +| STATS is_absent = ABSENT(languages) +``` + +Checks if the `salary` field is absent within each group of employees sharing the same language. + +```esql +FROM employees +| STATS is_absent = ABSENT(salary) BY languages +``` + +Returns 1 if the `languages` field is absent and 0 if it is present for employee number 10020. + +```esql +FROM employees +| WHERE emp_no == 10020 +| STATS is_absent = TO_INTEGER(ABSENT(languages)) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-absent_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-absent_over_time.txt new file mode 100644 index 0000000000000..77e9e6c34947e --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-absent_over_time.txt @@ -0,0 +1,27 @@ +# ABSENT_OVER_TIME + +Calculates the absence of a field in the output result over a specified time range. + +## Syntax + +`ABSENT_OVER_TIME(field, window)` + +### Parameters + +#### field + +The metric field to calculate the absence for. + +#### window + +The time window over which to compute the absence. + +## Examples + +Calculates the maximum absence of the `events_received` field for each pod within 2-minute time buckets, filtering for the "prod" cluster and pod named "two". + +```esql +TS k8s +| WHERE cluster == "prod" AND pod == "two" +| STATS events_received = MAX(ABSENT_OVER_TIME(events_received)) BY pod, time_bucket = TBUCKET(2 minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-acos.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-acos.txt index 5288762efe78c..be2e13dd52be0 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-acos.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-acos.txt @@ -1,6 +1,6 @@ # ACOS -Returns the arccosine of a number as an angle, expressed in radians. +The ACOS function returns the arccosine of a number as an angle in radians. ## Syntax @@ -8,16 +8,15 @@ Returns the arccosine of a number as an angle, expressed in radians. ### Parameters -#### `number` +#### number -- A number between -1 and 1. -- If `null`, the function returns `null`. +A number between -1 and 1. If the value is `null`, the function returns `null`. ## Examples -```esql -ROW a = .9 -| EVAL acos = ACOS(a) -``` +Calculates the arccosine of 0.9 and stores the result in a new column named `acos`: -Calculate the arccosine of the value `0.9` and store the result in a new column named `acos`. 
\ No newline at end of file +```esql +ROW a=.9 +| EVAL acos=ACOS(a) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-asin.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-asin.txt index 92f2a18cd43bf..3dc895b2a1385 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-asin.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-asin.txt @@ -1,6 +1,6 @@ # ASIN -Returns the arcsine of the input numeric expression as an angle, expressed in radians. +Returns the arcsine of a numeric input as an angle in radians. ## Syntax @@ -8,16 +8,15 @@ Returns the arcsine of the input numeric expression as an angle, expressed in ra ### Parameters -#### `number` +#### number -- A number between -1 and 1. -- If `null`, the function returns `null`. +A number between -1 and 1. If the value is `null`, the function returns `null`. ## Examples +Calculates the arcsine of 0.9 and stores the result in a new column named `asin`: + ```esql -ROW a = .9 -| EVAL asin = ASIN(a) +ROW a=.9 +| EVAL asin=ASIN(a) ``` - -Calculate the arcsine of the value `0.9` and return the result in radians. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-atan.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-atan.txt index fa64d90163c4d..05443957d92ba 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-atan.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-atan.txt @@ -1,6 +1,6 @@ # ATAN -Returns the arctangent of the input numeric expression as an angle, expressed in radians. +The ATAN function returns the arctangent of a numeric expression as an angle in radians. ## Syntax @@ -8,20 +8,15 @@ Returns the arctangent of the input numeric expression as an angle, expressed in ### Parameters -#### `number` +#### number -Numeric expression. If `null`, the function returns `null`. +Numeric expression for which to calculate the arctangent. If the value is `null`, the function returns `null`. ## Examples -```esql -ROW a=12.9 -| EVAL atan = ATAN(a) -``` - -Calculate the arctangent of the value `12.9` and store the result in a new column named `atan`. +Calculates the arctangent of the value in column `a` and stores the result in a new column called `atan`: ```esql -ROW x=5.0, y=3.0 -| EVAL atan_yx = ATAN(y / x) -``` +ROW a=12.9 +| EVAL atan=ATAN(a) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-atan2.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-atan2.txt index f4da581885ef7..6582bdc1f9ab9 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-atan2.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-atan2.txt @@ -1,15 +1,26 @@ -## ATAN2 +# ATAN2 -The `ATAN2` function calculates the angle between the positive x-axis and the ray from the origin to the point (x, y) in the Cartesian plane, expressed in radians. +The ATAN2 command calculates the angle, in radians, between the positive x-axis and the ray from the origin to the point (x, y) in the Cartesian plane. 
-### Examples +## Syntax -```esql -ROW y=12.9, x=.6 -| EVAL atan2 = ATAN2(y, x) -``` +`ATAN2(y_coordinate, x_coordinate)` + +### Parameters + +#### y_coordinate + +The y coordinate. If this value is `null`, the function returns `null`. + +#### x_coordinate + +The x coordinate. If this value is `null`, the function returns `null`. + +## Examples + +Calculates the angle in radians between the positive x-axis and the point (0.6, 12.9) using the ATAN2 function. ```esql -ROW y=5.0, x=3.0 -| EVAL atan2 = ATAN2(y, x) +ROW y=12.9, x=.6 +| EVAL atan2=ATAN2(y, x) ``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-avg.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-avg.txt index cfedb5c2e0295..7da9f49130fe5 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-avg.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-avg.txt @@ -1,6 +1,6 @@ # AVG -The `AVG` function calculates the average of a numeric field. +The AVG function calculates the average value of a numeric field or expression. ## Syntax @@ -8,26 +8,22 @@ The `AVG` function calculates the average of a numeric field. ### Parameters -#### `number` +#### number -A numeric field to calculate the average. +Expression that outputs the values to be averaged. ## Examples -Basic Usage +Calculates the average height of all employees. ```esql FROM employees | STATS AVG(height) ``` -Calculate the average height of employees. - -Using Inline Functions +Calculates the average salary change by first averaging multiple salary change values per employee using `MV_AVG`, then rounding the overall average to 10 decimal places. ```esql FROM employees | STATS avg_salary_change = ROUND(AVG(MV_AVG(salary_change)), 10) -``` - -Calculate the average salary change by first averaging multiple values per row using `MV_AVG`, and then applying the `AVG` function with rounding to 10 decimal places. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-avg_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-avg_over_time.txt new file mode 100644 index 0000000000000..e73733e79669b --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-avg_over_time.txt @@ -0,0 +1,26 @@ +# AVG_OVER_TIME + +Calculates the average value of a numeric field over a specified time window. + +## Syntax + +`AVG_OVER_TIME(field, window)` + +### Parameters + +#### field + +The metric field to calculate the average for. + +#### window + +The time window over which to compute the average. + +## Examples + +Finds the maximum average network cost per cluster, grouped into 1-minute time buckets. + +```esql +TS k8s +| STATS max_cost=MAX(AVG_OVER_TIME(network.cost)) BY cluster, time_bucket = TBUCKET(1minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-binary operators.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-binary operators.txt new file mode 100644 index 0000000000000..85218a3b1c9a6 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-binary operators.txt @@ -0,0 +1,90 @@ +# BINARY OPERATORS + +Binary operators perform comparisons and arithmetic operations between two fields or values. 
If either field is multivalued, the result is `null`. For comparison operators, if one side is a constant and the other is a field in the index with both a mapping-index and doc-values, the operation is pushed to the underlying search index. Division between two integer types yields an integer result, rounding towards zero; for floating point division, cast one argument to `DOUBLE`. + +## Syntax + +` ` + +### Parameters + +#### + +Represents the left or right operand for the binary operation. Can be a field or a constant value. + +#### + +The binary operator to apply. Supported operators include: +- Equality: `==` +- Inequality: `!=` +- Less than: `<` +- Less than or equal to: `<=` +- Greater than: `>` +- Greater than or equal to: `>=` +- Add: `+` +- Subtract: `-` +- Multiply: `*` +- Divide: `/` +- Modulus: `%` + +## Examples + +Checks if the values in columns `a` and `b` are equal and stores the result in a new column `is_equal`. +```esql +ROW a = 5, b = 5 +| EVAL is_equal = a == b +``` + +Checks if the values in columns `a` and `b` are not equal and stores the result in a new column `is_unequal`. +```esql +ROW a = 5, b = 3 +| EVAL is_unequal = a != b +``` + +Checks if the value in column `a` is less than the value in column `b` and stores the result in a new column `is_less`. +```esql +ROW a = 2, b = 5 +| EVAL is_less = a < b +``` + +Adds the values in columns `a` and `b` and stores the result in a new column `sum`. +```esql +ROW a = 2, b = 3 +| EVAL sum = a + b +``` + +Subtracts the value in column `b` from the value in column `a` and stores the result in a new column `difference`. +```esql +ROW a = 5, b = 3 +| EVAL difference = a - b +``` + +Multiplies the values in columns `a` and `b` and stores the result in a new column `product`. +```esql +ROW a = 2, b = 3 +| EVAL product = a * b +``` + +Divides the value in column `a` by the value in column `b` and stores the result in a new column `quotient`. +```esql +ROW a = 6, b = 2 +| EVAL quotient = a / b +``` + +Divides the value in column `a` by the value in column `b` after casting `b` to `DOUBLE` to get a floating point result, and stores it in a new column `float_quotient`. +```esql +ROW a = 7, b = 2 +| EVAL float_quotient = a / b::DOUBLE +``` + +Calculates the remainder when the value in column `a` is divided by the value in column `b` and stores it in a new column `remainder`. +```esql +ROW a = 7, b = 3 +| EVAL remainder = a % b +``` + +## Limitations + +- If either operand is multivalued, the result is `null`. +- Division between two integer types yields an integer result, rounding towards zero. For floating point division, cast one argument to `DOUBLE`. +- For comparison operators, the operation is pushed to the underlying search index only if one side is a constant and the other is a field in the index with both a mapping-index and doc-values. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bit_length.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bit_length.txt index 4cae922384db9..b56f7c1568b78 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bit_length.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bit_length.txt @@ -1,8 +1,6 @@ # BIT_LENGTH -Returns the bit length of a string. - -**Note:** All strings are in UTF-8, so a single character can use multiple bytes. +Returns the bit length of a string. 
All strings are in UTF-8, so a single character can use multiple bytes. ## Syntax @@ -10,17 +8,17 @@ Returns the bit length of a string. ### Parameters -#### `string` +#### string String expression. If `null`, the function returns `null`. ## Examples +Calculates the number of characters and the bit length (in bits) of the city names for airports located in India. + ```esql FROM airports | WHERE country == "India" | KEEP city | EVAL fn_length = LENGTH(city), fn_bit_length = BIT_LENGTH(city) -``` - -This example calculates both the character length and the bit length of city names in airports located in India. \ No newline at end of file +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bucket.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bucket.txt index d8a109f6a3ec1..c7b310f943a59 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bucket.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-bucket.txt @@ -1,6 +1,6 @@ # BUCKET -The `BUCKET` function creates groups of values—buckets—out of a datetime or numeric input. The size of the buckets can either be provided directly or chosen based on a recommended count and values range. +The BUCKET function creates groups of values, known as buckets, from a datetime or numeric input. Buckets can be sized directly or determined based on a recommended count and value range. This function is useful for generating histograms and time-based aggregations. ## Syntax @@ -8,139 +8,92 @@ The `BUCKET` function creates groups of values—buckets—out of a datetime or ### Parameters -#### `field` +#### field -A numeric or date expression from which to derive buckets. +Numeric or date expression from which to derive buckets. -#### `buckets` +#### buckets -The target number of buckets or the desired bucket size if the `from` and `to` parameters are omitted. +Target number of buckets, or desired bucket size if `from` and `to` parameters are omitted. -#### `from` (optional) +#### from -The start of the range. Can be a number, a date, or a date expressed as a string. +Start of the range. Can be a number, a date, or a date expressed as a string. -#### `to` (optional) +#### to -The end of the range. Can be a number, a date, or a date expressed as a string. - -## Important notes: - -BUCKET can operate in two modes: -- one where the bucket size is computed based on a bucket count recommendation and a range, -- and another where the bucket size is provided directly. - -When the bucket size is provided directly for time interval, it is expressed as a **timespan literal**, e.g. -- GOOD: `BUCKET(@timestamp, 1 month)` -- BAD: `BUCKET(@timestamp, "month")` +End of the range. Can be a number, a date, or a date expressed as a string. ## Examples -Using a target number of buckets, a start of a range, and an end of a range - +Groups employee hire dates within 1985 into 20 monthly buckets and sorts the hire dates within each bucket. ```esql FROM employees | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" | STATS hire_date = MV_SORT(VALUES(hire_date)) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z") -| SORT hire_date ``` - -This example creates buckets for hire dates in 1985, aiming for 20 buckets. The actual number of buckets may vary depending on the range. 
- -Combine BUCKET with an aggregation to create a histogram - +Counts the number of employees hired per month in 1985 by creating 20 monthly buckets and sorting them. ```esql FROM employees | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" | STATS hires_per_month = COUNT(*) BY month = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z") | SORT month ``` - -This example calculates the number of hires per month in 1985. - -Asking for more buckets can result in a smaller range - +Counts the number of employees hired per week in 1985 by creating 100 weekly buckets. ```esql FROM employees | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" | STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 100, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z") -| SORT week ``` - -This example creates weekly buckets for hire dates in 1985, aiming for 100 buckets. - -Providing the bucket size directly - +Counts the number of employees hired per week in 1985 by specifying a bucket size of one week and sorting the results. ```esql FROM employees | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" | STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 1 week) | SORT week ``` - -This example creates weekly buckets for hire dates in 1985 by directly specifying the bucket size. - -Creating a salary histogram - +Creates a histogram of employee salaries by dividing the salary range 25324 to 74999 into 20 buckets and counting employees in each bucket. ```esql FROM employees -| STATS COUNT(*) BY bs = BUCKET(salary, 20, 25324, 74999) +| STATS COUNT(*) by bs = BUCKET(salary, 20, 25324, 74999) | SORT bs ``` - -This example creates a histogram of salaries, dividing the range into 20 buckets. - -Omitting the range when the bucket size is known - +Counts the number of employees in each salary bucket of size 5000 for hires in 1985 and sorts the buckets. ```esql FROM employees | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" -| STATS c = COUNT(*) BY b = BUCKET(salary, 5000.) +| STATS c = COUNT(1) BY b = BUCKET(salary, 5000.) | SORT b ``` - -This example creates salary buckets with a fixed size of 5000. - -Create hourly buckets for the last 24 hours - +Counts the number of events per hour for the last 24 hours by creating 25 hourly buckets. ```esql FROM sample_data -| WHERE @timestamp >= NOW() - 1 day AND @timestamp < NOW() +| WHERE @timestamp >= NOW() - 1 day and @timestamp < NOW() | STATS COUNT(*) BY bucket = BUCKET(@timestamp, 25, NOW() - 1 day, NOW()) ``` - -This example creates hourly buckets for the last 24 hours. - -Create monthly buckets for the year 1985 - +Calculates the average salary of employees hired in 1985 by grouping them into 20 monthly buckets. ```esql FROM employees | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" | STATS AVG(salary) BY bucket = BUCKET(hire_date, 20, "1985-01-01T00:00:00Z", "1986-01-01T00:00:00Z") -| SORT bucket ``` - -This example calculates the average salary for each month in 1985. - -Using BUCKET in both aggregating and grouping parts of STATS - +Performs advanced bucketing by using BUCKET in both the aggregation and grouping parts of the STATS command, with additional calculations. ```esql FROM employees | STATS s1 = b1 + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY b1 = BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.) 
| SORT b1, b2 | KEEP s1, b1, s2, b2 ``` - -This example demonstrates advanced usage of `BUCKET` in both aggregation and grouping. - -Adjusting bucket start value with an offset - +Adjusts the start value of each birth date bucket by adding one hour, then subtracts one hour to align the buckets, and counts the number of dates in each bucket. ```esql FROM employees | STATS dates = MV_SORT(VALUES(birth_date)) BY b = BUCKET(birth_date + 1 HOUR, 1 YEAR) - 1 HOUR | EVAL d_count = MV_COUNT(dates) -| SORT d_count, b -| LIMIT 3 ``` -This example adjusts the bucket start value by adding and subtracting an offset. +## Limitations + +- BUCKET does not create buckets that do not match any documents; only buckets with matching data are returned. +- BUCKET does not filter rows outside the provided range; it returns a bucket value for out-of-range values. Use WHERE to filter rows. +- When specifying the bucket size directly, it must be a time duration or date period, and the reference is epoch (`0001-01-01T00:00:00Z`). +- For numeric fields, you must manually determine the min and max values for the range, as there is no automatic way to do this. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-byte_length.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-byte_length.txt index 77f1f07c20ee3..0f5ae51a5f558 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-byte_length.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-byte_length.txt @@ -1,6 +1,6 @@ # BYTE_LENGTH -Returns the byte length of a string. Since all strings are in UTF-8, a single character may use multiple bytes. +Returns the number of bytes used by a string. All strings are in UTF-8, so a single character can use multiple bytes. ## Syntax @@ -8,17 +8,17 @@ Returns the byte length of a string. Since all strings are in UTF-8, a single ch ### Parameters -#### `string` +#### string -String expression. If `null`, the function returns `null`. +String expression to measure. If the value is `null`, the function returns `null`. ## Examples +Calculates the number of characters and the number of bytes for each city name in airports located in India. + ```esql FROM airports | WHERE country == "India" | KEEP city | EVAL fn_length = LENGTH(city), fn_byte_length = BYTE_LENGTH(city) ``` - -This example calculates both the character length and the byte length of city names in airports located in India. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-case.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-case.txt index c881010f905b4..f33ed140793e4 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-case.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-case.txt @@ -1,43 +1,39 @@ -## CASE +# CASE -The `CASE` function evaluates a series of conditions and returns a value corresponding to the first condition that evaluates to `true`. If no conditions match, a default value or `null` is returned. +The CASE function evaluates pairs of conditions and values, returning the value associated with the first condition that evaluates to `true`. If no condition matches, a default value is returned if provided; otherwise, the function returns `null`. 
## Syntax -`CASE (condition, trueValue, elseValue)` +`CASE(condition, trueValue, elseValue)` ### Parameters -#### `condition` +#### condition A condition to evaluate. -#### `trueValue` +#### trueValue The value returned when the corresponding condition is the first to evaluate to `true`. If no condition matches, the default value is returned. -#### `elseValue` +#### elseValue -The value returned when no condition evaluates to `true`. +(Optional) The value returned when no condition evaluates to `true`. ## Examples -### Determine whether employees are monolingual, bilingual, or polyglot - -Classify employees based on the number of languages they speak: +Classify employees based on the number of languages they speak as monolingual, bilingual, or polyglot. ```esql FROM employees | EVAL type = CASE( languages <= 1, "monolingual", languages <= 2, "bilingual", - "polyglot") + "polyglot") | KEEP emp_no, languages, type ``` -### Calculate the total connection success rate based on log messages - -Determine the success rate of connections by analyzing log messages: +Assign a success value based on log message content and calculate the average success rate. ```esql FROM sample_data @@ -48,14 +44,12 @@ FROM sample_data | STATS success_rate = AVG(successful) ``` -### Calculate an hourly error rate as a percentage of the total number of log messages - -Compute the error rate for each hour based on log messages: +Flag error messages and compute the average error rate per hour. ```esql FROM sample_data | EVAL error = CASE(message LIKE "*error*", 1, 0) | EVAL hour = DATE_TRUNC(1 hour, @timestamp) -| STATS error_rate = AVG(error) BY hour +| STATS error_rate = AVG(error) by hour | SORT hour -``` +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-categorize.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-categorize.txt index 4e5a3eeeea642..7353e8e937e8a 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-categorize.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-categorize.txt @@ -1,34 +1,45 @@ # CATEGORIZE -Groups text messages into categories of similarly formatted text values. +The CATEGORIZE function groups text messages into categories based on similarly formatted text values. It is useful for identifying patterns and grouping similar log messages or textual data. ## Syntax -`CATEGORIZE(field)` +`CATEGORIZE(field, options, output_format, similarity_threshold, analyzer)` ### Parameters -#### `field` +#### field Expression to categorize. -## Examples +#### options + +(Optional) Additional options for categorization, provided as function named parameters. + +#### output_format + +(keyword) Specifies the output format of the categories. Defaults to regex. + +#### similarity_threshold + +(integer) Sets the minimum percentage of token weight that must match for text to be added to a category bucket. Must be between 1 and 100. Higher values create narrower categories and increase memory usage. Defaults to 70. -Categorizing server log messages +#### analyzer -Categorizes server log messages into categories and aggregates their counts. +(keyword) Analyzer used to convert the field into tokens for text categorization. + +## Examples + +Groups similar log messages from the `sample_data` source into categories and counts how many messages fall into each category. 
```esql FROM sample_data -| STATS count = COUNT() BY category=CATEGORIZE(message) +| STATS count=COUNT() BY category=CATEGORIZE(message) ``` +This example groups similar log messages into categories and counts the number of messages in each category. ## Limitations - Cannot be used within other expressions. -- Cannot be used with multiple groupings. -- Cannot be used or referenced within aggregate functions. - -## Additional Notes - -- The `CATEGORIZE` function requires a platinum license. +- Cannot be used more than once in the groupings. +- Cannot be used or referenced within aggregate functions and must be the first grouping. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cbrt.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cbrt.txt index cff1384c1b45b..a9b0dde0c927d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cbrt.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cbrt.txt @@ -1,6 +1,6 @@ # CBRT -Returns the cube root of a number. The input can be any numeric value, and the return value is always a double. Cube roots of infinities are `null`. +The CBRT function returns the cube root of a numeric value. The result is always a double. If the input is infinity, the function returns null. ## Syntax @@ -8,15 +8,15 @@ Returns the cube root of a number. The input can be any numeric value, and the r ### Parameters -#### `number` +#### number -Numeric expression. If `null`, the function returns `null`. +Numeric expression. If the value is null, the function returns null. ## Examples +Calculates the cube root of 1000.0 and stores the result in a new column. + ```esql ROW d = 1000.0 -| EVAL c = cbrt(d) +| EVAL c = CBRT(d) ``` - -Calculate the cube root of the value `1000.0`. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ceil.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ceil.txt index 076ae9a7231fb..9da88910fb67d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ceil.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ceil.txt @@ -1,6 +1,6 @@ # CEIL -Rounds a number up to the nearest integer. +Rounds a number up to the nearest integer. For `double` values, it returns the closest `double` value that is greater than or equal to the input, similar to `Math.ceil`. For `long` and `integer` types (including unsigned), this function does not change the value. ## Syntax @@ -8,21 +8,15 @@ Rounds a number up to the nearest integer. ### Parameters -#### `number` +#### number -Numeric expression. If `null`, the function returns `null`. +Numeric expression to be rounded up. If the value is `null`, the function returns `null`. ## Examples -Rounding up a decimal number +Rounds the value 1.8 up to the nearest integer using the CEIL function: ```esql ROW a=1.8 -| EVAL a = CEIL(a) +| EVAL a=CEIL(a) ``` - -This example rounds the value `1.8` up to the nearest integer, resulting in `2`. - -## Limitations - -- This function is a no-op for `long` (including unsigned) and `integer` types. For `double`, it selects the closest `double` value to the integer, similar to `Math.ceil`. 
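A minimal sketch of that type behavior, assuming only literal values: the integer is returned unchanged, while the double is rounded up to the nearest whole number.

```esql
ROW i = 5, d = 5.4
| EVAL ci = CEIL(i), cd = CEIL(d)
```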
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-change-point.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-change-point.txt new file mode 100644 index 0000000000000..782c2206666e5 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-change-point.txt @@ -0,0 +1,43 @@ +# CHANGE-POINT + +The CHANGE_POINT command detects spikes, dips, and change points in a metric. It adds columns to the table indicating the change point type and a p-value, which reflects how extreme the change is (lower values indicate greater changes). The possible change point types are: `dip`, `distribution_change`, `spike`, `step_change`, and `trend_change`. + +## Syntax + +`CHANGE_POINT value [ON key] [AS type_name, pvalue_name]` + +### Parameters + +#### value + +The column containing the metric where you want to detect change points. + +#### key + +Optional. The column used to order the values. If not specified, `@timestamp` is used. + +#### type_name + +Optional. The name of the output column for the change point type. Defaults to `type`. + +#### pvalue_name + +Optional. The name of the output column for the p-value indicating the extremity of the change point. Defaults to `pvalue`. + +## Examples + +Detects a statistically significant step change in the `value` column, ordered by `key`, and filters the results to show only detected change points. + +```esql +ROW key=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] +| MV_EXPAND key +| EVAL value = CASE(key<13, 0, 42) +| CHANGE_POINT value ON key +| WHERE type IS NOT NULL +``` +This example identifies a statistically significant step change in the `value` column, ordered by `key`, and filters the results to show only detected change points. + +## Limitations + +- At least 22 values are required for change point detection. Fewer than 1,000 values is preferred. +- The CHANGE_POINT command requires a platinum license. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-change_point.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-change_point.txt deleted file mode 100644 index 7fad5314a43c9..0000000000000 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-change_point.txt +++ /dev/null @@ -1,45 +0,0 @@ -# CHANGE_POINT - -The CHANGE_POINT command detects spikes, dips, and change points in a metric. - -## Syntax - -`CHANGE_POINT value [ON key] [AS type_name, pvalue_name]` - -### Parameters - -#### value - -The column with the metric in which you want to detect a change point. - -#### key - -The column with the key to order the values by. If not specified, `@timestamp` is used. - -#### type_name - -Optional. The name of the output column with the change point type. If not specified, `type` is used. - -#### pvalue_name - -Optional. The name of the output column with the p-value that indicates how extreme the change point is. If not specified, `pvalue` is used. 
- -## Examples - -Detect change points in a generated sequence, where the first half of the values are 0 and the second half are 42: - -```esql -ROW key=[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25] -| MV_EXPAND key -| EVAL value = CASE(key<13, 0, 42) -| CHANGE_POINT value ON key -| WHERE type IS NOT NULL -``` - -This example creates a sequence of keys, assigns values that change at key 13, detects change points in the `value` column, and filters to show only detected change points. - -## Limitations - -- There must be at least 22 values for change point detection. Fewer than 1,000 is preferred. -- The CHANGE_POINT command requires a platinum license. -- This functionality is in technical preview and may be changed or removed in a future release. Features in technical preview are not subject to the support SLA of official GA features. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-chunk.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-chunk.txt new file mode 100644 index 0000000000000..91a466f6fc388 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-chunk.txt @@ -0,0 +1,57 @@ +# CHUNK + +The CHUNK function splits a text field into smaller chunks using a sentence-based chunking strategy. It can be applied to fields of the text family, such as text and semantic_text. You can specify the number of chunks returned and the length of the sentences used to create the chunks. + +## Syntax + +`CHUNK(field, chunking_settings, separator_group, overlap, sentence_overlap, strategy, max_chunk_size, separators)` + +### Parameters + +#### field + +The input field to be chunked. + +#### chunking_settings + +Options to customize chunking behavior. Defaults to `{"strategy":"sentence","max_chunk_size":300,"sentence_overlap":0}`. + +#### separator_group + +Optional. Sets a predefined list of separators based on the selected text type. Values can be `markdown` or `plaintext`. Only applicable to the `recursive` chunking strategy. When using the `recursive` strategy, either `separators` or `separator_group` must be specified. + +#### overlap + +Optional. The number of overlapping words for chunks. Only applicable to the `word` chunking strategy. This value cannot be higher than half the `max_chunk_size` value. + +#### sentence_overlap + +Optional. The number of overlapping sentences for chunks. Only applicable to the `sentence` chunking strategy. Can be either `1` or `0`. + +#### strategy + +Optional. The chunking strategy to use. Default value is `sentence`. + +#### max_chunk_size + +Optional. The maximum size of a chunk in words. Cannot be lower than `20` for the `sentence` strategy or `10` for the `word` or `recursive` strategies. This value should not exceed the window size for any associated models using the output of this function. + +#### separators + +Optional. A list of strings used as possible split points when chunking text. Each string can be a plain string or a regular expression pattern. The system tries each separator in order to split the text, starting from the first item in the list. After splitting, it attempts to recombine smaller pieces into larger chunks that stay within the `max_chunk_size` limit, to reduce the total number of chunks generated. Only applicable to the `recursive` chunking strategy. When using the `recursive` strategy, either `separators` or `separator_group` must be specified. 
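As a hedged sketch of the `recursive` strategy described above (the sample text and settings are illustrative assumptions, not taken from the reference), complementing the word-strategy example below:

```esql
ROW text = "# Title\n\nFirst paragraph.\n\nSecond paragraph."
| EVAL chunks = CHUNK(text, {"strategy": "recursive", "max_chunk_size": 20, "separator_group": "markdown"})
| MV_EXPAND chunks
```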
+ +## Examples + +Splits the provided text into chunks of up to 10 words each, with an overlap of 1 word between consecutive chunks, using the word chunking strategy and expands the resulting chunks into separate rows. + +```esql +ROW result = CHUNK("It was the best of times, it was the worst of times, it was the age of wisdom, it was the age of foolishness, it was the epoch of belief.", {"strategy": "word", "max_chunk_size": 10, "overlap": 1}) +| MV_EXPAND result +``` + +## Limitations + +- The minimum value for `max_chunk_size` is `20` for the `sentence` strategy and `10` for the `word` or `recursive` strategies. +- For the `recursive` chunking strategy, either `separators` or `separator_group` must be specified. +- The `overlap` parameter cannot be higher than half the `max_chunk_size` value. +- The output chunk size should not exceed the window size for any associated models using the output of this function. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cidr_match.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cidr_match.txt index 7b2167b1f0724..481c223cabec9 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cidr_match.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cidr_match.txt @@ -1,6 +1,6 @@ -## CIDR_MATCH +# CIDR_MATCH -The `CIDR_MATCH` function checks if a given IP address is contained within one or more specified CIDR blocks. +The CIDR_MATCH function checks if a given IP address is contained within one or more specified CIDR blocks. It returns true if the IP matches any of the provided blocks. ## Syntax @@ -8,31 +8,20 @@ The `CIDR_MATCH` function checks if a given IP address is contained within one o ### Parameters -#### `ip` +#### ip -The IP address to test. Must be of type `ip` (supports both IPv4 and IPv6). +The IP address to check, of type `ip`. Both IPv4 and IPv6 addresses are supported. -#### `blockX` +#### blockX -One or more CIDR blocks to test the IP address against. +The CIDR block(s) to test the IP address against. ## Examples -Filtering IP addresses - -```esql -FROM hosts -| WHERE CIDR_MATCH(ip1, "127.0.0.2/32") -| KEEP host, ip1 -``` - -Filtering IP addresses within specific CIDR blocks - +Filters the `hosts` data to include only rows where the `ip1` address matches either "127.0.0.2/32" or "127.0.0.3/32", and then keeps only the `card`, `host`, `ip0`, and `ip1` columns. ```esql FROM hosts | WHERE CIDR_MATCH(ip1, "127.0.0.2/32", "127.0.0.3/32") -| KEEP host, ip1 +| KEEP card, host, ip0, ip1 ``` - -This example filters rows where the `ip1` column contains an IP address that falls within the specified CIDR blocks (`127.0.0.2/32` or `127.0.0.3/32`). It then keeps the `card`, `host`, `ip0`, and `ip1` columns in the output. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-clamp.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-clamp.txt new file mode 100644 index 0000000000000..2eef5e5f22b41 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-clamp.txt @@ -0,0 +1,31 @@ +# CLAMP + +The CLAMP function restricts values within a specified range, ensuring that all samples are no less than a minimum value and no greater than a maximum value. + +## Syntax + +`CLAMP(field, min, max)` + +### Parameters + +#### field + +Numeric expression. 
If the value is `null`, the function returns `null`. + +#### min + +The minimum value to clamp data into. + +#### max + +The maximum value to clamp data into. + +## Examples + +Clamps the `network.cost` field to a dynamic range, where the minimum is determined by the maximum of 5 and `network.bytes_in`, and the maximum is `network.bytes_in` divided by 100, then keeps only the clamped cost and timestamp. + +```esql +TS k8s +| EVAL full_clamped_cost = clamp(network.cost, clamp_max(network.bytes_in, 5), network.bytes_in / 100) +| KEEP full_clamped_cost, @timestamp +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-clamp_max.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-clamp_max.txt new file mode 100644 index 0000000000000..800b53bfb84d8 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-clamp_max.txt @@ -0,0 +1,26 @@ +# CLAMP_MAX + +The CLAMP_MAX function restricts all input sample values to a specified upper bound. Any value above the defined maximum is set to the maximum value. + +## Syntax + +`CLAMP_MAX(field, max)` + +### Parameters + +#### field + +The field whose values you want to clamp. + +#### max + +The upper bound to which values are clamped. + +## Examples + +Calculates the sum of network costs after clamping values to specified minimum and maximum bounds, grouping results by one-minute time buckets. + +```esql +TS k8s +| STATS full_clamped_cost=sum(clamp(network.cost, 1, 2)), clamped_cost=sum(clamp_max(network.cost, 1)), clamped_min_cost=sum(clamp_min(network.cost, 10)) BY time_bucket = bucket(@timestamp,1minute) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-clamp_min.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-clamp_min.txt new file mode 100644 index 0000000000000..45a4c7938750a --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-clamp_min.txt @@ -0,0 +1,26 @@ +# CLAMP_MIN + +The CLAMP_MIN function limits all input sample values to a specified lower bound. Any value below the minimum is set to the minimum value. + +## Syntax + +`CLAMP_MIN(field, min)` + +### Parameters + +#### field + +The field whose values you want to clamp. + +#### min + +The minimum value to clamp data to. + +## Examples + +Calculates the sum of network costs using different clamping functions, including clamping to a minimum value of 10, and groups the results by 1-minute time buckets. + +```esql +FROM k8s +| STATS full_clamped_cost=sum(clamp(network.cost, 1, 2)), clamped_cost=sum(clamp_max(network.cost, 1)), clamped_min_cost=sum(clamp_min(network.cost, 10)) BY time_bucket = bucket(@timestamp,1minute) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-coalesce.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-coalesce.txt index 94e8eb2b38f56..bb1ab63f1aa90 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-coalesce.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-coalesce.txt @@ -1,6 +1,6 @@ # COALESCE -Returns the first argument that is not null. If all arguments are null, it returns `null`. +The COALESCE function returns the first argument that is not null. If all arguments are null, it returns `null`. 
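COALESCE accepts any number of arguments and returns the first that is not null; a minimal sketch with literal values:

```esql
ROW x = null, y = null, z = "z"
| EVAL first_non_null = COALESCE(x, y, z)
```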
## Syntax @@ -8,32 +8,19 @@ Returns the first argument that is not null. If all arguments are null, it retur ### Parameters -#### `first` +#### first Expression to evaluate. -#### `rest` +#### rest Other expressions to evaluate. ## Examples -Returning the first non-null value +Returns the first non-null value between columns `a` and `b`. ```esql ROW a=null, b="b" | EVAL COALESCE(a, b) ``` - -#### Result - -| a | b | EVAL_COALESCE_a_b | -|------|-----|-------------------| -| null | "b" | "b" | - -COALESCE supports any number of rest parameters: - -```esql -ROW x=null, y=null, z="z" -| EVAL first_non_null = COALESCE(x, y, z) -``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-completion.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-completion.txt new file mode 100644 index 0000000000000..6936e5ed3558b --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-completion.txt @@ -0,0 +1,66 @@ +# COMPLETION + +The COMPLETION command provides an interface for text generation tasks using a Large Language Model (LLM). It enables you to send prompts and context to an LLM directly within your queries, supporting a wide range of tasks such as question answering, summarization, translation, content rewriting, and creative generation. Each row processed by the COMPLETION command generates a separate API call to the LLM endpoint. + +## Syntax + +For version 9.2.0 and above: +`COMPLETION [column =] prompt WITH { "inference_id" : "my_inference_endpoint" }` + +For version 9.1.x only: +`COMPLETION [column =] prompt WITH my_inference_endpoint` + +### Parameters + +#### column + +Optional. The name of the output column that will contain the LLM's response. If not specified, the results are stored in a column named `completion`. If the specified column already exists, it will be overwritten. + +#### prompt + +The input text or expression used to prompt the LLM. This can be a string literal or a reference to a column containing text. + +#### my_inference_endpoint + +The ID of the inference endpoint to use for the task. The endpoint must be configured with the `completion` task type. + +## Examples + +Generate a response to the question "What is Elasticsearch?" and store the result in the `completion` column. + +```esql +ROW question = "What is Elasticsearch?" +| COMPLETION question WITH { "inference_id" : "my_inference_endpoint" } +| KEEP question, completion +``` + +Generate a response to the question "What is Elasticsearch?" and store the result in the `answer` column. + +```esql +ROW question = "What is Elasticsearch?" +| COMPLETION answer = question WITH { "inference_id" : "my_inference_endpoint" } +| KEEP question, answer +``` + +Generate a summary for each of the top 10 highest-rated movies using a custom prompt and store the result in the `summary` column. + +```esql +FROM movies +| SORT rating DESC +| LIMIT 10 +| EVAL prompt = CONCAT( + "Summarize this movie using the following information: \n", + "Title: ", title, "\n", + "Synopsis: ", synopsis, "\n", + "Actors: ", MV_CONCAT(actors, ", "), "\n", + ) +| COMPLETION summary = prompt WITH { "inference_id" : "my_inference_endpoint" } +| KEEP title, summary, rating +``` + +## Limitations + +- Every row processed by the COMPLETION command generates a separate API call to the LLM endpoint, which may result in high consumption and costs. 
+- Starting in version 9.3.0, processing is automatically limited to 100 rows by default to prevent accidental high consumption. This limit can be adjusted or the command can be disabled via cluster settings. +- COMPLETION commands may time out when processing large datasets or complex prompts. The default timeout is 10 minutes, and increasing it depends on your deployment type. +- To use this command, you must deploy your LLM model as an inference endpoint with the `completion` task type. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-concat.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-concat.txt index 26b99dd19cdc8..a8527ac9797e1 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-concat.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-concat.txt @@ -1,6 +1,6 @@ # CONCAT -Concatenates two or more strings. +The CONCAT function combines two or more strings into a single string. ## Syntax @@ -8,26 +8,20 @@ Concatenates two or more strings. ### Parameters -#### `string1` +#### string1 The first string to concatenate. -#### `string2` +#### string2 The second string to concatenate. ## Examples -```esql -FROM address -| KEEP street_1, street_2 -| EVAL fullstreet = CONCAT(street_1, street_2) -``` - -CONCAT supports any number of string parameters. The following example concatenates the `first_name` and `last_name` fields with a space in between: +Combines the first and last names of employees into a single full name field: ```esql FROM employees | KEEP first_name, last_name | EVAL fullname = CONCAT(first_name, " ", last_name) -``` +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-contains.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-contains.txt new file mode 100644 index 0000000000000..63417b156a0c6 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-contains.txt @@ -0,0 +1,26 @@ +# CONTAINS + +The CONTAINS function returns a boolean value indicating whether a specified keyword substring exists within another string. If either parameter is null, the function returns null. + +## Syntax + +`CONTAINS(string, substring)` + +### Parameters + +#### string + +The input string to check against. If this value is null, the function returns null. + +#### substring + +The substring to search for within the input string. If this value is null, the function returns null. + +## Examples + +Checks whether the substring "ll" is present in the string "hello" and stores the result in a new column. + +```esql +ROW a = "hello" +| EVAL has_ll = CONTAINS(a, "ll") +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-copy_sign.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-copy_sign.txt new file mode 100644 index 0000000000000..2f788bf1ad966 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-copy_sign.txt @@ -0,0 +1,26 @@ +# COPY_SIGN + +Returns a value with the magnitude of the first argument and the sign of the second argument. This function is similar to Java's Math.copySign(double magnitude, double sign) and IEEE 754's `copysign`. 
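A minimal sketch with literal values, assuming nothing beyond the definition above: the result takes its magnitude from the first argument and its sign from the second.

```esql
ROW magnitude = 5.0, s = -1.0
| EVAL result = COPY_SIGN(magnitude, s)
```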
+ +## Syntax + +`COPY_SIGN(magnitude, sign)` + +### Parameters + +#### magnitude + +The expression providing the magnitude of the result. Must be a numeric type. + +#### sign + +The expression providing the sign of the result. Must be a numeric type. + +## Examples + +Calculates a new field `cs1` that takes the absolute value of `salary` but applies the sign of the smallest value in `salary_change` for each employee. + +```esql +FROM employees +| EVAL cs1 = COPY_SIGN(salary, LEAST(salary_change)) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cos.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cos.txt index 5eb5eb5d0348a..fd013eb52e171 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cos.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cos.txt @@ -1,6 +1,6 @@ # COS -The `COS` function returns the cosine of a given angle. +Returns the cosine of an angle, where the angle is specified in radians. ## Syntax @@ -8,15 +8,15 @@ The `COS` function returns the cosine of a given angle. ### Parameters -#### `angle` +#### angle -An angle, in radians. If `null`, the function returns `null`. +An angle in radians. If the value is `null`, the function returns `null`. ## Examples +Calculates the cosine of the value in column `a` (which is set to 1.8 radians) and stores the result in a new column called `cos`: + ```esql ROW a=1.8 -| EVAL cos = COS(a) -``` - -Calculate the cosine of the angle `1.8` radians. \ No newline at end of file +| EVAL cos=COS(a) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cosh.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cosh.txt index b2b1974b6b5f4..72ee5f89b60d4 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cosh.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-cosh.txt @@ -14,9 +14,9 @@ Numeric expression. If `null`, the function returns `null`. ## Examples +Calculates the hyperbolic cosine of the value in column `a` (which is set to 1.8). + ```esql ROW a=1.8 -| EVAL cosh = COSH(a) -``` - -Calculate the hyperbolic cosine of the value `1.8`. \ No newline at end of file +| EVAL cosh=COSH(a) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count.txt index 8b6f16e243ea9..7f812220a7ac5 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count.txt @@ -1,6 +1,6 @@ -## COUNT +# COUNT -The `COUNT` function returns the total number of input values. If no field is specified, it counts the number of rows. +The COUNT function returns the total number of input values. If no field is specified, it counts the number of rows. ## Syntax @@ -8,22 +8,20 @@ The `COUNT` function returns the total number of input values. If no field is sp ### Parameters -#### `field` +#### field -An expression that outputs values to be counted. If omitted, the function is equivalent to `COUNT(*)`, which counts the number of rows. +Expression that outputs values to be counted. 
If omitted, COUNT returns the number of rows, equivalent to `COUNT(*)`. ## Examples -### Count specific field values +Counts the number of non-null values in the `height` field from the `employees` data. ```esql FROM employees | STATS COUNT(height) ``` -Count the number of non-null values in the `height` field. - -### Count the number of rows +Counts the total number of rows for each language in the `employees` data, sorting the results by language in descending order. ```esql FROM employees @@ -31,18 +29,14 @@ FROM employees | SORT languages DESC ``` -Count the total number of rows grouped by the `languages` field and sort the results in descending order. - -### Count values using inline functions +Counts the number of elements produced by splitting the `words` string on the semicolon character. ```esql ROW words="foo;bar;baz;qux;quux;foo" | STATS word_count = COUNT(SPLIT(words, ";")) ``` -Count the number of elements in a string split by the `;` delimiter. - -### Count values based on a condition +Counts the number of times the value of `n` is less than 0 by filtering with `WHERE`. ```esql ROW n=1 @@ -50,13 +44,9 @@ ROW n=1 | STATS COUNT(n) ``` -Count the number of rows where the value of `n` is less than 0. - -### Count based on two different expressions +Counts the number of times `n` is greater than 0 and less than 0, respectively, by evaluating two different expressions in the same data stream. ```esql ROW n=1 | STATS COUNT(n > 0 OR NULL), COUNT(n < 0 OR NULL) -``` - -Count the number of rows where `n > 0` and `n < 0` separately. \ No newline at end of file +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count_distinct.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count_distinct.txt index 965faa5327f9e..fe922bb221404 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count_distinct.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count_distinct.txt @@ -1,6 +1,6 @@ -## COUNT_DISTINCT +# COUNT_DISTINCT -The `COUNT_DISTINCT` function returns the approximate number of distinct values in a column or expression. +The COUNT_DISTINCT function returns the approximate number of distinct values in a column or expression. It uses the HyperLogLog++ algorithm, which provides configurable precision and fixed memory usage, making it suitable for high-cardinality sets and large datasets. Counts are approximate, and the accuracy depends on the configured precision threshold and the dataset. ## Syntax @@ -8,45 +8,39 @@ The `COUNT_DISTINCT` function returns the approximate number of distinct values ### Parameters -#### `field` +#### field -The column or literal for which to count the number of distinct values. +Column or literal for which to count the number of distinct values. -#### `precision` +#### precision -(Optional) The precision threshold. The maximum supported value is 40,000. Thresholds above this value will behave as if set to 40,000. The default value is 3,000. Higher precision thresholds may increase memory usage and processing time. +Optional. Precision threshold that controls the trade-off between memory usage and accuracy. The maximum supported value is 40000; values above this will be treated as 40000. The default value is 3000. ## Examples -Counting distinct values in multiple columns +Counts the number of unique values in the `ip0` and `ip1` columns from the `hosts` dataset. 
```esql FROM hosts | STATS COUNT_DISTINCT(ip0), COUNT_DISTINCT(ip1) ``` -This example calculates the approximate number of distinct values in the `ip0` and `ip1` columns. - -Configuring the precision threshold +Calculates the distinct count for `ip0` with a high precision threshold and for `ip1` with a low precision threshold. ```esql FROM hosts | STATS COUNT_DISTINCT(ip0, 80000), COUNT_DISTINCT(ip1, 5) ``` -This example demonstrates how to specify a precision threshold for each column. The `ip0` column uses a high precision threshold of 80,000, while the `ip1` column uses a low threshold of 5. - -Counting distinct values from a split string +Counts the number of unique words in a semicolon-separated string after splitting it into individual words. ```esql ROW words="foo;bar;baz;qux;quux;foo" | STATS distinct_word_count = COUNT_DISTINCT(SPLIT(words, ";")) ``` -This example splits the `words` string into multiple values using the `SPLIT` function and counts the unique values. The result is the number of distinct words in the string. - -### Notes +## Limitations -- Computing exact counts requires loading values into a set and returning its size, which doesn't scale well for high-cardinality sets or large values due to memory usage and communication overhead. -- The HyperLogLog++ algorithm's accuracy depends on the leading zeros of hashed values, and the exact distributions of hashes in a dataset can affect the accuracy of the cardinality. -- Even with a low threshold, the error remains very low (1-6%) even when counting millions of items. +- Counts are approximate and not exact. +- The maximum supported precision threshold is 40000; higher values have no additional effect. +- Accuracy depends on the dataset and the configured precision threshold. For low thresholds, error rates remain low (1-6%) even for large cardinalities. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count_distinct_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count_distinct_over_time.txt new file mode 100644 index 0000000000000..5eecf084cf32b --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count_distinct_over_time.txt @@ -0,0 +1,32 @@ +# COUNT_DISTINCT_OVER_TIME + +The COUNT_DISTINCT_OVER_TIME function calculates the count of distinct values for a specified field over time. + +## Syntax + +`COUNT_DISTINCT_OVER_TIME(field, precision)` + +### Parameters + +#### field + +The metric field for which to calculate the distinct count over time. + +#### precision + +Optional. Sets the precision threshold for the calculation. The maximum supported value is 40000; values above this will behave as if set to 40000. The default is 3000. For more details, refer to `AGG-COUNT-DISTINCT-APPROXIMATE`. + +## Examples + +Calculate the distinct count of `network.cost` over time for each cluster and 1-minute time bucket, showing both the default and a custom precision threshold. + +```esql +TS k8s +| STATS distincts=COUNT_DISTINCT(COUNT_DISTINCT_OVER_TIME(network.cost)), + distincts_imprecise=COUNT_DISTINCT(COUNT_DISTINCT_OVER_TIME(network.cost, 100)) + BY cluster, time_bucket = TBUCKET(1minute) +``` + +## Limitations + +- The maximum supported precision value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. 
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count_over_time.txt new file mode 100644 index 0000000000000..59ebe12f6e9fe --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-count_over_time.txt @@ -0,0 +1,27 @@ +# COUNT_OVER_TIME + +Calculates the count of values for a specified field within a given time window. + +## Syntax + +`COUNT_OVER_TIME(field, window)` + +### Parameters + +#### field + +The metric field to calculate the count for. + +#### window + +The time window over which to compute the count. + +## Examples + +Counts the number of `network.cost` values for each cluster in one-minute intervals. + +```esql +TS k8s +| STATS count=COUNT(COUNT_OVER_TIME(network.cost)) + BY cluster, time_bucket = BUCKET(@timestamp,1minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_diff.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_diff.txt index 84943f18bb487..b02582ddf1c8f 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_diff.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_diff.txt @@ -1,6 +1,6 @@ -## DATE_DIFF +# DATE_DIFF -Calculates the difference between two timestamps in multiples of a specified unit. If the start timestamp is later than the end timestamp, the result will be negative. +The DATE_DIFF function subtracts the `startTimestamp` from the `endTimestamp` and returns the difference in multiples of the specified `unit`. If `startTimestamp` is later than `endTimestamp`, the result will be negative. ## Syntax @@ -8,45 +8,35 @@ Calculates the difference between two timestamps in multiples of a specified uni ### Parameters -#### `unit` +#### unit -The unit of time for the difference calculation. +Time difference unit. -#### `startTimestamp` +#### startTimestamp -A string representing the starting timestamp. +A string representing a start timestamp. -#### `endTimestamp` +#### endTimestamp -A string representing the ending timestamp. +A string representing an end timestamp. ## Examples -Calculate the difference in microseconds between two timestamps: +Calculates the difference in microseconds between two datetime values. ```esql -ROW date1 = TO_DATETIME("2023-12-02T11:00:00.000Z"), date2 = TO_DATETIME("2023-12-02T11:00:00.001Z") +ROW date1 = TO_DATETIME("2023-12-02T11:00:00.000Z"), + date2 = TO_DATETIME("2023-12-02T11:00:00.001Z") | EVAL dd_ms = DATE_DIFF("microseconds", date1, date2) ``` -Calculate the difference in calendar units (e.g., years) between timestamps. Only fully elapsed units are counted. To include remainders, switch to a smaller unit and perform additional calculations: +Calculates the difference in years between several datetime values, counting only fully elapsed calendar years. 
```esql -ROW end_23=TO_DATETIME("2023-12-31T23:59:59.999Z"), - start_24=TO_DATETIME("2024-01-01T00:00:00.000Z"), - end_24=TO_DATETIME("2024-12-31T23:59:59.999") +ROW end_23 = TO_DATETIME("2023-12-31T23:59:59.999Z"), + start_24 = TO_DATETIME("2024-01-01T00:00:00.000Z"), + end_24 = TO_DATETIME("2024-12-31T23:59:59.999") | EVAL end23_to_start24 = DATE_DIFF("year", end_23, start_24) -| EVAL end23_to_end24 = DATE_DIFF("year", end_23, end_24) -| EVAL start_to_end_24 = DATE_DIFF("year", start_24, end_24) -``` - -## Limitations - -- The function’s supported units and ES|QL’s time span literals are distinct and not interchangeable. -- Supported abbreviations align with other established implementations but may differ from Elasticsearch’s date-time nomenclature. - -## Notes - -- If the `startTimestamp` is later than the `endTimestamp`, the function will return a negative value. - -- It's important to note that while there is some overlap between the units supported by this function and ESQL's time span literals, these sets are not interchangeable. Also, the abbreviations supported by this function are shared with other established products and may not align with the date-time nomenclature used by Elasticsearch. +| EVAL end23_to_end24 = DATE_DIFF("year", end_23, end_24) +| EVAL start_to_end_24 = DATE_DIFF("year", start_24, end_24) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_extract.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_extract.txt index 30647f30e905a..689f1f6f7a730 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_extract.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_extract.txt @@ -1,6 +1,6 @@ # DATE_EXTRACT -Extracts specific parts of a date, such as the year, month, day, or hour. +The DATE_EXTRACT function retrieves specific components from a date, such as the year, month, day, or hour. ## Syntax @@ -8,63 +8,25 @@ Extracts specific parts of a date, such as the year, month, day, or hour. ### Parameters -#### `datePart` +#### datePart -The part of the date to extract. Supported values include: +Specifies which part of the date to extract. Possible values include: `aligned_day_of_week_in_month`, `aligned_day_of_week_in_year`, `aligned_week_of_month`, `aligned_week_of_year`, `ampm_of_day`, `clock_hour_of_ampm`, `clock_hour_of_day`, `day_of_month`, `day_of_week`, `day_of_year`, `epoch_day`, `era`, `hour_of_ampm`, `hour_of_day`, `instant_seconds`, `micro_of_day`, `micro_of_second`, `milli_of_day`, `milli_of_second`, `minute_of_day`, `minute_of_hour`, `month_of_year`, `nano_of_day`, `nano_of_second`, `offset_seconds`, `proleptic_month`, `second_of_day`, `second_of_minute`, `year`, or `year_of_era`. If set to `null`, the function returns `null`. 
-- `aligned_day_of_week_in_month` -- `aligned_day_of_week_in_year` -- `aligned_week_of_month` -- `aligned_week_of_year` -- `ampm_of_day` -- `clock_hour_of_ampm` -- `clock_hour_of_day` -- `day_of_month` -- `day_of_week` -- `day_of_year` -- `epoch_day` -- `era` -- `hour_of_ampm` -- `hour_of_day` -- `instant_seconds` -- `micro_of_day` -- `micro_of_second` -- `milli_of_day` -- `milli_of_second` -- `minute_of_day` -- `minute_of_hour` -- `month_of_year` -- `nano_of_day` -- `nano_of_second` -- `offset_seconds` -- `proleptic_month` -- `second_of_day` -- `second_of_minute` -- `year` -- `year_of_era` +#### date -Refer to `java.time.temporal.ChronoField` for detailed descriptions of these values. If `null`, the function returns `null`. - -#### `date` - -The date expression from which to extract the specified part. If `null`, the function returns `null`. +The date expression from which to extract the specified part. If set to `null`, the function returns `null`. ## Examples -### Extracting the Year from a Date - -Extract the year from a given date: - +Extracts the year component from a date parsed from the string "2022-05-06". ```esql ROW date = DATE_PARSE("yyyy-MM-dd", "2022-05-06") | EVAL year = DATE_EXTRACT("year", date) ``` -### Filtering Events Outside Business Hours - -Retrieve all events that occurred outside of business hours (before 9 AM or after 5 PM): - +Finds all events in the `sample_data` index that occurred before 9 AM or after 5 PM based on the `@timestamp` field. ```esql FROM sample_data -| WHERE DATE_EXTRACT("hour_of_day", @timestamp) < 9 AND DATE_EXTRACT("hour_of_day", @timestamp) >= 17 +| WHERE DATE_EXTRACT("hour_of_day", @timestamp) < 9 + AND DATE_EXTRACT("hour_of_day", @timestamp) >= 17 ``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_format.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_format.txt index 42b0ea4e2672b..e2c758d875122 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_format.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_format.txt @@ -1,6 +1,6 @@ # DATE_FORMAT -The `DATE_FORMAT` function returns a string representation of a date in the specified format. +The DATE_FORMAT function returns a string representation of a date using the specified format. If no format is provided, it defaults to `yyyy-MM-dd'T'HH:mm:ss.SSSZ`. ## Syntax @@ -8,25 +8,20 @@ The `DATE_FORMAT` function returns a string representation of a date in the spec ### Parameters -#### `dateFormat` +#### dateFormat -- **Optional** -- Specifies the date format. If no format is provided, the default format `yyyy-MM-dd'T'HH:mm:ss.SSSZ` is used. -- If `null`, the function returns `null`. +Optional. Specifies the date format to use. If not provided, the default format is `yyyy-MM-dd'T'HH:mm:ss.SSSZ`. If set to `null`, the function returns `null`. -#### `date` +#### date -- A date expression. -- If `null`, the function returns `null`. +The date expression to format. If set to `null`, the function returns `null`. 
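Because `dateFormat` is optional, the function can be called with only a date expression; a minimal sketch using the `employees` data referenced below, assuming the default pattern applies:

```esql
FROM employees
| KEEP first_name, hire_date
| EVAL hired_default = DATE_FORMAT(hire_date)
```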
## Examples -Formatting a date to `yyyy-MM-dd` +Formats the `hire_date` column as a string in the `yyyy-MM-dd` format and adds it as a new column called `hired`: ```esql FROM employees | KEEP first_name, last_name, hire_date | EVAL hired = DATE_FORMAT("yyyy-MM-dd", hire_date) -``` - -This example formats the `hire_date` field into the `yyyy-MM-dd` format and stores the result in a new column named `hired`. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_parse.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_parse.txt index ae615fad8c9ab..60dd18dfa5c48 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_parse.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_parse.txt @@ -1,28 +1,38 @@ # DATE_PARSE -Parses a date string into a date object using the specified format. +The DATE_PARSE function returns a date by parsing a string using the format specified in the first argument. ## Syntax -`DATE_PARSE(datePattern, dateString)` +`DATE_PARSE(datePattern, dateString, options, time_zone, locale)` ### Parameters -#### `datePattern` +#### datePattern -The date format. Refer to the `DateTimeFormatter` documentation for the syntax. If `null`, the function returns `null`. +The date format to use for parsing. Refer to the `DateTimeFormatter` documentation for the syntax. If `null`, the function returns `null`. -#### `dateString` +#### dateString -A date expression as a string. If `null` or an empty string, the function returns `null`. +The date expression as a string. If `null` or an empty string, the function returns `null`. -## Examples +#### options + +Optional. Additional options for date parsing, allowing you to specify time zone and locale as function named parameters. + +#### time_zone + +Coordinated Universal Time (UTC) offset or IANA time zone used to convert date values in the query string to UTC. -Parsing a date string +#### locale +The locale to use when parsing the date, relevant when parsing month names or week days. + +## Examples + +Parse a date string in the format "yyyy-MM-dd" and convert it to a date value. ```esql ROW date_string = "2022-05-06" | EVAL date = DATE_PARSE("yyyy-MM-dd", date_string) ``` - -This example parses the string `"2022-05-06"` into a date object using the format `"yyyy-MM-dd"`. +Calculate a date value from a string formatted as "yyyy-MM-dd". diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_trunc.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_trunc.txt index 78aaa51d10956..772fe86b92fd3 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_trunc.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-date_trunc.txt @@ -1,6 +1,6 @@ -## DATE_TRUNC +# DATE_TRUNC -The `DATE_TRUNC` function rounds down a date to the closest specified interval. +The DATE_TRUNC function rounds down a date to the closest interval since the epoch, which starts at `0001-01-01T00:00:00Z`. ## Syntax @@ -8,17 +8,17 @@ The `DATE_TRUNC` function rounds down a date to the closest specified interval. ### Parameters -#### `interval` +#### interval -The interval to which the date is rounded down, expressed using the timespan literal syntax. +Interval expressed using the timespan literal syntax. 
-#### `date` +#### date -The date expression to be truncated. +Date expression. ## Examples -Truncate hire dates to the year +Rounds each employee's hire date down to the nearest year and displays their first name, last name, and original hire date. ```esql FROM employees @@ -26,9 +26,7 @@ FROM employees | EVAL year_hired = DATE_TRUNC(1 year, hire_date) ``` -This example truncates the `hire_date` field to the beginning of the year and stores the result in a new column named `year_hired`. - -Number of hires per year +Counts the number of employees hired each year by truncating hire dates to the year and aggregating by year. ```esql FROM employees @@ -37,16 +35,12 @@ FROM employees | SORT year ``` -This example calculates the number of hires per year by truncating the `hire_date` field to the year and grouping the results. - -Hourly error rate +Calculates the average error rate for each hour by marking error messages, truncating timestamps to the hour, and averaging the error flag. ```esql FROM sample_data | EVAL error = CASE(message LIKE "*error*", 1, 0) | EVAL hour = DATE_TRUNC(1 hour, @timestamp) -| STATS error_rate = AVG(error) BY hour +| STATS error_rate = AVG(error) by hour | SORT hour -``` - -This example calculates the hourly error rate by truncating the `@timestamp` field to the hour and averaging the `error` values for each hour. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-day_name.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-day_name.txt new file mode 100644 index 0000000000000..747f606efb3bc --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-day_name.txt @@ -0,0 +1,22 @@ +# DAY_NAME + +Returns the name of the weekday for a given date, using the configured Locale. + +## Syntax + +`DAY_NAME(date)` + +### Parameters + +#### date + +Date expression to extract the weekday name from. If `null`, the function returns `null`. + +## Examples + +Extracts the weekday name from a specific date value. + +```esql +ROW dt = to_datetime("1953-09-02T00:00:00.000Z") +| EVAL weekday = DAY_NAME(dt); +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-decay.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-decay.txt new file mode 100644 index 0000000000000..70b1b0347ba51 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-decay.txt @@ -0,0 +1,49 @@ +# DECAY + +Calculates a relevance score that decreases based on the distance of a numeric, spatial, or date value from a target origin, using configurable decay functions. The score ranges from 0 to 1, depending on how far the field value is from the specified origin. The distance can be numeric, spatial, or temporal, depending on the data type. Additional options for the decay function can be specified using function named parameters. For spatial queries, scale and offset for geo points use distance units (such as "10km" or "5mi"), while cartesian points use numeric values. For date queries, scale and offset use time duration values. For numeric queries, numeric values are used. + +## Syntax + +`DECAY(value, origin, scale, options, offset, type, decay)` + +### Parameters + +#### value + +The input value to apply decay scoring to. + +#### origin + +The central point from which distances are calculated. 
+ +#### scale + +The distance from the origin where the function returns the decay value. + +#### options + +Function named parameters that allow you to specify additional options for the decay function. + +#### offset + +(Optional) The distance from the origin where no decay occurs. Can be a double, integer, long, time_duration, keyword, or text. + +#### type + +(Optional) The decay function to use. Supported values are: linear, exponential, or gaussian. + +#### decay + +(Optional) The multiplier value returned at the scale distance from the origin. + +## Examples + +Calculates a decay score for each employee's salary using a linear decay function, with 0 as the origin, 100,000 as the scale, an offset of 5, and a decay multiplier of 0.5, then sorts the results by the decay score in descending order. + +```esql +FROM employees +| EVAL decay_result = decay(salary, 0, 100000, {"offset": 5, "decay": 0.5, "type": "linear"}) +| SORT decay_result DESC +``` + +This example calculates a decay score for each employee's salary, using 0 as the origin, 100,000 as the scale, an offset of 5, a decay multiplier of 0.5, and the linear decay function, then sorts the results by the decay score in descending order. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-delta.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-delta.txt new file mode 100644 index 0000000000000..953fb51cd5af6 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-delta.txt @@ -0,0 +1,27 @@ +# DELTA + +Calculates the absolute change of a gauge field within a specified time window. + +## Syntax + +`DELTA(field, window)` + +### Parameters + +#### field + +The metric field for which to calculate the absolute change. + +#### window + +The time window over which to compute the delta. + +## Examples + +Calculates the sum of absolute changes in the `network.bytes_in` metric for pod "one", grouped by cluster and 10-minute time buckets. + +```esql +TS k8s +| WHERE pod == "one" +| STATS tx = SUM(DELTA(network.bytes_in)) BY cluster, time_bucket = TBUCKET(10minute) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-deriv.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-deriv.txt new file mode 100644 index 0000000000000..11afeb2d4a835 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-deriv.txt @@ -0,0 +1,27 @@ +# DERIV + +Calculates the derivative over time of a numeric field using linear regression. + +## Syntax + +`DERIV(field, window)` + +### Parameters + +#### field + +The metric field to calculate the derivative for. + +#### window + +The time window over which to compute the derivative. + +## Examples + +Calculates the maximum rate of change (derivative) of the `network.cost` field for each 5-minute time bucket and pod named "three". 
+ +```esql +TS datenanos-k8s +| WHERE pod == "three" +| STATS max_deriv = MAX(DERIV(network.cost)) BY time_bucket = BUCKET(@timestamp,5minute), pod +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-dissect.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-dissect.txt index 56a1885ca1341..0958395f8d3a4 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-dissect.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-dissect.txt @@ -1,11 +1,6 @@ -## DISSECT +# DISSECT -The `DISSECT` command is used to extract structured data from a string. It matches the string against a delimiter-based pattern and extracts the specified keys as columns. - -### Use Cases -- **Log Parsing**: Extracting timestamps, log levels, and messages from log entries. -- **Data Transformation**: Converting unstructured text data into structured columns for further analysis. -- **Data Cleaning**: Removing or reformatting specific parts of a string to make the data more usable. +The DISSECT command is used to extract structured data from a string. It matches the string against a delimiter-based pattern and extracts the specified keys as columns. ## Syntax @@ -13,49 +8,33 @@ The `DISSECT` command is used to extract structured data from a string. It match ### Parameters -#### `input` +#### input -The column containing the string you want to structure. If the column has multiple values, `DISSECT` will process each value. +The column containing the string you want to structure. If the column has multiple values, DISSECT will process each value. -#### `pattern` +#### pattern A dissect pattern. If a field name conflicts with an existing column, the existing column is dropped. If a field name is used more than once, only the rightmost duplicate creates a column. -#### `` +#### A string used as the separator between appended values, when using the append modifier. ## Examples -Parsing a string with a timestamp, text, and IP address - -Extracts the `date`, `msg`, and `ip` fields from a structured string. +Extracts the date, message, and IP address from a structured log string and keeps only those fields. ```esql ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" -| DISSECT a "%{date} - %{msg} - %{ip}" +| DISSECT a """%{date} - %{msg} - %{ip}""" | KEEP date, msg, ip ``` -Converting output to another type - -Converts the `date` field from a string to a datetime type after extracting it. +Parses a log string to extract the date, message, and IP address, then converts the extracted date string to a datetime type. ```esql ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" -| DISSECT a "%{date} - %{msg} - %{ip}" +| DISSECT a """%{date} - %{msg} - %{ip}""" | KEEP date, msg, ip | EVAL date = TO_DATETIME(date) -``` -In this example, we use the `APPEND_SEPARATOR` to concatenate values with a custom separator: - -```esql -ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1" -| DISSECT a "%{date} - %{msg} - %{ip}" APPEND_SEPARATOR=" | " -| KEEP date, msg, ip -``` - -### Limitations -- If a field name conflicts with an existing column, the existing column is dropped. -- If a field name is used more than once, only the rightmost duplicate creates a column. -- DISSECT does not support reference keys. 
+``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-drop.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-drop.txt index ad2cf4bd6b1cd..65c96941a9950 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-drop.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-drop.txt @@ -1,6 +1,6 @@ # DROP -The `DROP` command removes one or more columns from the result set. +The DROP command removes one or more columns from the result set. You can specify columns by name or use wildcards to match multiple columns. ## Syntax @@ -10,33 +10,18 @@ The `DROP` command removes one or more columns from the result set. #### columns -A comma-separated list of columns to remove. Supports wildcards. +A comma-separated list of columns to remove. Wildcards are supported to match column names by pattern. ## Examples -Remove a specific column: - +Removes the column named `height` from the results: ```esql FROM employees | DROP height ``` -This example shows how to drop columns that match a more complex pattern using wildcards. - -```esql -FROM employees -| DROP emp_* -``` - -This example demonstrates how to use the `DROP` command in conjunction with other commands like `KEEP` and `SORT`. - +Removes all columns with names that start with `height` using a wildcard: ```esql FROM employees -| KEEP first_name, last_name, height, weight -| DROP weight -| SORT height DESC +| DROP height* ``` - -### Limitations -- The `DROP` command does not support nested fields. -- It cannot be used to drop columns of unsupported types as specified in the ES|QL limitations. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-e.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-e.txt index 08f9aba8a3456..25dd0f32a5f78 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-e.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-e.txt @@ -1,6 +1,6 @@ # E -Returns Euler’s number. +The E function returns Euler’s number, which is approximately 2.718281828459045. ## Syntax @@ -8,12 +8,12 @@ Returns Euler’s number. ### Parameters -This function does not take any parameters. +This function does not require any parameters. ## Examples +Returns a single row containing Euler’s number. + ```esql ROW E() -``` - -This example returns Euler’s number. \ No newline at end of file +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ends_with.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ends_with.txt index dbbbbe2f07584..e419d9a4fb8ba 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ends_with.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ends_with.txt @@ -1,6 +1,6 @@ # ENDS_WITH -Determines whether a keyword string ends with a specified suffix and returns a boolean value. +Returns a boolean value indicating whether a keyword string ends with a specified suffix string. ## Syntax @@ -8,20 +8,20 @@ Determines whether a keyword string ends with a specified suffix and returns a b ### Parameters -#### `str` +#### str -String expression. If `null`, the function returns `null`. +The string expression to evaluate. 
If this value is `null`, the function returns `null`. -#### `suffix` +#### suffix -String expression. If `null`, the function returns `null`. +The string expression to check as the suffix. If this value is `null`, the function returns `null`. ## Examples +Determines whether each employee's last name ends with the letter "d" and adds the result as a new column. + ```esql FROM employees | KEEP last_name | EVAL ln_E = ENDS_WITH(last_name, "d") ``` - -This example checks if the `last_name` column values end with the letter "d" and stores the result in a new column `ln_E`. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-enrich.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-enrich.txt index 3a6dfef829a7c..534ea987473a4 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-enrich.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-enrich.txt @@ -1,6 +1,6 @@ -## ENRICH +# ENRICH -The `ENRICH` command allows you to add data from existing indices as new columns using an enrich policy. +The ENRICH command allows you to add data from existing indices as new columns using an enrich policy. This is useful for augmenting your data with additional information from other sources. Before using ENRICH, you must create and execute an enrich policy. ## Syntax @@ -8,71 +8,52 @@ The `ENRICH` command allows you to add data from existing indices as new columns ### Parameters -#### `policy` +#### policy -The name of the enrich policy. You must create and execute the enrich policy before using it. +The name of the enrich policy. You need to create and execute the enrich policy first. -#### `mode` +#### mode -(Optional) The mode of the enrich command in cross-cluster queries. Refer to enrich across clusters for more details. +(Optional) The mode of the enrich command in cross cluster usage. -#### `match_field` +#### match_field -(Optional) The field used to match records in the enrich index. If not specified, the match is performed on the column with the same name as the `match_field` defined in the enrich policy. +(Optional) The column used to match records in the enrich index. If not specified, the match is performed on the column with the same name as the match_field defined in the enrich policy. -#### `fieldX` +#### fieldX -(Optional) The enrich fields from the enrich index to be added as new columns. If a column with the same name as the enrich field already exists, it will be replaced. If not specified, all enrich fields defined in the policy are added. Columns with the same name as the enrich fields will be dropped unless renamed. +(Optional) The enrich fields from the enrich index to add as new columns. If a column with the same name as the enrich field already exists, it will be replaced. If not specified, all enrich fields defined in the policy are added. -#### `new_nameX` +#### new_nameX -(Optional) Allows you to rename the columns added for each enrich field. Defaults to the enrich field name. If a column with the same name as the new name already exists, it will be discarded. If a name (new or original) occurs more than once, only the rightmost duplicate creates a column. +(Optional) Allows you to rename the column added for each enrich field. Defaults to the enrich field name. If a column has the same name as the new name, it will be discarded. If a name (new or original) occurs more than once, only the rightmost duplicate creates a new column. 
## Examples -Basic usage - -Add a new column for each enrich field defined in the `languages_policy` enrich policy. The match is performed using the `match_field` defined in the policy, requiring the input table to have a column with the same name (`language_code` in this case). - +Enriches a row by adding all fields defined in the `languages_policy` policy, matching on the `language_code` column. ```esql ROW language_code = "1" | ENRICH languages_policy ``` -Using a different match field - -Use a column with a different name than the `match_field` defined in the policy as the match field. - +Enriches a row by matching on the `a` column instead of the default match field defined in the policy. ```esql ROW a = "1" | ENRICH languages_policy ON a ``` -Selecting specific enrich fields - -Explicitly select the enrich fields to be added as columns. - +Enriches a row by matching on the `a` column and adding only the `language_name` field from the enrich index. ```esql ROW a = "1" | ENRICH languages_policy ON a WITH language_name ``` -Renaming added columns - -Rename the columns added using the `WITH` clause. - +Enriches a row by matching on the `a` column and adding the `language_name` field as a column named `name`. ```esql ROW a = "1" | ENRICH languages_policy ON a WITH name = language_name ``` -In case of name collisions, the newly created columns will override existing columns. - ## Limitations -- The `ENRICH` command requires an existing enrich policy to be created and executed beforehand. -- The `match_field` in the `ENRICH` command must match the type defined in the enrich policy. For example: - - A `geo_match` policy requires a `match_field` of type `geo_point` or `geo_shape`. - - A `range` policy requires a `match_field` of type `integer`, `long`, `date`, or `ip`, depending on the range field type in the enrich index. - - For `range` policies, if the `match_field` is of type `KEYWORD`, field values are parsed during query execution. If parsing fails, the output values for that row are set to `null`, and a warning is produced. -- The `geo_match` enrich policy type only supports the `intersects` spatial relation. +Before you can use ENRICH, you must create and execute an enrich policy. If a column name collides with an enrich field or new name, the existing column will be replaced or discarded. If a name occurs more than once, only the rightmost duplicate creates a new column. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-eval.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-eval.txt index 24666bfa2878f..6b31ae7a88232 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-eval.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-eval.txt @@ -1,6 +1,6 @@ # EVAL -The `EVAL` command allows you to add new columns with calculated values to your dataset. +The EVAL command allows you to add new columns to your results by calculating values using expressions, literals, or functions. You can use existing columns or previously defined columns in your calculations. ## Syntax @@ -8,35 +8,17 @@ The `EVAL` command allows you to add new columns with calculated values to your ### Parameters -#### `columnX` +#### columnX -- The name of the column to be added or updated. -- If a column with the same name already exists, it will be replaced by the new column. 
-- If a column name is used multiple times, only the rightmost definition is applied. +The name of the column to be created or replaced. If a column with the same name already exists, it will be dropped and replaced by the new column. If a column name is used more than once, only the rightmost duplicate creates a column. -#### `valueX` +#### valueX -- The value to assign to the column. This can be a literal, an expression, or a function. -- You can reference columns defined earlier in the same `EVAL` command. - -## Notes - -EVAL supports the following types of functions: -- Mathematical functions -- String functions -- Date-time functions -- Type conversation functions -- Conditional functions and expressions -- Multi-value functions - -Aggregation functions are NOT supported for EVAL. +The value assigned to the column. This can be a literal, an expression, or a function. You can reference columns defined to the left of this one. ## Examples -### Adding new calculated columns - -Add two new columns, `height_feet` and `height_cm`, by performing calculations on the `height` column: - +Calculate height in feet and centimeters for each employee: ```esql FROM employees | SORT emp_no @@ -44,10 +26,7 @@ FROM employees | EVAL height_feet = height * 3.281, height_cm = height * 100 ``` -### Overwriting an existing column - -Replace the `height` column with a new value calculated by converting it to feet: - +Replace the existing height column with its value converted to feet: ```esql FROM employees | SORT emp_no @@ -55,10 +34,7 @@ FROM employees | EVAL height = height * 3.281 ``` -### Adding a column without specifying a name - -If no column name is provided, the new column will be named after the expression itself. For example, this query adds a column named `height*3.281`: - +Add a new column for height in feet without specifying a name, so the column is named after the expression: ```esql FROM employees | SORT emp_no @@ -66,25 +42,9 @@ FROM employees | EVAL height * 3.281 ``` -### Using a column with special characters in subsequent commands - -When a column name contains special characters, enclose it in backticks (`) to reference it in later commands: - +Calculate height in feet and then compute the average height in feet, referencing the column with special characters using backticks: ```esql FROM employees | EVAL height * 3.281 | STATS avg_height_feet = AVG(`height * 3.281`) -``` - -Any number of evaluations can be performed in a single EVAL command - -```esql -FROM triangle -| EVAL cos = COS(angle), tan = TAN(angle), sin = SIN(angle), acos=ACOS(angle), asin=ASIN(angle) -| SORT angle DESC -| LIMIT 10 -``` - -### Limitations -- If a column with the same name already exists, the existing column is dropped. -- If a column name is used more than once, only the rightmost duplicate creates a column. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-exp.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-exp.txt index 571a678aadd63..04053c527c709 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-exp.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-exp.txt @@ -1,6 +1,6 @@ # EXP -Returns the value of e raised to the power of the given number. +Returns the value of e raised to the power of the specified number. ## Syntax @@ -8,15 +8,15 @@ Returns the value of e raised to the power of the given number. 
### Parameters -#### `number` +#### number -Numeric expression. If `null`, the function returns `null`. +Numeric expression to be used as the exponent. If the value is `null`, the function returns `null`. ## Examples +Calculates the exponential value of 5.0 (e^5.0) and stores it in a new column. + ```esql ROW d = 5.0 | EVAL s = EXP(d) -``` - -Calculate e raised to the power of 5.0. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-first_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-first_over_time.txt new file mode 100644 index 0000000000000..91f68d7db1f7c --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-first_over_time.txt @@ -0,0 +1,26 @@ +# FIRST_OVER_TIME + +Calculates the earliest value of a field, with recency determined by the `@timestamp` field. + +## Syntax + +`FIRST_OVER_TIME(field, window)` + +### Parameters + +#### field + +The metric field to calculate the earliest value for. + +#### window + +The time window over which to compute the first value. + +## Examples + +Finds the maximum network cost from the earliest recorded value, grouped by cluster and 1-minute time intervals: + +```esql +TS k8s +| STATS max_cost=MAX(FIRST_OVER_TIME(network.cost)) BY cluster, time_bucket = TBUCKET(1minute) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-floor.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-floor.txt index 2375a19cf645c..d0e83abda92e6 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-floor.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-floor.txt @@ -1,6 +1,6 @@ # FLOOR -Rounds a number down to the nearest integer. For `double` values, it selects the closest `double` representation of the integer, similar to `Math.floor`. For `long` (including unsigned) and `integer`, this operation has no effect. +Rounds a number down to the nearest integer. For `double` values, it selects the closest `double` value to the integer, similar to Math.floor. For `long` (including unsigned) and `integer`, this operation has no effect. ## Syntax @@ -8,26 +8,15 @@ Rounds a number down to the nearest integer. For `double` values, it selects the ### Parameters -#### `number` +#### number -Numeric expression. If `null`, the function returns `null`. +Numeric expression to be rounded down. If the value is `null`, the function returns `null`. ## Examples -```esql -ROW a=1.8 -| EVAL a = FLOOR(a) -``` - -Rounds the value `1.8` down to `1`. +Rounds the value 1.8 down to the nearest integer using the FLOOR function: ```esql -FROM employees -| KEEP first_name, last_name, height -| EVAL height_floor = FLOOR(height) +ROW a=1.8 +| EVAL a=FLOOR(a) ``` -Rounds all values in the column `height` down to nearest integer - -## Notes - -- The FLOOR function is a no-operation for `long` (including unsigned) and `integer` types. For `double` type, this function picks the closest `double` value to the integer, similar to the Math.floor method in programming languages. 
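An editorial sketch for FLOOR, beyond the single literal example kept above: the function pairs naturally with STATS for coarse bucketing of continuous values. This is a minimal sketch, not part of the generated doc; it assumes the `employees` index and numeric `salary` field that other examples in these docs use, and divides by `1000.0` so the value stays a double that FLOOR can round down.

```esql
// Hypothetical sketch: bucket salaries into 1000-unit groups and count each group.
// Dividing by 1000.0 keeps the value as a double so FLOOR actually rounds it down.
FROM employees
| EVAL salary_k = FLOOR(salary / 1000.0)
| STATS count = COUNT(*) BY salary_k
| SORT salary_k
```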
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-fork.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-fork.txt new file mode 100644 index 0000000000000..74699ef5a50d5 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-fork.txt @@ -0,0 +1,32 @@ +# FORK + +The FORK command creates multiple execution branches that operate on the same input data and combines the results into a single output table. A discriminator column (`_fork`) is added to identify which branch each row originated from. This enables hybrid search by combining and scoring results from multiple queries. + +## Syntax + +`FORK ( ) ( ) ... ( )` + +### Parameters + +#### + +A set of processing commands to be executed in each branch. Each branch operates independently on the same input data. + +## Examples + +Selects employees with `emp_no` 10001 and 10002 in separate branches and combines the results, showing the branch origin in the `_fork` column. + +```esql +FROM employees +| FORK ( WHERE emp_no == 10001 ) + ( WHERE emp_no == 10002 ) +| KEEP emp_no, _fork +| SORT emp_no +``` + +## Limitations + +- FORK supports at most 8 execution branches. +- In versions older than 9.3.0, using remote cluster references with FORK is not supported. +- Using more than one FORK command in a query is not supported. +- FORK branches default to `LIMIT 1000` if no `LIMIT` is provided. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-from.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-from.txt index e0d71fcf2bb39..35d054e36ab7f 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-from.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-from.txt @@ -1,6 +1,6 @@ -## FROM +# FROM -The `FROM` command retrieves data from a data stream, index, or alias and returns it as a table. Each row in the table represents a document, and each column corresponds to a field that can be accessed by its name. +The FROM command returns a table containing data from a data stream, index, or alias. Each row in the resulting table represents a document, and each column corresponds to a field that can be accessed by its name. By default, queries without an explicit LIMIT use an implicit limit of 1000 rows. ## Syntax @@ -8,44 +8,46 @@ The `FROM` command retrieves data from a data stream, index, or alias and return ### Parameters -#### `index_pattern` +#### index_pattern -A list of indices, data streams, or aliases. Supports wildcards and date math. +A list of indices, data streams, or aliases. Wildcards and date math are supported. -#### `fields` +#### fields -A comma-separated list of metadata fields to retrieve. +A comma-separated list of metadata fields to retrieve. This parameter is optional. ## Examples -### Basic Example - -Retrieve all documents from the `employees` index: - +Returns all documents from the `employees` index. ```esql FROM employees ``` -### Querying Multiple Data Streams, Indices, or Aliases - -Query multiple data streams, indices, or aliases using a comma-separated list or wildcards: +Retrieves documents from today's index using date math in the index name. +```esql +FROM +``` +Fetches documents from multiple data streams, indices, or aliases using a comma-separated list or wildcards. 
```esql FROM employees-00001,other-employees-* ``` -### Querying Across Clusters - -Query data streams and indices on remote clusters using the format `:`: - +Queries data streams and indices located on remote clusters using the specified cluster and target format. ```esql FROM cluster_one:employees-00001,cluster_two:other-employees-* ``` -### Using the `METADATA` Directive - -Enable metadata fields by using the optional `METADATA` directive: +Retrieves documents from the `employees` index and includes the `_id` metadata field. +```esql +FROM employees METADATA _id +``` +Accesses indices with special characters in their names by escaping them with double or triple double quotes. ```esql -FROM employees METADATA _id, _score +FROM "this=that", """this[that""" ``` + +## Limitations + +By default, a query without an explicit LIMIT uses an implicit limit of 1000 rows. For example, `FROM employees` is executed as `FROM employees | LIMIT 1000`. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-from_base64.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-from_base64.txt index 19090768a9db4..787ccea49293d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-from_base64.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-from_base64.txt @@ -1,15 +1,23 @@ -## FROM_BASE64 +# FROM_BASE64 -Decodes a base64 string. +The FROM_BASE64 command decodes a base64-encoded string. -### Examples +## Syntax + +`FROM_BASE64(string)` + +### Parameters + +#### string + +A base64 string to be decoded. + +## Examples + +Decodes the base64 string in column `a` and stores the decoded result in a new column `d`. ```esql ROW a = "ZWxhc3RpYw==" | EVAL d = FROM_BASE64(a) ``` - -```esql -ROW encoded = "U29tZSBzYW1wbGUgdGV4dA==" -| EVAL decoded = FROM_BASE64(encoded) -``` \ No newline at end of file +This example decodes the base64 string in column `a` and stores the result in column `d`. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-fuse.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-fuse.txt new file mode 100644 index 0000000000000..18997bcb49633 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-fuse.txt @@ -0,0 +1,81 @@ +# FUSE + +The FUSE command merges rows from multiple result sets and assigns new relevance scores. It enables hybrid search by combining and scoring results from multiple queries, typically used together with the FORK command. FUSE works by merging rows with matching key column values and assigning new relevance scores using the specified algorithm and values from the group and score columns. + +## Syntax + +Use default parameters: +`FUSE` + +Specify custom parameters: +`FUSE SCORE BY GROUP BY KEY BY WITH ` + +### Parameters + +#### fuse_method + +Defaults to `RRF`. Can be either `RRF` (Reciprocal Rank Fusion) or `LINEAR` (linear combination of scores). Specifies the method used to assign new relevance scores. + +#### options + +Options for the selected fuse method. + +- For `RRF`: + - `rank_constant`: Defaults to `60`. Represents the constant used in the RRF formula. + - `weights`: Defaults to `{}`. Allows setting different weights for RRF scores based on group column values. + +- For `LINEAR`: + - `normalizer`: Defaults to `none`. Can be `none` or `minmax`. 
Specifies the score normalization method. + - `weights`: Defaults to `{}`. Allows setting different weights for scores based on group column values. + +#### score_column + +Defaults to `_score`. Specifies which column to use for retrieving relevance scores of the input rows and where to output the new relevance scores of the merged rows. + +#### group_column + +Defaults to `_fork`. Specifies which column represents the result set. + +#### key_columns + +Defaults to `_id, _index`. Rows with matching key column values are merged. + +## Examples + +Calculate relevance scores using Reciprocal Rank Fusion (RRF) to merge results from lexical and semantic queries: +```esql +FROM books METADATA _id, _index, _score +| FORK (WHERE title:"Shakespeare" | SORT _score DESC) + (WHERE semantic_title:"Shakespeare" | SORT _score DESC) +| FUSE +``` + +Combine scores from lexical and semantic queries using a linear combination method: +```esql +FROM books METADATA _id, _index, _score +| FORK (WHERE title:"Shakespeare" | SORT _score DESC) + (WHERE semantic_title:"Shakespeare" | SORT _score DESC) +| FUSE LINEAR +``` + +Apply minmax normalization to scores from each result set before combining them with a linear method: +```esql +FROM books METADATA _id, _index, _score +| FORK (WHERE title:"Shakespeare" | SORT _score DESC) + (WHERE semantic_title:"Shakespeare" | SORT _score DESC) +| FUSE LINEAR WITH { "normalizer": "minmax" } +``` + +Use custom weights for each query branch and minmax normalization to control the influence of each result set when combining scores: +```esql +FROM books METADATA _id, _index, _score +| FORK (WHERE title:"Shakespeare" | SORT _score DESC) + (WHERE semantic_title:"Shakespeare" | SORT _score DESC) +| FUSE LINEAR WITH { "weights": { "fork1": 0.7, "fork2": 0.3 }, "normalizer": "minmax" } +``` + +## Limitations + +- FUSE assumes that key columns are single valued. If key columns are multivalued, FUSE can produce unreliable relevance scores. +- FUSE automatically assigns a score value of NULL if the score column or group column are multivalued. +- FUSE assumes that the combination of key columns and group column is unique. If not, FUSE can produce unreliable relevance scores. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-geometry functions.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-geometry functions.txt new file mode 100644 index 0000000000000..4a3df828ddfc3 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-geometry functions.txt @@ -0,0 +1,93 @@ +# ST_X + +The ST_X command extracts the `x` coordinate from a supplied point. For points of type `geo_point`, this is equivalent to extracting the longitude value. + +## Syntax + +`ST_X(point)` + +### Parameters + +#### point + +Expression of type `geo_point` or `cartesian_point`. If the value is `null`, the function returns `null`. + +## Examples + +Extracts the x (longitude) and y (latitude) coordinates from a geo_point. +```esql +ROW point = TO_GEOPOINT("POINT(42.97109629958868 14.7552534006536)") +| EVAL x = ST_X(point), y = ST_Y(point) +``` + +# ST_Y + +The ST_Y command extracts the `y` coordinate from a supplied point. For points of type `geo_point`, this is equivalent to extracting the latitude value. + +## Syntax + +`ST_Y(point)` + +### Parameters + +#### point + +Expression of type `geo_point` or `cartesian_point`. If the value is `null`, the function returns `null`. 
+ +## Examples + +Extracts the x (longitude) and y (latitude) coordinates from a geo_point. +```esql +ROW point = TO_GEOPOINT("POINT(42.97109629958868 14.7552534006536)") +| EVAL x = ST_X(point), y = ST_Y(point) +``` + +# ST_NPOINTS + +The ST_NPOINTS command counts the number of points in the supplied geometry. + +## Syntax + +`ST_NPOINTS(geometry)` + +### Parameters + +#### geometry + +Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If the value is `null`, the function returns `null`. + +## Examples + +Counts the number of points in the city boundary geometry for the airport with abbreviation "CPH". +```esql +FROM airport_city_boundaries +| WHERE abbrev == "CPH" +| EVAL points = ST_NPOINTS(city_boundary) +| KEEP abbrev, airport, points +``` + +# ST_SIMPLIFY + +The ST_SIMPLIFY command simplifies the input geometry by applying the Douglas-Peucker algorithm with a specified tolerance. Vertices within the tolerance distance from the simplified shape are removed. The resulting geometry may be invalid, even if the original input was valid. + +## Syntax + +`ST_SIMPLIFY(geometry, tolerance)` + +### Parameters + +#### geometry + +Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If the value is `null`, the function returns `null`. + +#### tolerance + +Tolerance for the geometry simplification, in the units of the input spatial reference system (SRS). + +## Examples + +Simplifies a polygon geometry using a tolerance of 0.7 units. +```esql +ROW wkt = "POLYGON ((7.998 53.827, 9.470 53.068, 15.754 53.801, 16.523 57.160, 11.162 57.868, 8.064 57.445, 6.219 55.317, 7.998 53.827))" +| EVAL simplified = ST_SIMPLIFY(TO_GEOSHAPE(wkt), 0.7) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-geospatial predicates.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-geospatial predicates.txt new file mode 100644 index 0000000000000..8f49afbeb82f5 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-geospatial predicates.txt @@ -0,0 +1,67 @@ +# GEOSPATIAL PREDICATES + +Geospatial predicates are functions used to perform spatial operations and comparisons between geometric objects, such as points, shapes, and geo-grids. These commands allow you to compute distances, check for intersections, containment, and spatial relationships between geometries. + +## Syntax + +`ST_DISTANCE(geomA, geomB)` + +`ST_INTERSECTS(geomA, geomB)` + +`ST_DISJOINT(geomA, geomB)` + +`ST_CONTAINS(geomA, geomB)` + +`ST_WITHIN(geomA, geomB)` + +### Parameters + +#### geomA + +Expression representing a geometry. Supported types include `geo_point`, `cartesian_point`, `geo_shape`, `cartesian_shape`, or geo-grid values such as `geohash`, `geotile`, `geohex` (for ST_INTERSECTS and ST_DISJOINT). If `null`, the function returns `null`. + +#### geomB + +Expression representing a geometry. Supported types include `geo_point`, `cartesian_point`, `geo_shape`, `cartesian_shape`, or geo-grid values such as `geohash`, `geotile`, `geohex` (for ST_INTERSECTS and ST_DISJOINT). If `null`, the function returns `null`. The coordinate system of `geomB` must match that of `geomA`; mixing `geo_*` and `cartesian_*` types is not allowed. + +## Examples + +Calculates the distance between the airport's location and its city location for the airport with abbreviation "CPH". 
+ +```esql +FROM airports +| WHERE abbrev == "CPH" +| EVAL distance = ST_DISTANCE(location, city_location) +| KEEP abbrev, name, location, city_location, distance +``` + +Filters airports whose location intersects with a specified polygon. + +```esql +FROM airports +| WHERE ST_INTERSECTS(location, TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 15, 42 14))")) +``` + +Finds airport city boundaries that are completely disjoint from a large specified polygon. + +```esql +FROM airport_city_boundaries +| WHERE ST_DISJOINT(city_boundary, TO_GEOSHAPE("POLYGON((-10 -60, 120 -60, 120 60, -10 60, -10 -60))")) +| KEEP abbrev, airport, region, city, city_location +``` + +Returns airport city boundaries that fully contain a given polygon. + +```esql +FROM airport_city_boundaries +| WHERE ST_CONTAINS(city_boundary, TO_GEOSHAPE("POLYGON((109.35 18.3, 109.45 18.3, 109.45 18.4, 109.35 18.4, 109.35 18.3))")) +| KEEP abbrev, airport, region, city, city_location +``` + +Selects airport city boundaries that are entirely within a specified polygon. + +```esql +FROM airport_city_boundaries +| WHERE ST_WITHIN(city_boundary, TO_GEOSHAPE("POLYGON((109.1 18.15, 109.6 18.15, 109.6 18.65, 109.1 18.65, 109.1 18.15))")) +| KEEP abbrev, airport, region, city, city_location +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-greatest.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-greatest.txt index 2ec2f1bee371d..ebd409425fe1d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-greatest.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-greatest.txt @@ -1,6 +1,6 @@ # GREATEST -Returns the maximum value from multiple columns. This function is similar to `MV_MAX` but is designed to operate on multiple columns simultaneously. +The GREATEST function returns the maximum value from multiple columns. When used with string fields, it returns the last string in alphabetical order. For boolean columns, it returns `true` if any value is `true`. ## Syntax @@ -8,26 +8,18 @@ Returns the maximum value from multiple columns. This function is similar to `MV ### Parameters -#### `first` +#### first The first column to evaluate. -#### `rest` +#### rest The remaining columns to evaluate. ## Examples -Finding the maximum value between two columns - +Finds the maximum value between columns `a` and `b` and stores it in column `g`. ```esql ROW a = 10, b = 20 | EVAL g = GREATEST(a, b) -``` - -This example evaluates the maximum value between columns `a` and `b`, resulting in `g = 20`. - -## Notes - -- When applied to `keyword` or `text` fields, the function returns the last string in alphabetical order. -- When applied to `boolean` columns, the function returns `true` if any of the values are `true`. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-grid encoding functions.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-grid encoding functions.txt new file mode 100644 index 0000000000000..834005daeee85 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-grid encoding functions.txt @@ -0,0 +1,70 @@ +# GRID ENCODING FUNCTIONS + +The GRID ENCODING FUNCTIONS calculate spatial grid encodings for a given geo_point at a specified precision. These include geotile, geohex (H3 cell-id), and geohash encodings. 
The result is long encoded and can be converted to a string, long, or geo_shape bounding geometry using TO_STRING, TO_LONG, or TO_GEOSHAPE respectively. These functions are related to the geo_grid query and the corresponding grid aggregations. + +## Syntax + +`GRID ENCODING FUNCTIONS(geometry, precision, bounds)` + +### Parameters + +#### geometry + +Expression of type `geo_point`. If `null`, the function returns `null`. + +#### precision + +Expression of type `integer`. If `null`, the function returns `null`. Valid values depend on the function: +- ST_GEOTILE: 0 to 29 +- ST_GEOHEX: 0 to 15 +- ST_GEOHASH: 1 to 12 + +#### bounds + +Optional. Bounds to filter the grid tiles, specified as a `geo_shape` of type `BBOX`. Use `ST_ENVELOPE` if the `geo_shape` is of any other type. + +## Examples + +Calculates the geotile grid encoding for each airport location at precision 2, then aggregates by geotile, computes the count and centroid, converts the geotile to a string, sorts by count and geotile string, and keeps relevant fields. + +```esql +FROM airports +| EVAL geotile = ST_GEOTILE(location, 2) +| STATS + count = COUNT(geotile), + centroid = ST_CENTROID_AGG(location) + BY geotile +| EVAL geotileString = TO_STRING(geotile) +| SORT count DESC, geotileString ASC +| KEEP count, centroid, geotileString +``` + +Calculates the geohex (H3 cell-id) grid encoding for each airport location at precision 1, aggregates by geohex, computes the count and centroid, filters for cells with at least 10 airports, converts the geohex to a string, and sorts the results. + +```esql +FROM airports +| EVAL geohex = ST_GEOHEX(location, 1) +| STATS + count = COUNT(geohex), + centroid = ST_CENTROID_AGG(location) + BY geohex +| WHERE count >= 10 +| EVAL geohexString = TO_STRING(geohex) +| KEEP count, centroid, geohexString +| SORT count DESC, geohexString ASC +``` + +Calculates the geohash grid encoding for each airport location at precision 1, aggregates by geohash, computes the count and centroid, filters for geohashes with at least 10 airports, converts the geohash to a string, and sorts the results. + +```esql +FROM airports +| EVAL geohash = ST_GEOHASH(location, 1) +| STATS + count = COUNT(geohash), + centroid = ST_CENTROID_AGG(location) + BY geohash +| WHERE count >= 10 +| EVAL geohashString = TO_STRING(geohash) +| KEEP count, centroid, geohashString +| SORT count DESC, geohashString ASC +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-grok.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-grok.txt index 8948d9c64aab6..de201f9584473 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-grok.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-grok.txt @@ -1,6 +1,6 @@ -## GROK +# GROK -The `GROK` command is used to extract structured data from a string. It matches the string against patterns based on regular expressions and extracts the specified patterns as columns. +The GROK command is used to extract structured data from a string by matching it against patterns based on regular expressions. It extracts the specified patterns as columns. ## Syntax @@ -8,42 +8,31 @@ The `GROK` command is used to extract structured data from a string. It matches ### Parameters -#### `input` +#### input -The column containing the string you want to structure. If the column has multiple values, `GROK` will process each value. 
+The column containing the string you want to structure. If the column has multiple values, GROK will process each value. -#### `pattern` +#### pattern -A grok pattern. -- If a field name conflicts with an existing column, the existing column is discarded. -- If a field name is used more than once, a multi-valued column will be created with one value for each occurrence of the field name. +A grok pattern. If a field name conflicts with an existing column, the existing column is discarded. If a field name is used more than once, a multi-valued column will be created with one value per each occurrence of the field name. ## Examples -Parsing a string with multiple data types - -Parse a string containing a timestamp, an IP address, an email address, and a number: - +Extracts the date, IP address, email address, and number from a string and keeps only those fields. ```esql ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" | GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num}""" | KEEP date, ip, email, num ``` -Type conversion for numeric fields - -Convert numeric fields to specific types by appending `:type` to the semantics in the pattern. For example, `{NUMBER:num:int}` converts the `num` field to an integer: - +Extracts the date, IP address, email address, and converts the extracted number to an integer type. ```esql ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" | GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}""" | KEEP date, ip, email, num ``` -Using type conversion functions - -For other type conversions, use type conversion functions like `TO_DATETIME`: - +Extracts the date, IP address, email address, converts the number to integer, and then converts the date to a datetime type. ```esql ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" | GROK a """%{TIMESTAMP_ISO8601:date} %{IP:ip} %{EMAILADDRESS:email} %{NUMBER:num:int}""" @@ -51,19 +40,9 @@ ROW a = "2023-01-23T12:15:00.000Z 127.0.0.1 some.email@foo.com 42" | EVAL date = TO_DATETIME(date) ``` -Handling multi-valued columns - -When a field name is used more than once, `GROK` creates a multi-valued column: - +Splits a zip code into two parts by extracting two words into a multi-valued column named `zip_parts`. ```esql FROM addresses | KEEP city.name, zip_code | GROK zip_code """%{WORD:zip_parts} %{WORD:zip_parts}""" -``` - -### Limitations - -- If a field name conflicts with an existing column, the existing column is discarded. -- If a field name is used more than once, a multi-valued column will be created with one value per each occurrence of the field name. -- The `GROK` command does not support configuring custom patterns or multiple patterns. -- The `GROK` command is not subject to Grok watchdog settings. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-hash.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-hash.txt index 86aaac976f99d..67121b696c06a 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-hash.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-hash.txt @@ -1,6 +1,6 @@ # HASH -Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, and SHA-512. +The HASH function computes the hash of the input using algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, and SHA-512. 
## Syntax @@ -8,21 +8,21 @@ Computes the hash of the input using various algorithms such as MD5, SHA, SHA-22 ### Parameters -#### `algorithm` +#### algorithm Hash algorithm to use. -#### `input` +#### input Input to hash. ## Examples +Calculates the MD5 and SHA-256 hashes of the `message` column for all rows except those where the message is "Connection error", and displays the original message along with its hashes. + ```esql FROM sample_data | WHERE message != "Connection error" | EVAL md5 = hash("md5", message), sha256 = hash("sha256", message) | KEEP message, md5, sha256 ``` - -This example computes the MD5 and SHA-256 hashes of the `message` field for rows where the `message` is not "Connection error". diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-hypot.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-hypot.txt index db1f05d31fb6f..9a981214c0b0e 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-hypot.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-hypot.txt @@ -1,6 +1,6 @@ # HYPOT -Calculates the hypotenuse of two numbers. The input can be any numeric values, and the return value is always a double. If either input is `null`, the function returns `null`. Hypotenuses of infinities are also `null`. +The HYPOT function calculates the hypotenuse of two numeric values. The result is always a double. If either input is infinity, the function returns null. ## Syntax @@ -10,17 +10,16 @@ Calculates the hypotenuse of two numbers. The input can be any numeric values, a #### number1 -Numeric expression. If `null`, the function returns `null`. +Numeric expression. If this parameter is null, the function returns null. #### number2 -Numeric expression. If `null`, the function returns `null`. +Numeric expression. If this parameter is null, the function returns null. ## Examples +Calculates the hypotenuse of a right triangle with sides 3.0 and 4.0, storing the result in column `c`. ```esql ROW a = 3.0, b = 4.0 | EVAL c = HYPOT(a, b) -``` - -Calculates the hypotenuse of a right triangle with sides `a = 3.0` and `b = 4.0`. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-idelta.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-idelta.txt new file mode 100644 index 0000000000000..0fb4c3e26e53c --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-idelta.txt @@ -0,0 +1,26 @@ +# IDELTA + +Calculates the idelta of a gauge, which is the absolute change between the last two data points. It only considers the last two data points in each time period, making it more responsive to recent changes compared to the delta function. + +## Syntax + +`IDELTA(field, window)` + +### Parameters + +#### field + +The metric field to calculate the idelta value for. + +#### window + +The time window over which to compute the idelta. + +## Examples + +Calculates the sum of the absolute change in events received for each pod, grouped into 10-minute time buckets. 
+ +```esql +TS k8s +| STATS events = SUM(IDELTA(events_received)) by pod, time_bucket = TBUCKET(10minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-increase.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-increase.txt new file mode 100644 index 0000000000000..31b04c30dfbba --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-increase.txt @@ -0,0 +1,27 @@ +# INCREASE + +Calculates the absolute increase of a counter field within a specified time window. + +## Syntax + +`INCREASE(field, window)` + +### Parameters + +#### field + +The metric field for which to calculate the increase. + +#### window + +The time window over which to compute the increase. + +## Examples + +Calculates the total increase in `network.total_bytes_in` for each cluster and 10-minute interval, filtering for pods named "one". + +```esql +TS k8s +| WHERE pod == "one" +| STATS increase_bytes_in = SUM(INCREASE(network.total_bytes_in)) BY cluster, time_bucket = TBUCKET(10minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-infix operators.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-infix operators.txt new file mode 100644 index 0000000000000..37dc9f88b098a --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-infix operators.txt @@ -0,0 +1,116 @@ +# INFIX OPERATORS + +Infix operators provide convenient ways to perform type casting, pattern matching, and value comparisons directly within queries. These operators include cast (`::`), `IN`, `LIKE`, `RLIKE`, and match (`:`). + +## Syntax + +` :: ` +` IN (, , ...)` +` LIKE ` +` RLIKE ` +` : ` + +### Parameters + +#### Cast (`::`) + +Provides an alternative syntax for type conversion, allowing you to cast values to a specific type. + +#### IN + +Tests whether a field or expression equals any element in a list of literals, fields, or expressions. + +#### LIKE + +Filters data based on string patterns using wildcards. The left-hand side is typically a field or literal, and the right-hand side is the pattern. Supports `*` (zero or more characters) and `?` (one character) wildcards. Patterns can be single or multiple, and escaping is supported using backslash `\` or triple quotes `"""`. + +#### RLIKE + +Filters data using regular expression patterns. The left-hand side is typically a field or literal, and the right-hand side is the regex pattern. Supports single or multiple patterns, and escaping is supported using backslash `\` or triple quotes `"""`. + +#### Match (`:`) + +Performs a match query on the specified field, returning true if the provided query matches the row. Equivalent to the match function. + +## Examples + +Concatenates a version string by casting and combining values, then casting the result to a VERSION type. +```esql +ROW ver = CONCAT(("0"::INT + 1)::STRING, ".2.3")::VERSION +``` + +Checks if the result of subtracting `a` from `c` is present in a list of values, including literals and expressions. +```esql +ROW a = 1, b = 4, c = 3 +| WHERE c-a IN (3, b / 2, a) +``` + +Filters employees whose first name matches the pattern `?b*`, where `?` is any single character and `*` is any sequence of characters. +```esql +FROM employees +| WHERE first_name LIKE """?b*""" +| KEEP first_name, last_name +``` + +Matches the string "foo * bar" by escaping the `*` character in the pattern. 
+```esql +ROW message = "foo * bar" +| WHERE message LIKE "foo \\* bar" +``` + +Matches the string "foo * bar" using triple quotes to simplify escaping of the `*` character. +```esql +ROW message = "foo * bar" +| WHERE message LIKE """foo \* bar""" +``` + +Returns true if the `message` field matches any of the patterns "foo*" or "bar?". +```esql +ROW message = "foobar" +| WHERE message like ("foo*", "bar?") +``` + +Filters employees using a pattern provided as a query parameter for the `first_name` field. +```esql +FROM employees +| WHERE first_name LIKE ?pattern +| KEEP first_name, last_name +``` + +Filters employees whose first name matches the regular expression `.leja.*`. +```esql +FROM employees +| WHERE first_name RLIKE """.leja.*""" +| KEEP first_name, last_name +``` + +Matches the string "foo ( bar" by escaping the `(` character in the regex pattern. +```esql +ROW message = "foo ( bar" +| WHERE message RLIKE "foo \\( bar" +``` + +Matches the string "foo ( bar" using triple quotes to simplify escaping of the `(` character in the regex pattern. +```esql +ROW message = "foo ( bar" +| WHERE message RLIKE """foo \( bar""" +``` + +Returns true if the `message` field matches any of the regex patterns "foo.*" or "bar.". +```esql +ROW message = "foobar" +| WHERE message RLIKE ("foo.*", "bar.") +``` + +Filters employees using a regex pattern provided as a query parameter for the `first_name` field. +```esql +FROM employees +| WHERE first_name RLIKE ?pattern +| KEEP first_name, last_name +``` + +Filters books where the `author` field matches the value "Faulkner". +```esql +FROM books +| WHERE author:"Faulkner" +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-inlinestats-by.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-inlinestats-by.txt new file mode 100644 index 0000000000000..2e5ecf130d48d --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-inlinestats-by.txt @@ -0,0 +1,75 @@ +# INLINESTATS-BY + +The INLINE STATS command groups rows based on a common value specified after BY and calculates one or more aggregated values over the grouped rows. The output table retains all columns from the input table, with new columns added or existing columns overridden if their names overlap. Aggregated values can be filtered using WHERE for each calculation, and if BY is omitted, aggregations are applied to the entire dataset. The new columns appear in the order defined in the INLINE STATS command. + +## Syntax + +`INLINE STATS [column1 =] expression1 [WHERE boolean_expression1][, + ..., + [columnN =] expressionN [WHERE boolean_expressionN]] + [BY [grouping_name1 =] grouping_expression1[, + ..., + [grouping_nameN = ] grouping_expressionN]]` + +### Parameters + +#### columnX + +The name for the aggregated value returned. If omitted, the name defaults to the corresponding expression. If multiple columns share the same name, only the rightmost column with that name is kept. + +#### expressionX + +An expression that computes an aggregated value. + +#### grouping_expressionX + +An expression that outputs the values to group by. If its name matches an existing or computed column, that column will be overridden. + +#### boolean_expressionX + +The condition that determines which rows are included when evaluating expressionX. + +## Examples + +Calculates the maximum salary for each language group and adds it as a new column. 
+ +```esql +FROM employees +| KEEP emp_no, languages, salary +| INLINE STATS max_salary = MAX(salary) BY languages +``` + +Calculates the maximum salary across all employees and adds it as a new column to every row. + +```esql +FROM employees +| KEEP emp_no, languages, salary +| INLINE STATS max_salary = MAX(salary) +``` + +Calculates the average salary and the count of employees, grouped by both language and tenure. + +```esql +FROM employees +| WHERE still_hired +| KEEP emp_no, languages, salary, hire_date +| EVAL tenure = DATE_DIFF("year", hire_date, "2025-09-18T00:00:00") +| DROP hire_date +| INLINE STATS avg_salary = AVG(salary), count = count(*) BY languages, tenure +``` + +Calculates rounded average salaries for employees in different salary ranges, using conditional WHERE clauses for each aggregation. + +```esql +FROM employees +| KEEP emp_no, salary +| INLINE STATS avg_lt_50 = ROUND(AVG(salary)) WHERE salary < 50000, + avg_lt_60 = ROUND(AVG(salary)) WHERE salary >=50000 AND salary < 60000, + avg_gt_60 = ROUND(AVG(salary)) WHERE salary >= 60000 +``` + +## Limitations + +- The CATEGORIZE grouping function is not currently supported. +- You cannot use LIMIT (explicit or implicit) before INLINE STATS, as this can lead to unexpected results. +- You cannot use FORK before INLINE STATS, because FORK adds an implicit LIMIT to each branch, which can lead to unexpected results. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ip_prefix.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ip_prefix.txt index 5f49a5e012571..5014c02f20c0b 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ip_prefix.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ip_prefix.txt @@ -1,6 +1,6 @@ # IP_PREFIX -Truncates an IP address to a specified prefix length. +The IP_PREFIX function truncates an IP address to a specified prefix length, supporting both IPv4 and IPv6 formats. ## Syntax @@ -8,25 +8,23 @@ Truncates an IP address to a specified prefix length. ### Parameters -#### `ip` +#### ip -The IP address to truncate. Supports both IPv4 and IPv6 addresses and must be of type `ip`. +IP address of type `ip`. Both IPv4 and IPv6 addresses are supported. -#### `prefixLengthV4` +#### prefixLengthV4 -The prefix length to apply for IPv4 addresses. +Prefix length to use for truncating IPv4 addresses. -#### `prefixLengthV6` +#### prefixLengthV6 -The prefix length to apply for IPv6 addresses. +Prefix length to use for truncating IPv6 addresses. ## Examples -Truncating IPv4 and IPv6 addresses +Truncates an IPv4 address to a /24 prefix and an IPv6 address to a /112 prefix, showing the original and truncated values for each. ```esql -ROW ip4 = TO_IP("1.2.3.4"), ip6 = TO_IP("fe80::cae2:65ff:fece:feb9") -| EVAL ip4_prefix = IP_PREFIX(ip4, 24, 0), ip6_prefix = IP_PREFIX(ip6, 0, 112) +ROW ip4 = to_ip("1.2.3.4"), ip6 = TO_IP("fe80::cae2:65ff:fece:feb9") +| EVAL ip4_prefix = IP_PREFIX(ip4, 24, 0), ip6_prefix = IP_PREFIX(ip6, 0, 112); ``` - -This example truncates the IPv4 address `1.2.3.4` to a `/24` prefix and the IPv6 address `fe80::cae2:65ff:fece:feb9` to a `/112` prefix. 
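An editorial sketch for IP_PREFIX: the function is commonly combined with STATS to group events by subnet. A minimal sketch, not part of the generated doc; it reuses the `sample_data` index from the HASH example above, while the ip-typed `client_ip` field is an assumption for illustration.

```esql
// Hypothetical sketch: count events per /24 (IPv4) or /112 (IPv6) subnet.
// client_ip is an assumed ip-typed field; swap in a real field name.
FROM sample_data
| EVAL subnet = IP_PREFIX(client_ip, 24, 112)
| STATS requests = COUNT(*) BY subnet
| SORT requests DESC
```

The same pattern applies to any `ip` field; only the prefix lengths change.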
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-irate.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-irate.txt new file mode 100644 index 0000000000000..f2f7f7493bf3e --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-irate.txt @@ -0,0 +1,26 @@ +# IRATE + +Calculates the irate of a counter field, which is the per-second rate of increase between the last two data points. This function is more responsive to recent changes in the rate of increase, as it only considers the last two data points in each time period. + +## Syntax + +`IRATE(field, window)` + +### Parameters + +#### field + +The metric field to calculate the irate for. + +#### window + +The time window over which to compute the irate. + +## Examples + +Calculates the per-second rate of increase for the `network.total_bytes_in` field for each Kubernetes pod named "one", grouping the results by cluster and 10-minute time buckets. + +```esql +TS k8s | WHERE pod == "one" +| STATS irate_bytes_in = SUM(IRATE(network.total_bytes_in)) BY cluster, time_bucket = TBUCKET(10minute) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-keep.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-keep.txt index c5cb2f56a8f34..2b37a7acef13a 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-keep.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-keep.txt @@ -1,6 +1,6 @@ -## KEEP +# KEEP -The `KEEP` command specifies which columns are returned and the order in which they appear in the output. +The KEEP command allows you to specify which columns are returned and the order in which they appear. When a field name matches multiple expressions, precedence rules determine which expression is used: complete field names have the highest priority, followed by partial wildcard expressions, and finally the wildcard-only expression. If two expressions have the same precedence, the rightmost one takes priority. ## Syntax @@ -8,76 +8,50 @@ The `KEEP` command specifies which columns are returned and the order in which t ### Parameters -#### `columns` +#### columns -A comma-separated list of columns to retain. Supports wildcards. If a column matches multiple expressions, precedence rules determine the final output. - -## Note - -The KEEP command is used to specify which columns to return and their order. - -When a field name matches multiple expressions, precedence rules are applied. Fields are added in the order they appear. If one field matches multiple expressions, the following precedence rules apply (from highest to lowest priority): - -1. Complete field name (without wildcards) -2. Partial wildcard expressions (like `fieldNam*`) -3. Only wildcard (`*`) - -If a field matches two expressions with the same precedence, the rightmost expression wins. - -Important: only the columns in the KEEP command can be used after a KEEP command. +A comma-separated list of columns to keep. Wildcards are supported. If a column matches multiple expressions, precedence rules are applied to determine which expression is used. ## Examples -### Return columns in a specific order - -The following query returns the `emp_no`, `first_name`, `last_name`, and `height` columns in the specified order: +Selects and orders the columns emp_no, first_name, last_name, and height. 
```esql FROM employees | KEEP emp_no, first_name, last_name, height ``` -### Use wildcards to match column names - -This query keeps all columns with names starting with `h`: +Returns all columns whose names start with "h". ```esql FROM employees | KEEP h* ``` -### Combine specific columns and wildcards - -The asterisk wildcard (`*`) matches all columns not explicitly specified. This query returns all columns starting with `h` first, followed by all other columns: +Returns columns starting with "h" followed by all remaining columns. ```esql FROM employees | KEEP h*, * ``` -### Precedence of complete field names over wildcards - -When a column matches both a complete field name and a wildcard, the complete field name takes precedence: +Returns first_name and last_name, with first_name taking precedence over the wildcard. ```esql FROM employees | KEEP first_name, last_name, first_name* ``` -### Wildcard precedence and ordering - -If a column matches multiple wildcard expressions, the rightmost expression takes precedence, even if it is less specific: +Returns columns matching first_name* and first_na*, with the rightmost expression taking precedence. ```esql FROM employees | KEEP first_name*, last_name, first_na* ``` -### Lowest precedence for the `*` wildcard - -The `*` wildcard has the lowest precedence. The order of other arguments determines the output order: +Returns all columns, with first_name appearing last due to its position in the argument list. ```esql FROM employees | KEEP *, first_name -``` +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-knn.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-knn.txt new file mode 100644 index 0000000000000..cca52131dccf9 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-knn.txt @@ -0,0 +1,61 @@ +# KNN + +The KNN function finds the k nearest vectors to a query vector using a similarity metric. It performs approximate search on indexed dense_vector or semantic_text fields to identify the closest matches. + +## Syntax + +`KNN(field, query, options, boost, k, visit_percentage, min_candidates, rescore_oversample, similarity)` + +### Parameters + +#### field + +The field to target for the query. The function works with dense_vector or semantic_text fields. Other text fields are not allowed. + +#### query + +The vector value for which to find the top nearest neighbors. + +#### options + +(Optional) Additional kNN options as function named parameters. Refer to the knn query documentation for more details. + +#### boost + +(Optional) A floating point number that adjusts the relevance scores of the query. Defaults to 1.0. + +#### k + +(Optional) The number of nearest neighbors to return from each shard. This value must be less than or equal to num_candidates and is automatically set if a LIMIT is applied. + +#### visit_percentage + +(Optional) The percentage of vectors to explore per shard during knn search with bbq_disk. Must be between 0 and 100. Defaults to approximately 1% per shard for every 1 million vectors. Increasing this value can improve accuracy. + +#### min_candidates + +(Optional) The minimum number of nearest neighbor candidates to consider per shard. Cannot exceed 10,000. Defaults to 1.5 times k (or LIMIT) used for the query. Increasing this value can improve accuracy. 
+ +#### rescore_oversample + +(Optional) A double value that applies oversampling for rescoring quantized vectors. + +#### similarity + +(Optional) The minimum similarity required for a document to be considered a match. This value relates to the raw similarity used, not the document score. + +## Examples + +Find the nearest colors to a given RGB vector and sort the results by score and color name: + +```esql +from colors metadata _score +| where knn(rgb_vector, [0, 120, 0]) +| sort _score desc, color asc +``` + +Limitations + +- The KNN function can only be used with dense_vector or semantic_text fields. Other text fields are not supported. +- The min_candidates parameter cannot exceed 10,000. +- The k parameter must be less than or equal to num_candidates. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-kql.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-kql.txt index 8d98b9f6b016a..fff5e6b97f645 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-kql.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-kql.txt @@ -1,44 +1,59 @@ -## KQL +# KQL -Performs a KQL query and returns `true` if the provided KQL query string matches the row. +The KQL function performs a query using the KQL query string format and returns true if the provided query matches the row. Below characters must be escaped with a backslash. - \():<>"* - ## Syntax -`KQL(query)` +`KQL(query, options, boost, time_zone, case_insensitive, default_field)` ### Parameters -#### `query` +#### query Query string in KQL query string format. -## Examples +#### options + +Optional. KQL additional options as function named parameters. Available in stack version 9.3.0 and later. + +#### boost + +Floating point number used to decrease or increase the relevance scores of the query. Defaults to 1.0. + +#### time_zone + +UTC offset or IANA time zone used to interpret date literals in the query string. + +#### case_insensitive -### Example 1 +If true, performs case-insensitive matching for keyword fields. Defaults to false. +#### default_field +Default field to search if no field is provided in the query string. Supports wildcards (*). + +## Examples + +Filters the `books` table to only include rows where the author is Faulkner. ```esql FROM books | WHERE KQL("author: Faulkner") -| KEEP book_no, author -| SORT book_no -| LIMIT 5 ``` -This example filters rows where the `author` field matches "Faulkner," keeps the `book_no` and `author` columns, sorts the results by `book_no`, and limits the output to 5 rows. +Filters the `employees` table to only include rows where the first name matches "mary", using case-insensitive matching and a custom boost value. +```esql +FROM employees +| WHERE KQL("mary", {"case_insensitive": true, "default_field": "first_name", "boost": 1.5}) +``` -### Example 3 +Filters rows where any field contains the term "Great" and limits the output to 10 rows. ```esql FROM books | WHERE KQL("""Great""") | LIMIT 10 - ``` - -This example filters rows where any field contains the term "Great" and limits the output to 10 rows. 
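As an illustrative sketch of how the KQL `boost` option can interact with relevance scoring, assuming the same `books` index; the `METADATA _score` / `SORT _score` pattern is borrowed from the MATCH and KNN examples rather than from the KQL source text:

```esql
FROM books METADATA _score
| WHERE KQL("author: Faulkner", {"boost": 2.0})
| SORT _score DESC
| KEEP book_no, author
| LIMIT 5
```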
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-last_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-last_over_time.txt new file mode 100644 index 0000000000000..a6fbf2df827df --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-last_over_time.txt @@ -0,0 +1,26 @@ +# LAST_OVER_TIME + +Calculates the latest value of a field, with recency determined by the `@timestamp` field. + +## Syntax + +`LAST_OVER_TIME(field, window)` + +### Parameters + +#### field + +The metric field for which to calculate the latest value. + +#### window + +The time window over which to find the latest value. + +## Examples + +Finds the maximum network cost from the most recent value within each 1-minute interval, grouped by cluster. + +```esql +TS k8s +| STATS max_cost=MAX(LAST_OVER_TIME(network.cost)) BY cluster, time_bucket = TBUCKET(1minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-least.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-least.txt index 0a05a9b57c2cc..1926a46f1f969 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-least.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-least.txt @@ -1,6 +1,6 @@ -## LEAST +# LEAST -Returns the minimum value from multiple columns. This function is similar to `MV_MIN` but is designed to operate on multiple columns simultaneously. +The LEAST function returns the minimum value from multiple columns. It is designed to evaluate several columns at once and select the smallest value among them. ## Syntax @@ -8,19 +8,19 @@ Returns the minimum value from multiple columns. This function is similar to `MV ### Parameters -#### `first` +#### first The first column to evaluate. -#### `rest` +#### rest The remaining columns to evaluate. ## Examples +Finds the smaller value between columns `a` and `b` and stores it in a new column `l`: + ```esql ROW a = 10, b = 20 | EVAL l = LEAST(a, b) ``` - -This example calculates the minimum value between columns `a` and `b`. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-left.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-left.txt index 26492f9b15e03..b534c943ca1bb 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-left.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-left.txt @@ -1,6 +1,6 @@ # LEFT -Returns a substring that extracts a specified number of characters from the beginning of a string. +The LEFT function returns a substring containing a specified number of characters from the beginning (left side) of a string. ## Syntax @@ -8,22 +8,20 @@ Returns a substring that extracts a specified number of characters from the begi ### Parameters -#### `string` +#### string -The string from which to return a substring. +The string from which the substring will be extracted. -#### `length` +#### length -The number of characters to return. +The number of characters to return from the left side of the string. ## Examples +Extracts the first three characters from the `last_name` column for each employee. 
+ ```esql FROM employees | KEEP last_name | EVAL left = LEFT(last_name, 3) -| SORT last_name ASC -| LIMIT 5 -``` - -Extracts the first three characters from the `last_name` column, sorts the results alphabetically, and limits the output to the first five rows. \ No newline at end of file +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-length.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-length.txt index 4c614fc42bd57..aec6ea8f1b62f 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-length.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-length.txt @@ -1,6 +1,6 @@ -## LENGTH +# LENGTH -Returns the character length of a string. +Returns the character length of a string. All strings are in UTF-8, so a single character can use multiple bytes. ## Syntax @@ -8,17 +8,17 @@ Returns the character length of a string. ### Parameters -#### `string` +#### string -String expression. If `null`, the function returns `null`. +String expression to measure the length of. If the value is `null`, the function returns `null`. ## Examples +Calculates the number of characters in each city name for airports located in India. + ```esql FROM airports | WHERE country == "India" | KEEP city | EVAL fn_length = LENGTH(city) -``` - -This example calculates the character length of the `city` field for airports located in India. \ No newline at end of file +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-limit.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-limit.txt index 758fa7df786a5..076615bd2701a 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-limit.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-limit.txt @@ -1,6 +1,6 @@ -## LIMIT +# LIMIT -The `LIMIT` command restricts the number of rows returned by a query. +The LIMIT command restricts the number of rows returned by a query. It is useful for controlling the size of the result set, especially when working with large datasets. ## Syntax @@ -8,23 +8,13 @@ The `LIMIT` command restricts the number of rows returned by a query. ### Parameters -#### `max_number_of_rows` +#### max_number_of_rows The maximum number of rows to return. -## Description - -The `LIMIT` command restricts the number of rows returned by a query. For example: - -```esql -FROM index -| WHERE field == "value" -| LIMIT 1000 -``` - ## Examples -Limit the result to the first 5 rows, sorted by `emp_no` in ascending order: +Returns the first 5 employees after sorting all employees by their employee number in ascending order. ```esql FROM employees @@ -34,9 +24,4 @@ FROM employees ## Limitations -- Queries cannot return more than 10,000 rows, even if the `LIMIT` value exceeds this threshold. - -To work around this limitation: - - Reduce the size of the result set by modifying the query to only return relevant data. This can be achieved by using the WHERE command to select a smaller subset of the data. - - Shift any post-query processing to the query itself. The ES|QL STATS ... BY command can be used to aggregate data within the query. - +Queries do not return more than 10,000 rows, regardless of the LIMIT value. This is a configurable upper limit. 
The default and maximum limits can be changed using the dynamic cluster settings `esql.query.result_truncation_default_size` and `esql.query.result_truncation_max_size`. Increasing these limits may result in higher memory usage, longer processing times, and increased internode traffic. The upper limit applies only to the number of rows output by the query, not to the number of documents processed. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-locate.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-locate.txt index 7f4fb7d5267c4..b91e18ed4febf 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-locate.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-locate.txt @@ -1,6 +1,6 @@ # LOCATE -Returns an integer indicating the position of a substring within another string. If the substring is not found, it returns `0`. Note that string positions start from `1`. +Returns an integer indicating the position of a substring within another string. If the substring is not found, the function returns `0`. String positions start from `1`. ## Syntax @@ -8,30 +8,23 @@ Returns an integer indicating the position of a substring within another string. ### Parameters -#### `string` +#### string -An input string. +The input string in which to search for the substring. -#### `substring` +#### substring -A substring to locate within the input string. +The substring to locate within the input string. -#### `start` +#### start -The start index. This parameter is optional. +The start index from which to begin the search. ## Examples -Locate a substring within a string +Finds the position of the substring "ll" within the string "hello" and stores the result in a new column. ```esql ROW a = "hello" | EVAL a_ll = LOCATE(a, "ll") ``` - -This example finds the position of the substring `"ll"` within the string `"hello"`. The result is `3`. - -## Notes - -- String positions start from `1`. -- If the substring cannot be found, the function returns `0`. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-log.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-log.txt index 72ed7be5105c6..542c8c8ffbd10 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-log.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-log.txt @@ -1,6 +1,6 @@ # LOG -Calculates the logarithm of a numeric value to a specified base. If the base is not provided, it defaults to the natural logarithm (base e). +The LOG function calculates the logarithm of a numeric value to a specified base. The result is always a double. If the input value is zero, negative, or the base is one, the function returns `null` and issues a warning. ## Syntax @@ -8,34 +8,24 @@ Calculates the logarithm of a numeric value to a specified base. If the base is ### Parameters -#### `base` +#### base -- Base of the logarithm. If `null`, the function returns `null`. If not provided, the function calculates the natural logarithm (base e). +Base of the logarithm. If `null`, the function returns `null`. If not provided, the function returns the natural logarithm (base e) of the value. -#### `number` +#### number -- Numeric expression. If `null`, the function returns `null`. +Numeric expression to compute the logarithm for. If `null`, the function returns `null`. 
## Examples -Logarithm with a specified base - -Calculate the logarithm of 8 to base 2: - +Calculates the logarithm of 8 with base 2. ```esql ROW base = 2.0, value = 8.0 | EVAL s = LOG(base, value) ``` -Natural logarithm (base e) - -Calculate the natural logarithm of 100: - +Calculates the natural logarithm (base e) of 100. ```esql ROW value = 100 -| EVAL s = LOG(value) -``` - -## Limitations - -- Logs of zero, negative numbers, and a base of one return `null` and generate a warning. +| EVAL s = LOG(value); +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-log10.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-log10.txt index f8b9748577624..b97d564b3ffee 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-log10.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-log10.txt @@ -1,15 +1,22 @@ -## LOG10 +# LOG10 -The `LOG10` function returns the logarithm of a value to base 10. The input can be any numeric value, and the return value is always a double. Logs of 0 and negative numbers return null as well as a warning. +The LOG10 command calculates the base 10 logarithm of a numeric value. The result is always a double. If the input is 0 or a negative number, the function returns `null` and issues a warning. -### Examples +## Syntax + +`LOG10(number)` + +### Parameters + +#### number + +Numeric expression. If the value is `null`, the function returns `null`. + +## Examples + +Calculates the base 10 logarithm of the value 1000.0 and stores the result in a new column. ```esql ROW d = 1000.0 | EVAL s = LOG10(d) -``` - -```esql -ROW value = 100 -| EVAL log_value = LOG10(value) ``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-logical operators.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-logical operators.txt new file mode 100644 index 0000000000000..4e9f0a4e458f4 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-logical operators.txt @@ -0,0 +1,18 @@ +# LOGICAL OPERATORS + +Logical operators are used to combine or negate conditions in queries. The supported logical operators are: +- `AND` +- `OR` +- `NOT` + +## Syntax + +`AND`, `OR`, `NOT` + +### Parameters + +None. + +## Examples + +No examples provided. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-lookup-join.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-lookup-join.txt index ab26619d7d318..6ddfd21db2b9b 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-lookup-join.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-lookup-join.txt @@ -1,172 +1,79 @@ -# LOOKUP JOIN +# LOOKUP-JOIN -The `LOOKUP JOIN` command combines data from a query results table with matching records from a specified lookup index. It adds fields from the lookup index as new columns to the results table based on matching values in the join field. This is particularly useful for enriching or correlating data across multiple indices, such as logs, IPs, user IDs, or hosts. +The LOOKUP JOIN command adds new columns to your query results by finding documents in a lookup index that share the same join field value as your result rows. 
For each row that matches a document in the lookup index based on the join fields, all fields from the matching document are added as new columns to that row. If multiple documents in the lookup index match a single row, the output will contain one row for each matching combination. This command enables you to enrich your results with data from another index, simplifying data enrichment and analysis workflows. ## Syntax -`LOOKUP JOIN ON ` +`FROM | LOOKUP JOIN ON ` ### Parameters -#### lookup_index +#### -The name of the lookup index. This must be a specific index name—wildcards, aliases, and remote cluster references are not supported. Indices used for lookups must be configured with the `lookup` mode. +The name of the lookup index. This must be a specific index name; wildcards, aliases, and remote cluster references are not supported. Indices used for lookups must be configured with the `lookup` index mode. -#### join_condition +#### Can be one of the following: +- A single field name. +- A comma-separated list of field names, for example `, , `. +- An expression with one or more predicates linked by `AND`, for example ` >= AND == `. Each predicate compares a field from the left index with a field from the lookup index using binary operators (`==`, `>=`, `<=`, `>`, `<`, `!=`). Each field name in the join condition must exist in only one of the indexes. Use RENAME to resolve naming conflicts. +- An expression that includes full text functions and other Lucene-pushable functions, for example `MATCH(, "search term") AND == `. These functions can be combined with binary operators and logical operators (`AND`, `OR`, `NOT`) to create complex join conditions. At least one condition that relates the lookup index fields to the left side of the join fields is still required. -- A single field name -- A comma-separated list of field names, for example , , -- An expression with one or more predicates linked by AND, for example >= AND == . Each predicate compares a field from the left index with a field from the lookup index using binary operators (==, >=, <=, >, <, !=). Each field name in the join condition must exist in only one of the indexes. Use RENAME to resolve naming conflicts. -- An expression that includes full text functions and other Lucene-pushable functions, for example MATCH(, "search term") AND == . These functions can be combined with binary operators and logical operators (AND, OR, NOT) to create complex join conditions. At least one condition that relates the lookup index fields to the left side of the join fields is still required. +If using join on a single field or a field list, the fields used must exist in both your current query results and in the lookup index. If the fields contain multi-valued entries, those entries will not match anything (the added fields will contain null for those rows). -If using join on a single field or a field list, the fields used must exist in both your current query results and in the lookup index. If the fields contains multi-valued entries, those entries will not match anything (the added fields will contain null for those rows). +## Examples - -### Syntax Examples - -``` -LOOKUP JOIN ON - -LOOKUP JOIN ON , , - -LOOKUP JOIN ON >= AND == - -LOOKUP JOIN ON MATCH(lookup_field, "search term") AND == - -``` - -## Query Examples - -### Example 1: Enriching Firewall Logs with Threat Data - -This example demonstrates how to enrich firewall logs with threat data from a lookup index. 
- -#### Sample Data Setup - -##### Create the `threat_list` index +Correlate source IPs with known malicious addresses by joining firewall logs with a threat list on the source IP field. ```esql -PUT threat_list -{ - "settings": { - "index.mode": "lookup" - }, - "mappings": { - "properties": { - "source.ip": { "type": "ip" }, - "dest.ip": { "type": "ip" }, - "threat_level": { "type": "keyword" }, - "threat_type": { "type": "keyword" }, - "last_updated": { "type": "date" } - } - } -} +FROM firewall_logs +| LOOKUP JOIN threat_list ON source.IP ``` -##### Create the `firewall_logs` index +Show only firewall logs where the source IP matches a known threat by filtering for rows with a non-null threat level after joining. ```esql -PUT firewall_logs -{ - "mappings": { - "properties": { - "timestamp": { "type": "date" }, - "source.ip": { "type": "ip" }, - "destination.ip": { "type": "ip" }, - "action": { "type": "keyword" }, - "bytes_transferred": { "type": "long" } - } - } -} +FROM firewall_logs +| LOOKUP JOIN threat_list ON source.IP +| WHERE threat_level IS NOT NULL ``` -##### Add sample data to `threat_list` +Enrich system metrics with host inventory and ownership information by joining with both the host inventory and ownerships indices on the host name. ```esql -POST threat_list/_bulk -{"index":{}} -{"source.ip":"203.0.113.5","threat_level":"high","threat_type":"C2_SERVER","last_updated":"2025-04-22", "dest.ip":"10.0.0.100"} -{"index":{}} -{"source.ip":"198.51.100.2","threat_level":"medium","threat_type":"SCANNER","last_updated":"2025-04-23", "dest.ip":"10.0.0.44"} +FROM system_metrics +| LOOKUP JOIN host_inventory ON host.name +| LOOKUP JOIN ownerships ON host.name ``` -##### Add sample data to `firewall_logs` +Add service ownership details to application logs by joining with the service owners index on the service ID. ```esql -POST firewall_logs/_bulk -{"index":{}} -{"timestamp":"2025-04-23T10:00:01Z","source.ip":"192.0.2.1","destination.ip":"10.0.0.100","action":"allow","bytes_transferred":1024} -{"index":{}} -{"timestamp":"2025-04-23T10:00:05Z","source.ip":"203.0.113.5","destination.ip":"10.0.0.55","action":"allow","bytes_transferred":2048} -{"index":{}} -{"timestamp":"2025-04-23T10:00:08Z","source.ip":"198.51.100.2","destination.ip":"10.0.0.200","action":"block","bytes_transferred":0} -{"index":{}} -{"timestamp":"2025-04-23T10:00:15Z","source.ip":"203.0.113.5","destination.ip":"10.0.0.44","action":"allow","bytes_transferred":4096} -{"index":{}} -{"timestamp":"2025-04-23T10:00:30Z","source.ip":"192.0.2.1","destination.ip":"10.0.0.100","action":"allow","bytes_transferred":512} +FROM app_logs +| LOOKUP JOIN service_owners ON service_id ``` -#### Query the Data based on common field between lookup and source index +Filter employees by employee number before joining with language lookup data to demonstrate optimizer behavior with the WHERE clause before the LOOKUP JOIN. ```esql -FROM firewall_logs -| LOOKUP JOIN threat_list ON source.ip -| WHERE threat_level IS NOT NULL -| SORT timestamp -| KEEP source.ip, action, threat_level, threat_type -| LIMIT 10 +FROM employees +| EVAL language_code = languages +| WHERE emp_no >= 10091 AND emp_no < 10094 +| LOOKUP JOIN languages_lookup ON language_code ``` -This query: -- Matches the `source.ip` field in `firewall_logs` with the `source.ip` field in `threat_list`. -- Filters rows to include only those with non-null `threat_level`. -- Sorts the results by `timestamp`. -- Keeps only the `source.ip`, `action`, `threat_level`, and `threat_type` fields. 
-- Limits the output to 10 rows. - -#### Response - -| source.ip | action | threat_type | threat_level | -|---------------|--------|-------------|--------------| -| 203.0.113.5 | allow | C2_SERVER | high | -| 198.51.100.2 | block | SCANNER | medium | -| 203.0.113.5 | allow | C2_SERVER | high | - -In this example, the `source.ip` field from `firewall_logs` is matched with the `source.ip` field in `threat_list`, and the corresponding `threat_level` and `threat_type` fields are added to the output. - - -#### Query the Data based on fields with different name in lookup and source index +Join employees with language lookup data, then filter by employee number to demonstrate optimizer behavior with the WHERE clause after the LOOKUP JOIN. The optimizer will move the filter before the join when possible. ```esql -FROM firewall_logs -| LOOKUP JOIN threat_list ON destination.ip == dest.ip -| WHERE threat_level IS null -| SORT timestamp -| KEEP destination.ip, action, bytes_transferred +FROM employees +| EVAL language_code = languages +| LOOKUP JOIN languages_lookup ON language_code +| WHERE emp_no >= 10091 AND emp_no < 10094 ``` -This query: -- Matches the `destination.ip` field in `firewall_logs` with the `dest.ip ` field in `threat_list`. -- Filters rows to include only those with null `threat_level`. This means no threat was found for matched destination IPs. -- Sorts the results by `timestamp`. -- Keeps only the `destination.ip`, `action`, and `bytes_transferred` fields. - -#### Response - -|destination.ip | action | bytes_transferred | -|----------------|--------|-------------------| -|10.0.0.55 | allow | 2048 | -|10.0.0.200 | block | 0 | - - ## Limitations -The following are the current limitations with LOOKUP JOIN: -- Indices in `lookup` mode are always single-sharded. -- Only equality-based matching is supported. -- `LOOKUP JOIN` can only use a single match field and a single index. -- Wildcards, aliases, datemath, and datastreams are not supported. -- The query may circuit break if there are too many matching documents in the lookup index or if the documents are too large. `LOOKUP JOIN` processes data in batches of approximately 10,000 rows, which can require significant heap space for large matching documents. -- Cross-cluster `LOOKUP JOIN` can not be used after aggregations (`STATS`), `SORT` and `LIMIT` commands, and coordinator-side `ENRICH` commands. +- The lookup index must be a specific index name; wildcards, aliases, and remote cluster references are not supported. +- Indices used for lookups must be configured with the `lookup` index mode. +- If the join fields contain multi-valued entries, those entries will not match anything, and the added fields will contain null for those rows. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ltrim.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ltrim.txt index 9f129aa039177..2470166fd08f8 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ltrim.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ltrim.txt @@ -8,18 +8,18 @@ Removes leading whitespaces from a string. ### Parameters -#### `string` +#### string -String expression. If `null`, the function returns `null`. +String expression to process. If the value is `null`, the function returns `null`. 
## Examples +Removes leading spaces from the `message` and `color` columns, then wraps the trimmed results in single quotes. + ```esql ROW message = " some text ", color = " red " | EVAL message = LTRIM(message) | EVAL color = LTRIM(color) | EVAL message = CONCAT("'", message, "'") | EVAL color = CONCAT("'", color, "'") -``` - -This example removes leading whitespaces from the `message` and `color` columns, then wraps the resulting strings in single quotes. \ No newline at end of file +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-match.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-match.txt index 53a5a98f9f8fd..c69d0440982fe 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-match.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-match.txt @@ -1,59 +1,84 @@ # MATCH -The `MATCH` function performs a match query on the specified field. It is equivalent to the `match` query in the Elasticsearch Query DSL and can be used to search for values in various field types, including text, semantic_text, keyword, boolean, dates, and numeric types. - -`MATCH` supports function named parameters to specify additional options for the match query. For a simplified syntax, the match operator `:` can be used instead of `MATCH`. The function returns `true` if the provided query matches the row. +The MATCH function performs a match query on a specified field, similar to the `match` query in the Elasticsearch Query DSL. It can be used on fields such as text, semantic_text, keyword, boolean, dates, and numeric types. When used on a semantic_text field, it performs a semantic query. MATCH supports function named parameters for additional query options, and all match query parameters are supported. The function returns true if the provided query matches the row. For a simplified syntax, you can use the match operator `:` instead of MATCH. ## Syntax -`MATCH(field, query, options)` +`MATCH(field, query, options, fuzziness, auto_generate_synonyms_phrase_query, analyzer, minimum_should_match, zero_terms_query, boost, fuzzy_transpositions, fuzzy_rewrite, prefix_length, lenient, operator, max_expansions)` ### Parameters -#### `field` +#### field The field that the query will target. -#### `query` +#### query -The value to find in the specified field. +The value to find in the provided field. -#### `options` +#### options -(Optional) Additional match query options provided as function named parameters. Refer to the match query documentation for more details. +Optional. Additional match options specified as function named parameters. -## Examples +#### fuzziness -Match on a specific field +Maximum edit distance allowed for matching. -```esql -FROM books -| WHERE MATCH(author, "Faulkner") -| KEEP book_no, author -| SORT book_no -| LIMIT 5 -``` +#### auto_generate_synonyms_phrase_query -This example retrieves books where the `author` field matches "Faulkner," keeping only the `book_no` and `author` fields, sorting by `book_no`, and limiting the results to 5 rows. +Optional. If true, match phrase queries are automatically created for multi-term synonyms. Defaults to true. -Match with additional options +#### analyzer -```esql -FROM books -| WHERE MATCH(title, "Hobbit Back Again", {"operator": "AND"}) -| KEEP title -``` +Optional. Analyzer used to convert the text in the query value into tokens. 
Defaults to the index-time analyzer mapped for the field, or the index’s default analyzer if none is mapped. + +#### minimum_should_match + +Optional. Minimum number of clauses that must match for a document to be returned. + +#### zero_terms_query + +Optional. Indicates whether all documents or none are returned if the analyzer removes all tokens, such as when using a stop filter. Defaults to none. + +#### boost + +Optional. Floating point number used to decrease or increase the relevance scores of the query. Defaults to 1.0. -This example searches for books where the `title` field matches "Hobbit Back Again" using the `AND` operator, and keeps only the `title` field in the results. +#### fuzzy_transpositions -Match with sorting on score +Optional. If true, edits for fuzzy matching include transpositions of two adjacent characters (ab → ba). Defaults to true. -Match on a specific field +#### fuzzy_rewrite +Optional. Method used to rewrite the query. If the fuzziness parameter is not 0, the match query uses a fuzzy_rewrite method of top_terms_blended_freqs_${max_expansions} by default. + +#### prefix_length + +Optional. Number of beginning characters left unchanged for fuzzy matching. Defaults to 0. + +#### lenient + +Optional. If false, format-based errors, such as providing a text query value for a numeric field, are returned. Defaults to false. + +#### operator + +Optional. Boolean logic used to interpret text in the query value. Defaults to OR. + +#### max_expansions + +Optional. Maximum number of terms to which the query will expand. Defaults to 50. + +## Examples + +Finds all books where the author field matches "Faulkner". ```esql -FROM books METADATA _score +FROM books | WHERE MATCH(author, "Faulkner") -| SORT _score DESC -| KEEP book_no, author -| LIMIT 5 ``` + +Finds books whose title matches the phrase "Hobbit Back Again" using the AND operator, and returns only the title column. +```esql +FROM books +| WHERE MATCH(title, "Hobbit Back Again", {"operator": "AND"}) +| KEEP title; +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-match_phrase.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-match_phrase.txt index 627d742008b4a..bc96e311db030 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-match_phrase.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-match_phrase.txt @@ -1,10 +1,10 @@ # MATCH_PHRASE -The MATCH_PHRASE function performs a phrase match query on a specified field, returning true if the provided query matches the row. It is equivalent to the match_phrase query in the Elasticsearch Query DSL and can be used on text fields, as well as keyword, boolean, or date types. Additional options can be specified using function named parameters. +The MATCH_PHRASE function performs a phrase match on the specified field, returning true if the provided query matches the row. It is equivalent to the `match_phrase` query in the Query DSL and can be used on text fields, as well as keyword, boolean, or date types. It is not supported for semantic_text or numeric types. All `match_phrase` query parameters are supported, and additional options can be specified using function named parameters. ## Syntax -`MATCH_PHRASE(field, query, options)` +`MATCH_PHRASE(field, query, options, zero_terms_query, boost, analyzer, slop)` ### Parameters @@ -18,12 +18,29 @@ The value to find in the provided field. 
#### options -Optional. Additional options for the match_phrase query, provided as function named parameters. +Optional. Additional options for the match_phrase query, specified as function named parameters. + +#### zero_terms_query + +Optional. Indicates whether all documents or none are returned if the analyzer removes all tokens, such as when using a stop filter. Defaults to none. + +#### boost + +Optional. Floating point number used to decrease or increase the relevance scores of the query. Defaults to 1.0. + +#### analyzer + +Optional. Analyzer used to convert the text in the query value into tokens. Defaults to the index-time analyzer mapped for the field, or the index’s default analyzer if none is mapped. + +#### slop + +Optional. Maximum number of positions allowed between matching tokens. Defaults to 0. Transposed terms have a slop of 2. ## Examples +Filters the `books` table to only include rows where the `author` field contains the exact phrase "William Faulkner". ```esql FROM books | WHERE MATCH_PHRASE(author, "William Faulkner") ``` -This example filters rows in the books index where the author field contains the exact phrase "William Faulkner". \ No newline at end of file +Filter rows from the `books` table where the `author` field contains the exact phrase "William Faulkner". diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-max.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-max.txt index 3b6b1f2568e9b..0f5d80ebb0136 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-max.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-max.txt @@ -1,6 +1,6 @@ # MAX -The `MAX` function returns the maximum value of a field. +The MAX function returns the maximum value of a specified field. ## Syntax @@ -8,26 +8,22 @@ The `MAX` function returns the maximum value of a field. ### Parameters -#### `field` +#### field -The field for which the maximum value is calculated. +The column for which you want to find the maximum value. ## Examples -Basic Usage +Finds the highest value in the `languages` column from the `employees` table: ```esql FROM employees | STATS MAX(languages) ``` -Calculate the maximum value of the `languages` field. - -Using Inline Functions +Calculates the largest average salary change across all employees: ```esql FROM employees | STATS max_avg_salary_change = MAX(MV_AVG(salary_change)) ``` - -Calculate the maximum value of the average salary change by first averaging the multiple values per row using the `MV_AVG` function and then applying the `MAX` function. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-max_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-max_over_time.txt new file mode 100644 index 0000000000000..71b576b1fbac4 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-max_over_time.txt @@ -0,0 +1,26 @@ +# MAX_OVER_TIME + +Calculates the maximum value of a field within a specified time window. + +## Syntax + +`MAX_OVER_TIME(field, window)` + +### Parameters + +#### field + +The metric field to calculate the maximum value for. + +#### window + +The time window over which to compute the maximum. + +## Examples + +Calculates the maximum network cost per cluster, grouped into 1-minute time buckets. 
+ +```esql +TS k8s +| STATS cost=SUM(MAX_OVER_TIME(network.cost)) BY cluster, time_bucket = TBUCKET(1minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-md5.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-md5.txt new file mode 100644 index 0000000000000..7d6c0d4c32aa3 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-md5.txt @@ -0,0 +1,24 @@ +# MD5 + +The MD5 command computes the MD5 hash of the input value, if the MD5 hash function is available on the JVM. + +## Syntax + +`MD5(input)` + +### Parameters + +#### input + +The value to hash. + +## Examples + +Calculates the MD5 hash of the `message` column for all rows except those where the message is "Connection error", and displays both the original message and its hash. + +```esql +FROM sample_data +| WHERE message != "Connection error" +| EVAL md5 = md5(message) +| KEEP message, md5 +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-median.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-median.txt index eb21eb0ed2c6a..7b204be0316ed 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-median.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-median.txt @@ -1,6 +1,6 @@ # MEDIAN -The `MEDIAN` function calculates the value that is greater than half of all values and less than half of all values, also known as the 50th percentile. The result is usually approximate. +The MEDIAN function returns the value that is greater than half of all values and less than half of all values, also known as the 50% percentile. The result is usually approximate and may vary slightly each time due to its non-deterministic nature. ## Syntax @@ -8,22 +8,20 @@ The `MEDIAN` function calculates the value that is greater than half of all valu ### Parameters -#### `number` +#### number -The input numeric value for which the median is calculated. +Expression that outputs the values to calculate the median of. ## Examples -Calculating the median and 50th percentile of salaries +Calculates both the median salary and the 50th percentile salary from the employees dataset: ```esql FROM employees | STATS MEDIAN(salary), PERCENTILE(salary, 50) ``` -Calculating the median of maximum values in a multivalued column - -To calculate the median of the maximum values of a multivalued column, first use `MV_MAX` to get the maximum value per row, and then use the result with the `MEDIAN` function: +Calculates the median of the maximum salary change for each employee: ```esql FROM employees @@ -32,5 +30,5 @@ FROM employees ## Limitations -- The `MEDIAN` function is non-deterministic, meaning you may get slightly different results when using the same data. -- Like the `PERCENTILE` function, the `MEDIAN` function provides approximate results. +- The MEDIAN function is usually approximate. +- MEDIAN is non-deterministic, meaning results may vary slightly even with the same data. 
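A hedged sketch showing MEDIAN inside a grouped aggregation, using the same `employees` index; the grouping by `languages` is an assumption for illustration and does not appear in the source examples:

```esql
FROM employees
| STATS median_salary = MEDIAN(salary) BY languages
| SORT languages
```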
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-median_absolute_deviation.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-median_absolute_deviation.txt index 4a704137facb8..3b8fcfe4733fe 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-median_absolute_deviation.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-median_absolute_deviation.txt @@ -1,8 +1,6 @@ # MEDIAN_ABSOLUTE_DEVIATION -Returns the median absolute deviation, a robust measure of variability. It is particularly useful for describing data with outliers or non-normal distributions, as it can be more descriptive than standard deviation. The median absolute deviation is calculated as the median of the absolute deviations from the median of the entire sample. For a random variable `X`, it is defined as `median(|median(X) - X|)`. - -**Note:** This function is usually approximate, similar to `PERCENTILE`. +The MEDIAN_ABSOLUTE_DEVIATION function calculates the median absolute deviation, which is a robust measure of variability. It is particularly useful for describing data with outliers or data that is not normally distributed, and can be more informative than standard deviation in such cases. The value is computed as the median of the absolute differences between each data point and the median of the entire sample. ## Syntax @@ -10,30 +8,27 @@ Returns the median absolute deviation, a robust measure of variability. It is pa ### Parameters -#### `number` +#### number -The input numeric field or expression. +The column or expression containing numeric values for which the median absolute deviation will be calculated. ## Examples -Basic Usage +Calculate the median and median absolute deviation of employee salaries: ```esql FROM employees | STATS MEDIAN(salary), MEDIAN_ABSOLUTE_DEVIATION(salary) ``` -Calculate the median and the median absolute deviation of employee salaries. - -Using Inline Functions +Calculate the median absolute deviation of the maximum salary change per employee: ```esql FROM employees | STATS m_a_d_max_salary_change = MEDIAN_ABSOLUTE_DEVIATION(MV_MAX(salary_change)) ``` -Calculate the median absolute deviation of the maximum values of a multivalued column by first using `MV_MAX` to get the maximum value per row. - ## Limitations -- `MEDIAN_ABSOLUTE_DEVIATION` is non-deterministic, meaning that slightly different results may be returned when using the same data. +- The result of MEDIAN_ABSOLUTE_DEVIATION is usually approximate. +- The function is non-deterministic, so you may get slightly different results when running the same query on the same data. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-min.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-min.txt index 87f04fef44042..52f1ea67a0610 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-min.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-min.txt @@ -1,6 +1,6 @@ # MIN -The `MIN` function calculates the minimum value of a field. +The MIN function returns the minimum value of a specified field. ## Syntax @@ -8,26 +8,22 @@ The `MIN` function calculates the minimum value of a field. ### Parameters -#### `field` +#### field -The field for which the minimum value is calculated. +The column for which you want to find the minimum value. 
## Examples -Basic Usage +Finds the smallest value in the `languages` column from the `employees` table: ```esql FROM employees | STATS MIN(languages) ``` -Calculate the minimum value of the `languages` field. - -Using Inline Functions +Calculates the minimum of the average salary change per employee in the `employees` table: ```esql FROM employees | STATS min_avg_salary_change = MIN(MV_AVG(salary_change)) -``` - -Calculate the minimum value of the average salary change by first averaging the multiple values per row using the `MV_AVG` function and then applying the `MIN` function. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-min_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-min_over_time.txt new file mode 100644 index 0000000000000..edb9e7a0007c0 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-min_over_time.txt @@ -0,0 +1,26 @@ +# MIN_OVER_TIME + +Calculates the minimum value of a field over a specified time window. + +## Syntax + +`MIN_OVER_TIME(field, window)` + +### Parameters + +#### field + +The metric field to calculate the minimum value for. + +#### window + +The time window over which to compute the minimum. + +## Examples + +Calculates the sum of the minimum network cost within each 1-minute time bucket, grouped by cluster. + +```esql +TS k8s +| STATS cost=SUM(MIN_OVER_TIME(network.cost)) BY cluster, time_bucket = TBUCKET(1minute) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-month_name.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-month_name.txt new file mode 100644 index 0000000000000..fec7e4c68531a --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-month_name.txt @@ -0,0 +1,23 @@ +# MONTH_NAME + +Returns the month name for the provided date, using the configured Locale. + +## Syntax + +`MONTH_NAME(date)` + +### Parameters + +#### date + +Date expression to extract the month name from. If `null`, the function returns `null`. + +## Examples + +Extracts the month name from the provided date value. + +```esql +ROW dt = to_datetime("1996-03-21T00:00:00.000Z") +| EVAL monthName = MONTH_NAME(dt); +``` +This example returns "March" for the given date. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_append.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_append.txt index 2eb0b47ba25b1..87dc66df72473 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_append.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_append.txt @@ -1,6 +1,6 @@ # MV_APPEND -Concatenates the values of two multi-value fields into a single field. +The MV_APPEND function concatenates the values of two multi-value fields into a single multi-value field. ## Syntax @@ -8,28 +8,22 @@ Concatenates the values of two multi-value fields into a single field. ### Parameters -#### `field1` +#### field1 -The first multi-value field to concatenate. +The first multi-value field whose values will be concatenated. -#### `field2` +#### field2 -The second multi-value field to concatenate. +The second multi-value field whose values will be concatenated. 
## Examples -```esql -ROW a = ["foo", "bar"], b = ["baz", "qux"] -| EVAL c = MV_APPEND(a, b) -| KEEP a, b, c -``` +Concatenates the birth_date and hire_date fields into a new multi-value field called dates for employees with emp_no 10039 or 10040, and displays the relevant fields. ```esql -ROW x = [1, 2, 3], y = [4, 5, 6] -| EVAL z = MV_APPEND(x, y) -| KEEP x, y, z +FROM employees +| WHERE emp_no == 10039 OR emp_no == 10040 +| SORT emp_no +| EVAL dates = MV_APPEND(birth_date, hire_date) +| KEEP emp_no, birth_date, hire_date, dates ``` - -## Limitations - -No specific limitations are mentioned in the source documentation. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_avg.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_avg.txt index 0cd9a66568de7..170a6c706d5ba 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_avg.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_avg.txt @@ -1,6 +1,6 @@ # MV_AVG -Converts a multivalued field into a single-valued field containing the average of all its values. +The MV_AVG function converts a multivalued field into a single valued field containing the average of all its values. ## Syntax @@ -8,22 +8,15 @@ Converts a multivalued field into a single-valued field containing the average o ### Parameters -#### `number` +#### number -A multivalued expression. +A multivalue expression containing the values to be averaged. ## Examples +Calculates the average of the numbers in the multivalued field `a` and stores it in a new field `avg_a`: + ```esql ROW a=[3, 5, 1, 6] | EVAL avg_a = MV_AVG(a) -``` - -Calculate the average of the values in the multivalued column `a`. - -```esql -FROM bag_of_numbers -| EVAL min = MV_AVG(numbers) -``` - -Retrieve the average value from a multivalued field +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_concat.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_concat.txt index 91f08db58273d..c226341c085dd 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_concat.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_concat.txt @@ -1,6 +1,6 @@ # MV_CONCAT -Converts a multivalued string expression into a single-valued column by concatenating all values, separated by a specified delimiter. +The MV_CONCAT function converts a multivalued string expression into a single value by concatenating all values, separated by a specified delimiter. ## Syntax @@ -8,30 +8,24 @@ Converts a multivalued string expression into a single-valued column by concaten ### Parameters -#### `string` +#### string -A multivalued expression. +Multivalue expression containing the values to concatenate. -#### `delim` +#### delim -The delimiter used to separate the concatenated values. +Delimiter used to separate the concatenated values. ## Examples -Concatenating string values - +Concatenates the values in the multivalued string column `a` into a single string, separated by a comma and space. 
```esql ROW a=["foo", "zoo", "bar"] | EVAL j = MV_CONCAT(a, ", ") ``` -Concatenates the values in the array ["foo", "zoo", "bar"] with a comma and a space as the delimiter: - -Concatenating non-string values - +Concatenates the numeric values in the multivalued column `a` into a single string, separated by a comma and space, after converting them to strings. ```esql ROW a=[10, 9, 8] | EVAL j = MV_CONCAT(TO_STRING(a), ", ") ``` - -Converts the numeric values in the multivalued column `a` to strings using `TO_STRING`, then concatenates them into a single string, separated by `", "`. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_contains.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_contains.txt new file mode 100644 index 0000000000000..d46848a7814f7 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_contains.txt @@ -0,0 +1,39 @@ +# MV_CONTAINS + +The MV_CONTAINS function checks if all values from the second multivalue expression are present in the first multivalue expression. It returns a boolean value. Null values are treated as an empty set. + +## Syntax + +`MV_CONTAINS(superset, subset)` + +### Parameters + +#### superset + +A multivalue expression representing the set to be checked for containing all elements of the subset. + +#### subset + +A multivalue expression representing the set of elements to check for presence in the superset. + +## Examples + +Checks whether the single element "a" is present in the multivalue set ["a", "b", "c"] and stores the result in a new column. +```esql +ROW set = ["a", "b", "c"], element = "a" +| EVAL set_contains_element = mv_contains(set, element) +``` + +Determines if setA is a subset of setB and vice versa, storing the boolean results in new columns. +```esql +ROW setA = ["a","c"], setB = ["a", "b", "c"] +| EVAL a_subset_of_b = mv_contains(setB, setA) +| EVAL b_subset_of_a = mv_contains(setA, setB) +``` + +Filters airport records to include only those where the 'type' field contains both "major" and "military" and the 'scalerank' is 9, then keeps selected columns. +```esql +FROM airports +| WHERE mv_contains(type, ["major","military"]) AND scalerank == 9 +| KEEP scalerank, name, country +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_count.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_count.txt index 785bc819a4084..073519ffbf161 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_count.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_count.txt @@ -1,6 +1,6 @@ # MV_COUNT -Converts a multivalued expression into a single-valued column containing the count of the number of values. +The MV_COUNT function converts a multivalued expression into a single valued column that contains the count of values in the multivalued field. ## Syntax @@ -8,21 +8,15 @@ Converts a multivalued expression into a single-valued column containing the cou ### Parameters -#### `field` +#### field -A multivalued expression. +The multivalue expression to be counted. ## Examples +Counts how many values are present in the multivalued column `a` and stores the result in a new column `count_a`: + ```esql ROW a=["foo", "zoo", "bar"] | EVAL count_a = MV_COUNT(a) -``` - -Count the number of values in the multivalued column `a`. 
- -```esql -FROM bag_of_numbers -| EVAL count = MV_COUNT(numbers) -``` -Count the number of element in a multivalued field `numbers` +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_dedupe.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_dedupe.txt index ae0bd738c85ef..42d6380efa5ac 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_dedupe.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_dedupe.txt @@ -1,8 +1,6 @@ # MV_DEDUPE -Removes duplicate values from a multivalued field. - -**Note:** `MV_DEDUPE` may, but won’t always, sort the values in the column. +Removes duplicate values from a multivalued field. Note that MV_DEDUPE may, but won’t always, sort the values in the column. ## Syntax @@ -10,15 +8,19 @@ Removes duplicate values from a multivalued field. ### Parameters -#### `field` +#### field -A multivalue expression. +A multivalue expression containing the values from which duplicates will be removed. ## Examples +Removes duplicate entries from the array in field `a` and stores the result in a new field `dedupe_a`: + ```esql ROW a=["foo", "foo", "bar", "foo"] | EVAL dedupe_a = MV_DEDUPE(a) ``` -This example removes duplicate values from the multivalued column `a`. +## Limitations + +MV_DEDUPE may, but won’t always, sort the values in the column. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_expand.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_expand.txt index db48c902ee699..ba73e1cb2744b 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_expand.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_expand.txt @@ -1,8 +1,6 @@ # MV_EXPAND -The `MV_EXPAND` command expands multivalued columns into one row per value, duplicating other columns. - -> **Note:** This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. +The MV_EXPAND command expands multivalued columns so that each value appears in its own row, duplicating the other columns for each expanded value. ## Syntax @@ -14,29 +12,15 @@ The `MV_EXPAND` command expands multivalued columns into one row per value, dupl The multivalued column to expand. -## Notes - -The output rows produced by `MV_EXPAND` can be in any order and may not respect preceding `SORT` commands. To ensure a specific ordering, place a `SORT` command after any `MV_EXPAND` commands. - - ## Examples -```esql -ROW a=[1,2,3], b="b", j=["a","b"] -| MV_EXPAND a -``` -Expand a multivalued column `a` into individual rows: +Expands each value in the multivalued column `a` into its own row, while duplicating the values of columns `b` and `j` for each expanded value. ```esql ROW a=[1,2,3], b="b", j=["a","b"] | MV_EXPAND a -| MV_EXPAND j ``` -Expand two multivalued columns `a` and `j` into individual rows: -```esql -ROW a=[1,2,3,4,5], b="b" -| MV_EXPAND a -| WHERE a > 2 -``` -Expand a multivalued column and then filtering the results: +## Limitations + +The output rows produced by MV_EXPAND can be in any order and may not respect preceding SORT commands.
To guarantee a specific ordering, place a SORT after any MV_EXPAND commands. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_first.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_first.txt index df7a1a5b0769c..d2f0f279b80df 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_first.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_first.txt @@ -1,6 +1,6 @@ # MV_FIRST -Converts a multivalued expression into a single-valued column containing the first value. This is particularly useful when working with functions like `SPLIT` that produce multivalued columns in a known order. +The MV_FIRST function converts a multivalued expression into a single-valued column containing the first value. This is especially useful when working with functions that produce multivalued columns in a known order, such as `SPLIT`. The order in which multivalued fields are read from underlying storage is not guaranteed and is frequently ascending, but you should not rely on this. If you need the minimum value, use `MV_MIN` instead; it is optimized for sorted values, so there is no performance benefit to using `MV_FIRST`. ## Syntax @@ -8,22 +8,15 @@ Converts a multivalued expression into a single-valued column containing the fir ### Parameters -#### `field` +#### field -A multivalued expression. +A multivalue expression to extract the first value from. ## Examples -Extracting the first value from a multivalued column +Extracts the first value from a semicolon-separated string by splitting the string and selecting the first element. ```esql ROW a="foo;bar;baz" | EVAL first_a = MV_FIRST(SPLIT(a, ";")) ``` - -This example splits the string `a` into multiple values using the `SPLIT` function and extracts the first value, resulting in `first_a = "foo"`. - -## Notes - -- The order in which multivalued fields are read from underlying storage is not guaranteed. While it is often ascending, this behavior should not be relied upon. -- If you need the minimum value, use `MV_MIN` instead of `MV_FIRST`. The `MV_MIN` function is optimized for sorted values and offers no performance disadvantage compared to `MV_FIRST`. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_intersection.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_intersection.txt new file mode 100644 index 0000000000000..a232fb47b4b67 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_intersection.txt @@ -0,0 +1,59 @@ +# MV_INTERSECTION + +The MV_INTERSECTION function returns the values that are present in both input fields. If either field is null or if there are no matching values, the function returns `null`. + +## Syntax + +`MV_INTERSECTION(field1, field2)` + +### Parameters + +#### field1 + +Multivalue expression. If this parameter is null, the function returns null. + +#### field2 + +Multivalue expression. If this parameter is null, the function returns null. + +## Examples + +Find the intersection of two arrays of integers and keep the common values. + +```esql +ROW a = [1, 2, 3, 4, 5], b = [2, 3, 4, 5, 6] +| EVAL finalValue = MV_INTERSECTION(a, b) +| KEEP finalValue +``` + +Find the intersection of two arrays of long integers and keep the values present in both arrays.
+ +```esql +ROW a = [1, 2, 3, 4, 5]::long, b = [2, 3, 4, 5, 6]::long +| EVAL finalValue = MV_INTERSECTION(a, b) +| KEEP finalValue +``` + +Find the intersection of two arrays of boolean values and return the shared values. + +```esql +ROW a = [true, false, false, false], b = [false] +| EVAL finalValue = MV_INTERSECTION(a, b) +| KEEP finalValue +``` + +Find the intersection of two arrays of floating-point numbers and output the values they have in common. + +```esql +ROW a = [5.2, 10.5, 1.12345, 2.6928], b = [10.5, 2.6928] +| EVAL finalValue = MV_INTERSECTION(a, b) +| KEEP finalValue +``` + +Find the intersection of two arrays of strings and display the values that appear in both. + +```esql +ROW a = ["one", "two", "three", "four", "five"], b = ["one", "four"] +| EVAL finalValue = MV_INTERSECTION(a, b) +| KEEP finalValue +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_last.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_last.txt index f01eb8ff2a254..4cf45f20fbfca 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_last.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_last.txt @@ -1,8 +1,6 @@ # MV_LAST -Converts a multivalue expression into a single-valued column containing the last value. This is particularly useful when working with functions that produce multivalued columns in a known order, such as `SPLIT`. - -The order in which multivalued fields are read from underlying storage is not guaranteed. While it is often ascending, this behavior should not be relied upon. If you need the maximum value, use `MV_MAX` instead of `MV_LAST`. `MV_MAX` is optimized for sorted values and does not offer a performance advantage over `MV_LAST`. +The MV_LAST function converts a multivalue expression into a single-valued column containing the last value. This is especially useful when working with functions that produce multivalued columns in a known order, such as `SPLIT`. The order in which multivalued fields are read from storage is not guaranteed and is frequently ascending, but you should not rely on this. If you need the maximum value, use `MV_MAX` instead; it is optimized for sorted values, so there is no performance benefit to using `MV_LAST`. ## Syntax @@ -10,17 +8,16 @@ The order in which multivalued fields are read from underlying storage is not gu ### Parameters -#### `field` +#### field -A multivalue expression. +A multivalue expression to be reduced to its last value. ## Examples -Extracting the last value from a multivalued column +Extracts the last value from a semicolon-separated string in column `a` and assigns it to a new column `last_a`. ```esql ROW a="foo;bar;baz" | EVAL last_a = MV_LAST(SPLIT(a, ";")) ``` - -This example splits the string `a` into multiple values using the `SPLIT` function and then extracts the last value, resulting in `last_a = "baz"`. +This example splits the string in column `a` by semicolons and returns the last value, which is `baz`.
\ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_max.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_max.txt index 2e6389f779721..8b72b4d8c4bcd 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_max.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_max.txt @@ -1,6 +1,6 @@ # MV_MAX -Converts a multivalued expression into a single-valued column containing the maximum value. +The MV_MAX function converts a multivalued expression into a single valued column containing the maximum value. ## Syntax @@ -8,26 +8,22 @@ Converts a multivalued expression into a single-valued column containing the max ### Parameters -#### `field` +#### field -Multivalue expression. +Multivalue expression to be evaluated for its maximum value. ## Examples +Calculates the maximum value from a list of numbers in a single row. + ```esql ROW a=[3, 5, 1] | EVAL max_a = MV_MAX(a) ``` -Finds the maximum value in the multivalued column `a`, resulting in `max_a = 5`. +Finds the maximum string value from a list by comparing their UTF-8 representations. ```esql -FROM bag_of_numbers -| EVAL max = MV_MAX(numbers) -``` - -Finds the maximum value in the column `a` by comparing the strings' UTF-8 representations, resulting in `max_a = "zoo"`. - -## Supported Types - -This function can be used with any column type, including `keyword` columns. For `keyword` columns, it picks the last string by comparing their UTF-8 representation byte by byte. +ROW a=["foo", "zoo", "bar"] +| EVAL max_a = MV_MAX(a) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_median.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_median.txt index cc445b2afd492..6debfcd670e53 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_median.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_median.txt @@ -1,6 +1,6 @@ # MV_MEDIAN -Converts a multivalued field into a single-valued field containing the median value. +The MV_MEDIAN function converts a multivalued field into a single value containing the median of the values. If the number of values is even, the result is the average of the two middle entries. For non-floating point columns, the average rounds down. ## Syntax @@ -8,22 +8,22 @@ Converts a multivalued field into a single-valued field containing the median va ### Parameters -#### `number` +#### number -Multivalue expression. +A multivalue expression containing the values for which the median will be calculated. ## Examples +Calculates the median value from the array `[3, 5, 1]` and stores it in the field `median_a`: + ```esql ROW a=[3, 5, 1] | EVAL median_a = MV_MEDIAN(a) ``` -Calculate the median value of the multivalued column `a`. +Calculates the median value from the array `[3, 7, 1, 6]`, averaging the two middle values and rounding down if necessary, and stores it in the field `median_a`: ```esql ROW a=[3, 7, 1, 6] | EVAL median_a = MV_MEDIAN(a) -``` - -For rows with an even number of values, the result is the average of the middle two entries. If the column is not of a floating-point type, the average rounds **down**. 
+``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_median_absolute_deviation.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_median_absolute_deviation.txt index 27f3da7e8afde..a2c0c3b879e5b 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_median_absolute_deviation.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_median_absolute_deviation.txt @@ -1,6 +1,6 @@ # MV_MEDIAN_ABSOLUTE_DEVIATION -Converts a multivalued field into a single-valued field containing the median absolute deviation. The median absolute deviation is calculated as the median of each data point’s deviation from the median of the entire sample. For a random variable `X`, it is defined as `median(|median(X) - X|)`. +The MV_MEDIAN_ABSOLUTE_DEVIATION function converts a multivalued field into a single value representing the median absolute deviation. This is calculated as the median of the absolute differences between each value and the median of the entire sample. For a random variable `X`, the median absolute deviation is `median(|median(X) - X|)`. If the field contains an even number of values, the median is computed as the average of the two middle values, and if the values are not floating point numbers, the averages are rounded towards zero. ## Syntax @@ -8,22 +8,15 @@ Converts a multivalued field into a single-valued field containing the median ab ### Parameters -#### `number` +#### number -A multivalue expression. +A multivalue expression containing the values for which the median absolute deviation will be calculated. ## Examples -Calculating the median absolute deviation and median +Calculates both the median absolute deviation and the median for the list of values `[0, 2, 5, 6]` and returns them as columns. ```esql ROW values = [0, 2, 5, 6] | EVAL median_absolute_deviation = MV_MEDIAN_ABSOLUTE_DEVIATION(values), median = MV_MEDIAN(values) ``` - -This example calculates the median absolute deviation and the median for the multivalued field `values`. - -## Notes - -- If the field contains an even number of values, the medians are calculated as the average of the middle two values. -- If the values are not floating-point numbers, the averages are rounded towards 0. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_min.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_min.txt index 0b682e55add0f..98f8538538835 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_min.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_min.txt @@ -1,6 +1,6 @@ # MV_MIN -Converts a multivalued expression into a single-valued column containing the minimum value. +The MV_MIN function converts a multivalued expression into a single valued column containing the minimum value. ## Syntax @@ -8,26 +8,22 @@ Converts a multivalued expression into a single-valued column containing the min ### Parameters -#### `field` +#### field -A multivalued expression. - -## Supported Types - -This function can be used with any column type, including `keyword` columns. For `keyword` columns, it selects the first string by comparing their UTF-8 representation byte by byte. +Multivalue expression to be reduced to its minimum value. 
## Examples -#```esql +Calculates the minimum value from a list of numbers in the column `a` and stores it in a new column `min_a`: + +```esql ROW a=[2, 1] | EVAL min_a = MV_MIN(a) ``` -Extracts the minimum value from the multivalued column `a`, resulting in `min_a = 1`. +Finds the minimum string value from a list in the column `a`, comparing their UTF-8 representations, and stores it in `min_a`: -#```esql -FROM bag_of_numbers -| EVAL min = MV_MIN(numbers) +```esql +ROW a=["foo", "bar"] +| EVAL min_a = MV_MIN(a) ``` - -Extracts the minimum value from the multivalued column `numbers` by comparing the values lexicographically. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_percentile.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_percentile.txt index ab9e422f24cf7..8e8af8c8308d8 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_percentile.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_percentile.txt @@ -1,6 +1,6 @@ # MV_PERCENTILE -Converts a multivalued field into a single-valued field containing the value at which a certain percentage of observed values occur. +The MV_PERCENTILE function converts a multivalued field into a single valued field containing the value at which a specified percentage of observed values occur. ## Syntax @@ -8,19 +8,19 @@ Converts a multivalued field into a single-valued field containing the value at ### Parameters -#### `number` +#### number -A multivalue expression. +Multivalue expression containing the values to analyze. -#### `percentile` +#### percentile -The percentile to calculate. Must be a number between 0 and 100. Numbers outside this range will return `null`. +The percentile to calculate. Must be a number between 0 and 100. If the value is out of range, the function returns null. ## Examples +Calculates the 50th percentile (median) of an array of values and compares it to the result of the MV_MEDIAN function. + ```esql ROW values = [5, 5, 10, 12, 5000] | EVAL p50 = MV_PERCENTILE(values, 50), median = MV_MEDIAN(values) ``` - -This example calculates the 50th percentile (median) of the multivalued field `values` and compares it to the result of the `MV_MEDIAN` function. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_pseries_weighted_sum.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_pseries_weighted_sum.txt index e5d07132de9c8..f88290729db7c 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_pseries_weighted_sum.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_pseries_weighted_sum.txt @@ -1,6 +1,6 @@ # MV_PSERIES_WEIGHTED_SUM -Converts a multivalued expression into a single-valued column by multiplying each element in the input list by its corresponding term in the P-Series and computing the sum. +The MV_PSERIES_WEIGHTED_SUM function transforms a multivalued expression into a single-valued column by multiplying each element in the input list by its corresponding term in a P-Series and then summing the results. ## Syntax @@ -8,22 +8,20 @@ Converts a multivalued expression into a single-valued column by multiplying eac ### Parameters -#### `number` +#### number -A multivalue expression. +The multivalue expression to be processed. 
-#### `p` +#### p -A constant number representing the *p* parameter in the P-Series. It determines the impact of each element’s contribution to the weighted sum. +A constant number representing the *p* parameter in the P-Series, which affects each element’s contribution to the weighted sum. ## Examples -Calculating the weighted sum of a multivalued column +Calculates the weighted sum of the list `[70.0, 45.0, 21.0, 21.0, 21.0]` using a P-Series with p set to 1.5 and returns the result in the `sum` column. ```esql ROW a = [70.0, 45.0, 21.0, 21.0, 21.0] | EVAL sum = MV_PSERIES_WEIGHTED_SUM(a, 1.5) | KEEP sum ``` - -This example calculates the weighted sum of the multivalued column `a` using a P-Series parameter of `1.5`. The result is stored in the `sum` column. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_slice.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_slice.txt index 08af90bf8bd9e..3c0728ea7fb3d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_slice.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_slice.txt @@ -1,6 +1,6 @@ # MV_SLICE -The `MV_SLICE` function extracts a subset of a multivalued field based on specified start and end index values. It is particularly useful when working with functions that produce multivalued columns in a known order, such as `SPLIT` or `MV_SORT`. It allows access of specific elements in the multivalue field array.. +The MV_SLICE function returns a subset of a multivalued field using specified start and end index values. This is especially useful when working with functions that produce multivalued columns in a known order, such as `SPLIT` or `MV_SORT`. The order in which multivalued fields are read from storage is not guaranteed and should not be relied upon. ## Syntax @@ -8,44 +8,30 @@ The `MV_SLICE` function extracts a subset of a multivalued field based on specif ### Parameters -#### `field` +#### field -- A multivalue expression. If `null`, the function returns `null`. +Multivalue expression. If this parameter is `null`, the function returns `null`. -#### `start` +#### start -- The starting position of the slice. If `null`, the function returns `null`. -- Can be negative, where `-1` refers to the last value in the list. +Start position. If this parameter is `null`, the function returns `null`. The start argument can be negative; an index of -1 refers to the last value in the list. -#### `end` (Optional) +#### end -- The ending position of the slice (inclusive). If omitted, only the value at the `start` position is returned. -- Can be negative, where `-1` refers to the last value in the list. +End position (included). Optional; if omitted, only the value at the `start` position is returned. The end argument can be negative; an index of -1 refers to the last value in the list. ## Examples -Extracting specific slices from a multivalued field +Extracts the value at index 1 and the values from index 2 to 3 from the multivalued field `a`. ```esql -ROW a = [1, 2, 2, 3] -| EVAL a1 = MV_SLICE(a, 1), a2 = MV_SLICE(a, 2, 3) +row a = [1, 2, 2, 3] +| eval a1 = mv_slice(a, 1), a2 = mv_slice(a, 2, 3) ``` -This example extracts: -- `a1` as the value starting at index `1` (second value in the list). -- `a2` as the values from index `2` to `3` (third and fourth values in the list). 
- -Using negative indices to slice from the end of the list +Extracts the value at index -2 and the values from index -3 to -1 from the multivalued field `a`. ```esql -ROW a = [1, 2, 2, 3] -| EVAL a1 = MV_SLICE(a, -2), a2 = MV_SLICE(a, -3, -1) +row a = [1, 2, 2, 3] +| eval a1 = mv_slice(a, -2), a2 = mv_slice(a, -3, -1) ``` - -This example extracts: -- `a1` as the value starting at the second-to-last index (`-2`). -- `a2` as the values from the third-to-last index (`-3`) to the last index (`-1`). - -## Notes - -- The order in which multivalued fields are read from underlying storage is not guaranteed. While it is often ascending, this behavior should not be relied upon. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_sort.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_sort.txt index c3e6b7146268c..0adb842984df3 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_sort.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_sort.txt @@ -1,6 +1,6 @@ # MV_SORT -Sorts a multivalued field in lexicographical order. +The MV_SORT function sorts the values in a multivalued field in lexicographical order. ## Syntax @@ -8,25 +8,20 @@ Sorts a multivalued field in lexicographical order. ### Parameters -#### `field` +#### field -- Multivalue expression. If `null`, the function returns `null`. +Multivalue expression to be sorted. If the value is `null`, the function returns `null`. -#### `order` +#### order -- Sort order. The valid options are `ASC` and `DESC`. The default is `ASC`. +Optional. Specifies the sort order. Valid options are `ASC` (ascending) and `DESC` (descending). The default is `ASC`. ## Examples +Sorts the values in the array `[4, 2, -3, 2]` in both ascending and descending order, assigning the sorted arrays to `sa` and `sd` respectively. + ```esql ROW a = [4, 2, -3, 2] | EVAL sa = mv_sort(a), sd = mv_sort(a, "DESC") ``` - -This example sorts the multivalued field `a` in ascending order (`sa`) and descending order (`sd`). - - -```esql -FROM bag_of_numbers -| EVAL sorted = MV_SORT(numbers) -``` +This example sorts the array `[4, 2, -3, 2]` in ascending order and descending order, storing the results in `sa` and `sd` respectively. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_sum.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_sum.txt index de237cc2e918b..63f0b161bbba6 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_sum.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_sum.txt @@ -1,6 +1,6 @@ # MV_SUM -Converts a multivalued field into a single-valued field containing the sum of all its values. +The MV_SUM function converts a multivalued field into a single valued field containing the sum of all its values. ## Syntax @@ -8,15 +8,15 @@ Converts a multivalued field into a single-valued field containing the sum of al ### Parameters -#### `number` +#### number -A multivalued expression. +A multivalue expression containing the values to be summed. ## Examples +Calculates the sum of all values in the multivalued field `a` and stores the result in a new field `sum_a`. + ```esql ROW a=[3, 5, 6] | EVAL sum_a = MV_SUM(a) -``` - -This example calculates the sum of the values in the multivalued column `a`. 
\ No newline at end of file +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_union.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_union.txt new file mode 100644 index 0000000000000..9416660a3c013 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_union.txt @@ -0,0 +1,54 @@ +# MV_UNION + +The MV_UNION function returns all unique values from the combined input fields, performing a set union. Null values are treated as empty sets, and the function returns `null` only if both fields are null. + +## Syntax + +`MV_UNION(field1, field2)` + +### Parameters + +#### field1 + +Multivalue expression. Null values are treated as empty sets. + +#### field2 + +Multivalue expression. Null values are treated as empty sets. + +## Examples + +Combine two arrays of integers and return all unique values: +```esql +ROW a = [1, 2, 3, 4, 5], b = [2, 3, 4, 5, 6] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` + +Combine two arrays of long integers and return all unique values: +```esql +ROW a = [1, 2, 3, 4, 5]::long, b = [2, 3, 4, 5, 6]::long +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` + +Combine two arrays of boolean values and return all unique values: +```esql +ROW a = [true, false], b = [false] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` + +Combine two arrays of floating-point numbers and return all unique values: +```esql +ROW a = [5.2, 10.5, 1.12345], b = [10.5, 2.6928] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` + +Combine two arrays of strings and return all unique values: +```esql +ROW a = ["one", "two", "three"], b = ["two", "four"] +| EVAL finalValue = MV_UNION(a, b) +| KEEP finalValue +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_zip.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_zip.txt index c1a982b089c81..e5655b7fad127 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_zip.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-mv_zip.txt @@ -1,6 +1,6 @@ # MV_ZIP -Combines the values from two multivalued fields with a delimiter that joins them together. +The MV_ZIP function combines the values from two multivalued fields, joining each pair of values together using a specified delimiter. ## Syntax @@ -8,32 +8,24 @@ Combines the values from two multivalued fields with a delimiter that joins them ### Parameters -#### `string1` +#### string1 -Multivalue expression. +A multivalue expression representing the first set of values to zip. -#### `string2` +#### string2 -Multivalue expression. +A multivalue expression representing the second set of values to zip. -#### `delim` +#### delim -Optional. The delimiter used to join the values. If omitted, `,` is used as the default delimiter. +Optional. The delimiter used to join each pair of values. If omitted, a comma (`,`) is used by default. ## Examples -Combining two multivalued fields with a custom delimiter +Combines the values from two multivalued fields `a` and `b`, joining each pair with a hyphen and storing the result in field `c`. ```esql ROW a = ["x", "y", "z"], b = ["1", "2"] | EVAL c = mv_zip(a, b, "-") | KEEP a, b, c -``` - -This example combines the values from two multivalued fields `a` and `b` using the `-` delimiter. 
- -#### Result - -| a | b | c | -|------------------|-------------|----------------| -| ["x", "y", "z"] | ["1", "2"] | ["x-1", "y-2", "z"] | +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-now.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-now.txt index cba069d1fd162..7ea21c2090d4d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-now.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-now.txt @@ -1,4 +1,4 @@ -## NOW +# NOW Returns the current date and time. @@ -12,15 +12,15 @@ This function does not take any parameters. ## Examples -#Retrieve the current date and time +Returns a single row with the current date and time as the value of the `current_date` column: ```esql ROW current_date = NOW() ``` -#Retrieve logs from the last hour +Filters and retrieves log entries from the `sample_data` index where the `@timestamp` is within the last hour: ```esql FROM sample_data | WHERE @timestamp > NOW() - 1 hour -``` +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-operators.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-operators.txt index e8311dc56310f..d074e7673548d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-operators.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-operators.txt @@ -1,20 +1,19 @@ -```markdown -# ES|QL Operators +# ES|QL Operators Reference -This document provides an overview of the operators available in ES|QL, categorized into binary, unary, logical, and other operators. Each operator is accompanied by an example query to demonstrate its usage. +This document provides an overview of the operators available in ES|QL (Elasticsearch Query Language), including binary, unary, logical, and other special operators. Each section includes a brief description and an example ES|QL query. --- -## Binary Operators +## 1. Binary Operators -Binary operators are used to compare or perform arithmetic operations between two values. +Binary operators operate on two expressions. ### Equality (`==`) Checks if two values are equal. ```esql FROM employees -| WHERE first_name == "John" +| WHERE first_name == "Alice" ``` ### Inequality (`!=`) @@ -26,7 +25,7 @@ FROM employees ``` ### Less Than (`<`) -Checks if a value is less than another. +Checks if the left value is less than the right value. ```esql FROM employees @@ -34,7 +33,7 @@ FROM employees ``` ### Less Than or Equal To (`<=`) -Checks if a value is less than or equal to another. +Checks if the left value is less than or equal to the right value. ```esql FROM employees @@ -42,19 +41,19 @@ FROM employees ``` ### Greater Than (`>`) -Checks if a value is greater than another. +Checks if the left value is greater than the right value. ```esql FROM employees -| WHERE age > 30 +| WHERE height > 1.8 ``` ### Greater Than or Equal To (`>=`) -Checks if a value is greater than or equal to another. +Checks if the left value is greater than or equal to the right value. ```esql FROM employees -| WHERE experience_years >= 5 +| WHERE languages >= 3 ``` ### Add (`+`) @@ -62,15 +61,16 @@ Adds two values. 
```esql FROM employees -| EVAL total_compensation = salary + bonus +| EVAL total = salary + bonus +| WHERE total > 100000 ``` ### Subtract (`-`) -Subtracts one value from another. +Subtracts the right value from the left value. ```esql FROM employees -| EVAL remaining_vacation_days = total_vacation_days - used_vacation_days +| WHERE vacation_days - used_days > 5 ``` ### Multiply (`*`) @@ -78,63 +78,63 @@ Multiplies two values. ```esql FROM employees -| EVAL annual_salary = monthly_salary * 12 +| WHERE height * 100 > 180 ``` ### Divide (`/`) -Divides one value by another. +Divides the left value by the right value. ```esql FROM employees -| EVAL average_salary = total_salary / employee_count +| WHERE salary / 12 > 4000 ``` ### Modulus (`%`) -Returns the remainder of a division. +Returns the remainder of division. ```esql FROM employees -| EVAL remainder = employee_id % 2 +| WHERE emp_no % 2 == 0 ``` --- -## Unary Operators +## 2. Unary Operators -Unary operators operate on a single operand. +Unary operators operate on a single expression. ### Negation (`-`) Negates a numeric value. ```esql -ROW value = 10 -| EVAL negative_value = -value +ROW a = 5 +| EVAL neg_a = -a ``` --- -## Logical Operators +## 3. Logical Operators -Logical operators are used to combine or negate conditions. +Logical operators are used to combine multiple boolean expressions. ### AND -Returns `true` if both conditions are true. +Returns `true` if both expressions are true. ```esql FROM employees -| WHERE age > 30 AND department == "Engineering" +| WHERE still_hired AND salary > 50000 ``` ### OR -Returns `true` if at least one condition is true. +Returns `true` if at least one expression is true. ```esql FROM employees -| WHERE department == "HR" OR department == "Finance" +| WHERE department == "Engineering" OR department == "IT" ``` ### NOT -Negates a condition. +Negates a boolean expression. ```esql FROM employees @@ -143,19 +143,16 @@ FROM employees --- -## Other Operators +## 4. Other Operators and Predicates -### IS NULL and IS NOT NULL -Checks if a value is `NULL` or not. +### IS NULL / IS NOT NULL +Checks if a value is `null` or not. -#### IS NULL ```esql FROM employees | WHERE birth_date IS NULL -| KEEP first_name, last_name ``` -#### IS NOT NULL ```esql FROM employees | WHERE is_rehired IS NOT NULL @@ -163,11 +160,11 @@ FROM employees ``` ### Cast (`::`) -Casts a value to a specific type. +Casts a value to a different type. ```esql FROM employees -| EVAL salary_as_string = salary::KEYWORD +| EVAL salary_str = salary::keyword ``` ### IN @@ -175,43 +172,54 @@ Checks if a value is in a list of values. ```esql ROW a = 1, b = 4, c = 3 -| WHERE c - a IN (3, b / 2, a) +| WHERE c-a IN (3, b / 2, a) ``` ### LIKE -Filters data based on string patterns using wildcards. +Pattern matching using wildcards (`*` for any number of characters, `?` for a single character). -#Basic usage ```esql FROM employees -| WHERE first_name LIKE "J*" +| WHERE first_name LIKE """?b*""" +| KEEP first_name, last_name ``` -#Escaping special characters ```esql ROW message = "foo * bar" -| WHERE message LIKE "foo \\* bar" +| WHERE message LIKE """foo \* bar""" ``` ### RLIKE -Filters data based on string patterns using regular expressions. +Pattern matching using regular expressions. ```esql FROM employees -| WHERE first_name RLIKE "J.*" +| WHERE last_name RLIKE """^S.*n$""" +| KEEP first_name, last_name ``` -### Cast `::` +--- -The `::` operator provides a convenient alternative syntax to the `TO_` conversion functions. +## 5. 
Example: Combining Operators -Examples: +You can combine multiple operators in a single query: ```esql FROM employees -| EVAL salary = salary::double +| WHERE (salary > 50000 AND still_hired) OR department IN ("Engineering", "IT") ``` -```esql -ROW ver = CONCAT(("0"::INT + 1)::STRING, ".2.3")::VERSION -``` +--- + +## 6. Notes + +- Wildcards in `LIKE`: `*` matches zero or more characters, `?` matches one character. +- Use triple quotes (`"""`) to avoid escaping special characters in patterns. +- For more advanced pattern matching, use `RLIKE` with regular expressions. +- The `IN` operator can be used with lists of literals, fields, or expressions. +- Use `IS NULL` and `IS NOT NULL` for null checks. +- Use `::` or type conversion functions to cast values. + +--- + +For more details, refer to the [ES|QL Syntax Reference](#). \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-overview.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-overview.txt index c06d1efcd3094..67ede9f6bee13 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-overview.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-overview.txt @@ -1,105 +1,96 @@ ```markdown -# Elasticsearch Query Language (ES|QL) +# ES|QL: The Elasticsearch Query Language -The Elasticsearch Query Language (ES|QL) is a powerful and intuitive language designed to filter, transform, and analyze data stored in Elasticsearch. It is built to be user-friendly and accessible to a wide range of users, including end users, SRE teams, application developers, and administrators. ES|QL enables users to perform complex data operations such as filtering, aggregation, and time-series analysis, as well as generate visualizations and statistical insights. +## Overview -## Key Features of ES|QL +**ES|QL** (Elasticsearch Query Language) is a modern, piped query language designed for filtering, transforming, and analyzing data stored in Elasticsearch. It is intended to be intuitive and accessible for a wide range of users, including SRE teams, application developers, administrators, and end users. ES|QL enables users to author queries for event detection, statistical analysis, and data visualization, supporting a rich set of commands and functions for complex data operations. -- **Pipe-based Syntax**: ES|QL uses a pipe (`|`) syntax to chain operations, where the output of one operation becomes the input for the next. This step-by-step approach simplifies complex data transformations and analysis. -- **Rich Command Set**: ES|QL supports a wide range of commands and functions for data manipulation, including filtering, aggregation, enrichment, and statistical analysis. -- **Ease of Use**: Designed to be easy to learn and use, ES|QL is suitable for both technical and non-technical users. -- **Integration with Elasticsearch**: ES|QL queries are executed directly within Elasticsearch, leveraging its compute engine for high performance and scalability. +## Key Features ---- +- **Piped Syntax:** ES|QL uses the pipe character (`|`) to chain commands, allowing users to build queries as a sequence of operations. Each command processes the output of the previous one, enabling step-by-step data transformation. 
+- **Wide Command Support:** Includes commands for filtering (`WHERE`), aggregation (`STATS`), enrichment (`ENRICH`, `LOOKUP JOIN`), sorting (`SORT`), column management (`KEEP`, `DROP`, `RENAME`), and more. +- **Functionality:** Supports statistical analysis, time-series operations, pattern matching, and data enrichment. +- **Extensibility:** Designed to work with Elasticsearch and, in the future, other runtimes. -## Known Limitations of ES|QL +## Example ES|QL Query -While ES|QL is a powerful tool, it has some limitations to be aware of: - -### Result Set Size -- By default, ES|QL queries return up to 1,000 rows. This can be increased to a maximum of 10,000 rows using the `LIMIT` command. This upper limit is configurable but comes with trade-offs such as increased memory usage and processing time. - -### Field Types -- ES|QL supports a wide range of field types, including `boolean`, `date`, `keyword`, `text`, `long`, and `double`. However, some field types, such as `binary`, `nested`, and `histogram`, are not yet supported. -- When querying multiple indices, fields with conflicting types must be explicitly converted to a single type using type conversion functions. - -### Full-Text Search -- Full-text search is in technical preview and has limitations. For example, full-text search functions like `MATCH` must be used directly after the `FROM` command or close to it. Additionally, disjunctions (`OR`) in `WHERE` clauses are restricted unless all clauses use full-text functions. - -### Time Series Data Streams -- ES|QL does not currently support querying time series data streams (TSDS). - -### Date Math -- Date math expressions are limited. For example, subtracting two datetime values or using parentheses in date math expressions is not supported. - -### Multivalued Fields -- Functions generally return `null` when applied to multivalued fields unless explicitly documented otherwise. Use multivalue functions to handle such fields. - -### Timezone Support -- ES|QL only supports the UTC timezone. - -### Kibana Integration -- The Discover interface in Kibana has a 10,000-row limit for displayed results and a 50-column limit for displayed fields. These limits apply only to the UI and not to the underlying query execution. - ---- +```esql +FROM logs-* +| WHERE @timestamp >= NOW() - 1 day +| STATS event_count = COUNT(*) BY event.code +| SORT event_count DESC +| LIMIT 10 +``` -## Using ES|QL in Kibana +## Using ES|QL via the REST API -ES|QL is integrated into Kibana, allowing users to query and visualize data directly from the Discover interface. Key points for using ES|QL in Kibana include: +ES|QL queries can be executed using the Elasticsearch REST API. The endpoint is `POST /_query`, and the query is provided in the request body. -- **Enablement**: ES|QL is enabled by default in Kibana but can be disabled via the `enableESQL` setting in Advanced Settings. -- **Query Bar**: The query bar in Discover supports ES|QL syntax, with features like auto-complete and query history for ease of use. -- **Visualization**: ES|QL queries can be used to create visualizations, which can be saved to dashboards or used for alerting. -- **Time Filtering**: Use the standard time filter or custom time parameters (`?_tstart` and `?_tend`) to filter data by time range. 
+**Example:** -### Example Query in Kibana -```esql -FROM kibana_sample_data_logs -| WHERE @timestamp > NOW() - 1 day -| STATS total_bytes = SUM(bytes) BY geo.dest -| SORT total_bytes DESC -| LIMIT 5 +```http +POST /_query +{ + "query": """ + FROM employees + | WHERE hire_date >= "2020-01-01" + | STATS avg_salary = AVG(salary) BY department + | SORT avg_salary DESC + | LIMIT 5 + """ +} ``` -This query retrieves the top 5 destinations by total bytes in the last 24 hours. +**Notes:** +- You can use triple quotes (`"""`) for multi-line queries. +- Parameters can be passed for dynamic queries. +- The API returns results as a table, with each row representing a document and each column a field. ---- +## ES|QL in Kibana -## Cross-Cluster Querying with ES|QL - -ES|QL supports querying across multiple clusters, enabling users to analyze data stored in different Elasticsearch clusters. To query remote clusters, use the format `:` in the `FROM` command. - -### Example Cross-Cluster Query -```esql -FROM cluster_one:employees,cluster_two:other-employees-* -| STATS avg_salary = AVG(salary) BY department -| SORT avg_salary DESC -``` +- **Editor Shortcuts:** The ES|QL editor in Kibana supports keyboard shortcuts for running queries and commenting lines. +- **Visualization:** Queries in Discover automatically generate visualizations (e.g., bar charts, histograms) based on the query output. If your indices do not have a `@timestamp` field, you can specify a custom time field. +- **Dashboards:** ES|QL queries can be used to create panels in Kibana dashboards. You can save and edit visualizations directly from the dashboard interface. +- **Enrich Policies:** Before using the `ENRICH` command, you must create and execute an enrich policy. Kibana provides UI support for policy creation. +- **Alerting:** ES|QL queries can be used to create alerting rules in Discover. -This query retrieves the average salary by department across two clusters and sorts the results in descending order. +**Important Kibana Limitations:** +- Filtering via the UI is disabled in ES|QL mode; use the `WHERE` command in your query. +- Discover displays a maximum of 10,000 rows and 50 columns per query. +- CSV export is limited to 10,000 rows. +- Querying many indices without filters may result in errors due to large response sizes; use `DROP` or `KEEP` to limit fields. ---- +## Cross-Cluster Querying -## Using the ES|QL REST API +ES|QL supports querying across multiple clusters, indices, data streams, or aliases. To query remote clusters, use the format `:`. -The ES|QL REST API allows users to execute ES|QL queries programmatically. Queries are sent as HTTP POST requests to the `_query` endpoint. +**Example:** -### Example REST API Request -```json -POST /_query -{ - "query": "FROM employees | WHERE salary > 50000 | SORT salary DESC | LIMIT 10" -} +```esql +FROM cluster_one:employees-00001,cluster_two:other-employees-* ``` -### Key Points -- The `query` field contains the ES|QL query as a string. -- Use the `params` field to pass query parameters dynamically. -- The API returns results in JSON format, making it easy to integrate with other applications. - ---- +**Notes:** +- All underlying indices and shards must be active; queries will fail if any are unassigned or paused. +- Field type mismatches across indices must be resolved using type conversion functions (e.g., `TO_IP`, `TO_STRING`). +- Time series data streams (TSDS) are not supported. 
+- Union types are in technical preview; ambiguous fields must be explicitly converted to a single type. + +## Known Limitations + +- **Result Set Size:** By default, queries return up to 1,000 rows; the maximum is 10,000 rows, configurable via cluster settings. Larger result sets increase memory and processing requirements. +- **Field Types:** Only certain field types are supported (e.g., `boolean`, `date`, `double`, `ip`, `keyword`, `long`, `text`). Unsupported types return errors or `null` values. +- **Full-Text Search:** Full-text functions (`MATCH`, `QSTR`, `KQL`) must be used directly after the `FROM` command. Disjunctions (`OR`) in `WHERE` clauses are limited. +- **Text Fields:** Queries on `text` fields behave as if they are `keyword` fields (case-sensitive, full-string match) unless using full-text functions. +- **Source Field:** ES|QL does not support indices with the `_source` field disabled. +- **Date Math:** Date math expressions must have the datetime on the left; subtracting two datetimes is not supported. +- **Multivalue Fields:** Functions generally return `null` for multivalued fields unless documented otherwise; use multivalue functions to convert to single values. +- **Timezone:** Only UTC is supported. +- **Sorting:** Spatial types are not supported in `SORT` commands. +- **Index Metadata:** Use the `METADATA` directive to include index information in results. +- **Known Issues:** A bug in the `STATS` command may yield incorrect results when grouping by two high-cardinality keyword fields (fixed in recent releases). ## Summary -ES|QL is a versatile and user-friendly query language for Elasticsearch, offering powerful capabilities for data analysis and transformation. While it has some limitations, its integration with Kibana and support for cross-cluster querying make it a valuable tool for a wide range of use cases. Whether you're analyzing logs, building dashboards, or creating alerts, ES|QL provides the flexibility and performance needed to work with Elasticsearch data effectively. +ES|QL is a flexible, high-performance query language for Elasticsearch, supporting advanced data analysis and transformation. It is accessible via REST API, integrates with Kibana for visualization and alerting, and supports cross-cluster querying with some limitations. Users should be aware of result size limits, field type support, and specific behaviors in Kibana and multi-index scenarios. ``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-percentile.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-percentile.txt index aea8b7c6dac28..7f60f82e7e409 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-percentile.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-percentile.txt @@ -1,6 +1,6 @@ # PERCENTILE -The `PERCENTILE` function calculates the value at which a specified percentage of observed values occur. For example, the 95th percentile is the value greater than 95% of the observed values, while the 50th percentile corresponds to the `MEDIAN`. +The PERCENTILE function returns the value at which a specified percentage of observed values occur. For example, the 95th percentile is the value greater than 95% of the observed values, and the 50th percentile is the median. 
## Syntax @@ -8,35 +8,35 @@ The `PERCENTILE` function calculates the value at which a specified percentage o ### Parameters -#### `number` +#### number -The numeric field or expression for which the percentile is calculated. +The column or expression containing the numeric values for which you want to calculate the percentile. -#### `percentile` +#### percentile -The percentile value to calculate (e.g., 0 for the minimum, 50 for the median, 100 for the maximum). +The percentile to calculate, specified as a number between 0 and 100. ## Examples -Basic Percentile Calculation +Calculates the 0th (minimum), 50th (median), and 99th percentiles of the salary column in the employees dataset. ```esql FROM employees -| STATS p0 = PERCENTILE(salary, 0), p50 = PERCENTILE(salary, 50), p99 = PERCENTILE(salary, 99) +| STATS p0 = PERCENTILE(salary, 0) + , p50 = PERCENTILE(salary, 50) + , p99 = PERCENTILE(salary, 99) ``` -This example calculates the 0th percentile (minimum), 50th percentile (median), and 99th percentile of the `salary` field. - -Using Inline Functions +Calculates the 80th percentile of the maximum salary change for each employee. ```esql FROM employees | STATS p80_max_salary_change = PERCENTILE(MV_MAX(salary_change), 80) ``` -This example calculates the 80th percentile of the maximum values in a multivalued column `salary_change`. The `MV_MAX` function is used to determine the maximum value per row before applying the `PERCENTILE` function. - -## Notes +## Limitations -- PERCENTILE is usually approximate. -- PERCENTILE is also non-deterministic. This means you can get slightly different results using the same data. +- The PERCENTILE function uses the TDigest algorithm, which calculates approximate percentiles to efficiently handle large datasets. +- Accuracy is proportional to `q(1-q)`, meaning extreme percentiles (such as 99%) are more accurate than less extreme percentiles like the median. +- For small datasets, percentiles are highly accurate, but as the dataset grows, the algorithm trades accuracy for memory savings. +- The function is non-deterministic, so repeated queries on the same data may yield slightly different results. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-percentile_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-percentile_over_time.txt new file mode 100644 index 0000000000000..c460d5fa4f3ab --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-percentile_over_time.txt @@ -0,0 +1,26 @@ +# PERCENTILE_OVER_TIME + +Calculates the percentile over time for a specified field. + +## Syntax + +`PERCENTILE_OVER_TIME(field, percentile)` + +### Parameters + +#### field + +The metric field to calculate the percentile for. + +#### percentile + +The percentile value to compute, specified as a number between 0 and 100. + +## Examples + +Calculates the maximum 95th and 99th percentiles of the `network.cost` field over time, grouping results by cluster and 1-minute time buckets. 
+ +```esql +TS k8s +| STATS p95_cost=MAX(PERCENTILE_OVER_TIME(network.cost, 95)), p99_cost=MAX(PERCENTILE_OVER_TIME(network.cost, 99)) BY cluster, time_bucket = TBUCKET(1minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-pi.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-pi.txt index d3b15417f5208..bba3c69d9bbd0 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-pi.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-pi.txt @@ -1,6 +1,6 @@ # PI -Returns Pi, the mathematical constant representing the ratio of a circle’s circumference to its diameter. +Returns Pi, the ratio of a circle’s circumference to its diameter. ## Syntax @@ -12,8 +12,8 @@ This function does not take any parameters. ## Examples -Returning the value of Pi +Returns a single row containing the value of Pi. ```esql ROW PI() -``` +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-pow.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-pow.txt index c5b5f4ef8f86a..4c899e1287394 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-pow.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-pow.txt @@ -1,6 +1,6 @@ # POW -The `POW` function calculates the value of a base raised to the power of an exponent. +The POW function returns the value of a base raised to the power of an exponent. If the result overflows the double type, null will be returned. ## Syntax @@ -8,36 +8,30 @@ The `POW` function calculates the value of a base raised to the power of an expo ### Parameters -#### `base` +#### base -Numeric expression for the base. If `null`, the function returns `null`. +Numeric expression for the base. If the value is null, the function returns null. -#### `exponent` +#### exponent -Numeric expression for the exponent. If `null`, the function returns `null`. +Numeric expression for the exponent. If the value is null, the function returns null. ## Examples -Basic usage +Calculates 2 raised to the power of 2 and returns the result. ```esql ROW base = 2.0, exponent = 2 | EVAL result = POW(base, exponent) ``` -Calculate `2.0` raised to the power of `2`. - -Fractional exponent (root calculation) - -The exponent can be a fraction, which is similar to performing a root. For example, an exponent of `0.5` calculates the square root of the base: +Calculates the square root of 4 by raising it to the power of 0.5. ```esql ROW base = 4, exponent = 0.5 | EVAL s = POW(base, exponent) ``` -Calculate the square root of `4` using an exponent of `0.5`. - ## Limitations -- It is possible to overflow a double result when using this function. In such cases, the function will return `null`. +It is possible to overflow a double result; in that case, null will be returned. 
\ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-present.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-present.txt new file mode 100644 index 0000000000000..6d8b9bdfdf331 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-present.txt @@ -0,0 +1,37 @@ +# PRESENT + +The PRESENT function returns true if the input expression yields any non-null values within the current aggregation context; otherwise, it returns false. + +## Syntax + +`PRESENT(field)` + +### Parameters + +#### field + +Expression that outputs values to be checked for presence. + +## Examples + +Determines if there are any non-null values in the `languages` field across all employees: + +```esql +FROM employees +| STATS is_present = PRESENT(languages) +``` + +Checks if there are any non-null `salary` values within each group of employees sharing the same `languages` value: + +```esql +FROM employees +| STATS is_present = PRESENT(salary) BY languages +``` + +For the employee with `emp_no` 10020, returns 1 if any non-null `languages` values are present, otherwise returns 0: + +```esql +FROM employees +| WHERE emp_no == 10020 +| STATS is_present = TO_INTEGER(PRESENT(languages)) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-present_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-present_over_time.txt new file mode 100644 index 0000000000000..298419660378a --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-present_over_time.txt @@ -0,0 +1,27 @@ +# PRESENT_OVER_TIME + +Calculates the presence of a field in the output result over a specified time range. + +## Syntax + +`PRESENT_OVER_TIME(field, window)` + +### Parameters + +#### field + +The metric field to calculate the presence for. + +#### window + +The time window over which to compute the presence. + +## Examples + +Calculates the maximum presence of the `events_received` field for each pod in the "prod" cluster, grouping results into 2-minute time buckets. + +```esql +TS k8s +| WHERE cluster == "prod" AND pod == "two" +| STATS events_received = MAX(PRESENT_OVER_TIME(events_received)) BY pod, time_bucket = TBUCKET(2 minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-qstr.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-qstr.txt index 68c86ee176ea1..455f61eff039e 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-qstr.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-qstr.txt @@ -1,33 +1,122 @@ -## QSTR +# QSTR -Performs a query string query and returns `true` if the provided query string matches the row. +The QSTR function performs a query string query and returns true if the provided query string matches the row. 
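Because the query uses Lucene query string syntax, several clauses and boolean operators can be combined in one string. A minimal sketch, reusing the `books` index and the `author`/`title` fields that appear in the examples later in this file:

```esql
FROM books
| WHERE QSTR("author: Faulkner AND title: sound")
| LIMIT 10
```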
## Syntax -`QSTR(query)` +`QSTR(query, options, max_determinized_states, fuzziness, auto_generate_synonyms_phrase_query, phrase_slop, default_field, allow_leading_wildcard, minimum_should_match, fuzzy_transpositions, fuzzy_prefix_length, time_zone, lenient, rewrite, default_operator, analyzer, fuzzy_max_expansions, quote_analyzer, allow_wildcard, boost, quote_field_suffix, enable_position_increments, fields)` ### Parameters -#### `query` +#### query Query string in Lucene query string format. +#### options + +Optional. Additional options for Query String as function named parameters. See query string query for more information. + +#### max_determinized_states + +(integer) Maximum number of automaton states required for the query. Default is 10000. + +#### fuzziness + +(keyword) Maximum edit distance allowed for matching. + +#### auto_generate_synonyms_phrase_query + +(boolean) If true, match phrase queries are automatically created for multi-term synonyms. Defaults to true. + +#### phrase_slop + +(integer) Maximum number of positions allowed between matching tokens for phrases. Defaults to 0 (which means exact matches are required). + +#### default_field + +(keyword) Default field to search if no field is provided in the query string. Supports wildcards (*). + +#### allow_leading_wildcard + +(boolean) If true, the wildcard characters * and ? are allowed as the first character of the query string. Defaults to true. + +#### minimum_should_match + +(string) Minimum number of clauses that must match for a document to be returned. + +#### fuzzy_transpositions + +(boolean) If true, edits for fuzzy matching include transpositions of two adjacent characters (ab → ba). Defaults to true. + +#### fuzzy_prefix_length + +(integer) Number of beginning characters left unchanged for fuzzy matching. Defaults to 0. + +#### time_zone + +(keyword) Coordinated Universal Time (UTC) offset or IANA time zone used to convert date values in the query string to UTC. + +#### lenient + +(boolean) If false, format-based errors, such as providing a text query value for a numeric field, are returned. Defaults to false. + +#### rewrite + +(keyword) Method used to rewrite the query. + +#### default_operator + +(keyword) Default boolean logic used to interpret text in the query string if no operators are specified. + +#### analyzer + +(keyword) Analyzer used to convert the text in the query value into token. Defaults to the index-time analyzer mapped for the default_field. + +#### fuzzy_max_expansions + +(integer) Maximum number of terms to which the query expands for fuzzy matching. Defaults to 50. + +#### quote_analyzer + +(keyword) Analyzer used to convert quoted text in the query string into tokens. Defaults to the search_quote_analyzer mapped for the default_field. + +#### allow_wildcard + +(boolean) If true, the query attempts to analyze wildcard terms in the query string. Defaults to false. + +#### boost + +(float) Floating point number used to decrease or increase the relevance scores of the query. + +#### quote_field_suffix + +(keyword) Suffix appended to quoted text in the query string. + +#### enable_position_increments + +(boolean) If true, enable position increments in queries constructed from a query_string search. Defaults to true. + +#### fields + +(keyword) Array of fields to search. Supports wildcards (*). + ## Examples -### Example 1 +Filters the books dataset to only include rows where the author field matches "Faulkner". 
```esql FROM books | WHERE QSTR("author: Faulkner") -| KEEP book_no, author -| SORT book_no -| LIMIT 5 ``` -This example filters rows where the `author` field matches "Faulkner," keeps the `book_no` and `author` columns, sorts by `book_no`, and limits the output to 5 rows. +Filters the books dataset to include rows where the title is similar to "Hobbjt", allowing up to 2 character changes for fuzzy matching. +```esql +FROM books +| WHERE QSTR("title: Hobbjt~", {"fuzziness": 2}) +``` -### Example 2 +This example filters rows where any field contains the term "Great" and limits the output to 10 rows. ```esql FROM books @@ -35,5 +124,3 @@ FROM books | LIMIT 10 ``` - -This example filters rows where any field contains the term "Great" and limits the output to 10 rows. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rate.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rate.txt new file mode 100644 index 0000000000000..5e26fbf31de48 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rate.txt @@ -0,0 +1,26 @@ +# RATE + +Calculates the per-second average rate of increase of a counter. This function accounts for breaks in monotonicity, such as counter resets when a service restarts, and extrapolates values within each bucketed time interval. Rate is the most appropriate aggregate function for counters and is only allowed in a STATS command under a `TS` source command, to ensure correct application per time series. + +## Syntax + +`RATE(field, window)` + +### Parameters + +#### field + +The counter field whose per-second average rate of increase is computed. + +#### window + +The time window over which the rate is computed. + +## Examples + +Calculates the maximum per-second rate of increase for the `network.total_bytes_in` counter within each 5-minute time bucket. + +```esql +TS k8s +| STATS max_rate=MAX(RATE(network.total_bytes_in)) BY time_bucket = TBUCKET(5minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rename.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rename.txt index 667708b17907b..8e3ea9bc0d582 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rename.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rename.txt @@ -1,39 +1,49 @@ -## RENAME +# RENAME -The `RENAME` command is used to rename one or more columns in a table. If a column with the new name already exists, it will be replaced by the renamed column. +The RENAME command changes the names of one or more columns. If a column with the new name already exists, it will be replaced by the renamed column. Renaming multiple columns in a single RENAME command is equivalent to using multiple sequential RENAME commands. ## Syntax `RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN]` +The following syntax is also supported: +`RENAME new_name1 = old_name1[, ..., new_nameN = old_nameN]` + +Both syntax options can be used interchangeably, but it is recommended to stick to one for consistency and readability. + ### Parameters -#### `old_nameX` +#### old_nameX The name of the column you want to rename. -#### `new_nameX` +#### new_nameX -The new name for the column. If it conflicts with an existing column name, the existing column is dropped. 
If multiple columns are renamed to the same name, all but the rightmost column with the same new name are dropped. +The new name for the column. If the new name conflicts with an existing column, the existing column is dropped. If multiple columns are renamed to the same name, all but the rightmost column with the same new name are dropped. ## Examples -### Rename a single column - -Rename the `still_hired` column to `employed`: +Renames the `still_hired` column to `employed` after selecting specific columns. ```esql FROM employees | KEEP first_name, last_name, still_hired -| RENAME still_hired AS employed +| RENAME still_hired AS employed ``` -### Rename multiple columns - -Rename `first_name` to `fn` and `last_name` to `ln` in a single command: +Renames `first_name` to `fn` and `last_name` to `ln` in a single command after keeping only those columns. ```esql FROM employees | KEEP first_name, last_name | RENAME first_name AS fn, last_name AS ln +``` + +Renames `first_name` to `fn` and then `last_name` to `ln` using two separate RENAME commands after keeping only those columns. + +```esql +FROM employees +| KEEP first_name, last_name +| RENAME first_name AS fn +| RENAME last_name AS ln ``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-repeat.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-repeat.txt index 9a3e312b4d31a..17b27a5ba592f 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-repeat.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-repeat.txt @@ -1,6 +1,6 @@ # REPEAT -The `REPEAT` function constructs a string by concatenating a given string with itself a specified number of times. +The REPEAT function returns a string created by concatenating the input string with itself a specified number of times. ## Syntax @@ -8,19 +8,19 @@ The `REPEAT` function constructs a string by concatenating a given string with i ### Parameters -#### `string` +#### string -The string to be repeated. +String expression to be repeated. -#### `number` +#### number -The number of times the string should be repeated. +Number of times to repeat the string. ## Examples +Repeats the value in column `a` three times and stores the result in a new column `triple_a`: + ```esql ROW a = "Hello!" | EVAL triple_a = REPEAT(a, 3) -``` - -This example creates a new column `triple_a` by repeating the string `"Hello!"` three times. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-replace.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-replace.txt index 6c1470f3fad41..21e662974862e 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-replace.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-replace.txt @@ -1,6 +1,6 @@ # REPLACE -The `REPLACE` function substitutes any match of a regular expression in a string with a specified replacement string. +The REPLACE function substitutes any match of the regular expression in a string with a specified replacement string. ## Syntax @@ -8,32 +8,30 @@ The `REPLACE` function substitutes any match of a regular expression in a string ### Parameters -#### `string` +#### string -String expression. +The string expression to search and replace within. -#### `regex` +#### regex -Regular expression. 
+The regular expression pattern to match in the string. -#### `newString` +#### newString -Replacement string. +The string to replace each match of the regular expression. ## Examples -The following example replaces any occurrence of the word "World" with the word "Universe": +Replaces the word "World" with "Universe" in the given string. ```esql ROW str = "Hello World" | EVAL str = REPLACE(str, "World", "Universe") -| KEEP str ``` -Another example could be replacing digits in a string with a specific character: +Removes all spaces from the string by replacing one or more whitespace characters with an empty string. ```esql -ROW str = "User123" -| EVAL str = REPLACE(str, "\\d", "*") -| KEEP str +ROW str = "Hello World" +| EVAL str = REPLACE(str, "\\\\s+", "") ``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rerank.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rerank.txt new file mode 100644 index 0000000000000..c5100512c90da --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rerank.txt @@ -0,0 +1,73 @@ +# RERANK + +The RERANK command uses an inference model to compute a new relevance score for an initial set of documents. It is typically used after filtering and limiting the dataset to a manageable size, balancing performance and accuracy. RERANK processes each row through an inference model, which can impact performance and costs. + +## Syntax + +`RERANK [column =] query ON field [, field, ...] [WITH { "inference_id" : "my_inference_endpoint" }]` + +### Parameters + +#### column + +Optional. The name of the output column that will contain the reranked scores. If not specified, the results are stored in a column named `_score`. If the specified column already exists, it will be overwritten with the new results. + +#### query + +The query text used to rerank the documents. This is usually the same query used in the initial search. + +#### field + +One or more fields to use for reranking. These fields should contain the text that the reranking model will evaluate. + +#### my_inference_endpoint + +The ID of the inference endpoint to use for the reranking task. The endpoint must be configured with the `rerank` task type. + +## Examples + +Rerank the top 100 books matching "hobbit" in the description using a reranking model, then keep only the top 3 titles and their scores. + +```esql +FROM books METADATA _score +| WHERE MATCH(description, "hobbit") +| SORT _score DESC +| LIMIT 100 +| RERANK "hobbit" ON description WITH { "inference_id" : "test_reranker" } +| LIMIT 3 +| KEEP title, _score +``` + +Rerank the top 100 books that match "hobbit" in the description or "Tolkien" as the author, using both fields for reranking, and store the new score in a column named `rerank_score`. + +```esql +FROM books METADATA _score +| WHERE MATCH(description, "hobbit") OR MATCH(author, "Tolkien") +| SORT _score DESC +| LIMIT 100 +| RERANK rerank_score = "hobbit" ON description, author WITH { "inference_id" : "test_reranker" } +| SORT rerank_score +| LIMIT 3 +| KEEP title, _score, rerank_score +``` + +Rerank the top 100 books, combine the original and reranked scores, and keep the top 3 results with both scores. 
+ +```esql +FROM books METADATA _score +| WHERE MATCH(description, "hobbit") OR MATCH(author, "Tolkien") +| SORT _score DESC +| LIMIT 100 +| RERANK rerank_score = "hobbit" ON description, author WITH { "inference_id" : "test_reranker" } +| EVAL original_score = _score, _score = rerank_score + original_score +| SORT _score +| LIMIT 3 +| KEEP title, original_score, rerank_score, _score +``` + +## Limitations + +- Starting in version 9.3.0, RERANK automatically limits processing to 1000 rows by default to prevent accidental high consumption. This limit is applied before the RERANK command executes. You can adjust or disable this limit using the `esql.command.rerank.limit` and `esql.command.rerank.enabled` cluster settings. +- In version 9.2.x, no automatic row limit is applied. It is recommended to use LIMIT before or after RERANK to control the number of documents processed. +- RERANK commands may time out when processing large datasets or complex queries. The default timeout is 10 minutes, and increasing this limit depends on your deployment type. For Elastic Cloud Serverless, a manual override from Elastic Support is required. +- To avoid timeouts and high resource usage, reduce data volume with LIMIT or more selective filters before RERANK, split complex operations into multiple queries, or configure your HTTP client's response timeout. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-reverse.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-reverse.txt index 680fefb3a6504..cbaa2aaf8d033 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-reverse.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-reverse.txt @@ -1,6 +1,6 @@ # REVERSE -The `REVERSE` function returns a new string with the characters of the input string in reverse order. +Returns a new string with the characters of the input string in reverse order. Unicode grapheme clusters are kept together during reversal. ## Syntax @@ -8,36 +8,18 @@ The `REVERSE` function returns a new string with the characters of the input str ### Parameters -#### `str` +#### str -String expression. If `null`, the function returns `null`. +String expression to be reversed. If the value is `null`, the function returns `null`. ## Examples -Reversing a simple string - +Reverses the characters in the string "Some Text". ```esql -ROW message = "Some Text" -| EVAL message_reversed = REVERSE(message); +ROW message = "Some Text" | EVAL message_reversed = REVERSE(message); ``` -| message | message_reversed | -|-----------|------------------| -| Some Text | txeT emoS | - -Reversing a string with emojis - +Reverses the order of the emoji grapheme clusters in the string "💧🪨🔥💨", keeping each cluster intact. ```esql -ROW bending_arts = "💧🪨🔥💨" -| EVAL bending_arts_reversed = REVERSE(bending_arts); +ROW bending_arts = "💧🪨🔥💨" | EVAL bending_arts_reversed = REVERSE(bending_arts); ``` - -| bending_arts | bending_arts_reversed | -|--------------|-----------------------| -| 💧🪨🔥💨 | 💨🔥🪨💧 | - -`REVERSE` works with Unicode and preserves grapheme clusters during reversal. - -## Limitations - -If Elasticsearch is running with a JDK version less than 20, the function may not properly reverse grapheme clusters. For example, "👍🏽😊" might be reversed to "🏽👍😊" instead of the correct "😊👍🏽". 
Elastic Cloud and the JDK bundled with Elasticsearch use newer JDKs, so this issue typically arises only if an older JDK is explicitly used. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-right.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-right.txt index 5904cc2520b2f..8c6b41d47752c 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-right.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-right.txt @@ -1,6 +1,6 @@ # RIGHT -Returns a substring by extracting a specified number of characters from the right side of a string. +The RIGHT function returns a substring by extracting a specified number of characters from the end (right side) of a string. ## Syntax @@ -8,22 +8,20 @@ Returns a substring by extracting a specified number of characters from the righ ### Parameters -#### `string` +#### string -The string from which to return a substring. +The string from which the substring will be extracted. -#### `length` +#### length -The number of characters to return. +The number of characters to return from the right end of the string. ## Examples +Extracts the last three characters from the `last_name` column for each employee. + ```esql FROM employees | KEEP last_name | EVAL right = RIGHT(last_name, 3) -| SORT last_name ASC -| LIMIT 5 -``` - -Extracts the last three characters from the `last_name` column, sorts the results alphabetically, and limits the output to the first five rows. \ No newline at end of file +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-round.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-round.txt index a8f5ed668a899..4aaa7a8b4e72d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-round.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-round.txt @@ -1,6 +1,6 @@ # ROUND -Rounds a number to the specified number of decimal places. By default, it rounds to 0 decimal places, returning the nearest integer. If the precision is a negative number, it rounds to the specified number of digits left of the decimal point. +The ROUND function rounds a number to a specified number of decimal places. By default, it rounds to 0 decimal places, returning the nearest integer. If the precision is negative, it rounds to the number of digits left of the decimal point. ## Syntax @@ -8,31 +8,20 @@ Rounds a number to the specified number of decimal places. By default, it rounds ### Parameters -#### `number` +#### number The numeric value to round. If `null`, the function returns `null`. -#### `decimals` +#### decimals -The number of decimal places to round to. Defaults to 0. If `null`, the function returns `null`. +The number of decimal places to round to. This parameter is optional and defaults to 0. If `null`, the function returns `null`. ## Examples -Rounding a height value to one decimal place +Calculates each employee's height in feet and rounds the result to one decimal place, displaying their first name, last name, and the rounded height. + ```esql FROM employees | KEEP first_name, last_name, height | EVAL height_ft = ROUND(height * 3.281, 1) ``` - -This example converts the `height` column from meters to feet and rounds the result to one decimal place. 
- -```esql -FROM sales -| KEEP product_name, revenue -| EVAL rounded_revenue = ROUND(revenue, -2) -``` - -## Notes - -If "decimals" is a negative number, the ROUND function rounds to the number of digits left of the decimal point. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-round_to.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-round_to.txt index cc8c81898cc7f..eb1309ceb8abf 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-round_to.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-round_to.txt @@ -1,24 +1,24 @@ # ROUND_TO -The ROUND_TO function rounds a numeric value down to one of a list of fixed points. +The ROUND_TO function rounds a numeric value down to the nearest value from a specified list of fixed points. ## Syntax -ROUND_TO(field, points) +`ROUND_TO(field, points)` ### Parameters #### field -The numeric value to round. If `null`, the function returns `null`. +The numeric value to round. If the value is `null`, the function returns `null`. #### points -Remaining rounding points. Must be constants. +A list of constant values that serve as the rounding points. ## Examples -Group employees by birth date windows, rounding each birth date down to the nearest specified date: +Counts the number of employees, grouping them by their birth date rounded down to the nearest specified date window. ```esql FROM employees @@ -33,4 +33,4 @@ FROM employees "1975-01-01T00:00:00Z"::DATETIME ) | SORT birth_window ASC -``` \ No newline at end of file +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-row.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-row.txt index 3c7bd22843bb5..c6ca5f86a4c9a 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-row.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-row.txt @@ -1,6 +1,6 @@ # ROW -The `ROW` command generates a single row with one or more columns, each assigned a specified value. This is particularly useful for testing purposes. +The ROW command generates a single row with one or more columns, each assigned a value you specify. This is useful for testing or creating sample data. ## Syntax @@ -8,36 +8,29 @@ The `ROW` command generates a single row with one or more columns, each assigned ### Parameters -#### `columnX` +#### columnX -The name of the column. -If duplicate column names are provided, only the rightmost duplicate creates a column. +The column name. If duplicate column names are provided, only the rightmost duplicate creates a column. -#### `valueX` +#### valueX The value assigned to the column. This can be a literal, an expression, or a function. ## Examples -Basic usage - -Create a row with three columns, each assigned a specific value: +Creates a single row with three columns: 'a' set to 1, 'b' set to the string "two", and 'c' set to null. ```esql ROW a = 1, b = "two", c = null ``` -Multi-value columns - -Use square brackets to assign multiple values to a single column: +Creates a single row with a column 'a' containing a multi-value array [2, 1]. ```esql ROW a = [2, 1] ``` -Using functions - -Generate a row where a column's value is calculated using a function: +Creates a single row with column 'a' set to the rounded value of 1.23 to zero decimal places. 
```esql
ROW a = ROUND(1.23, 0)
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rtrim.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rtrim.txt
index ad01facba5244..1cd515d9b7ec2 100644
--- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rtrim.txt
+++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-rtrim.txt
@@ -8,12 +8,14 @@ Removes trailing whitespaces from a string.

### Parameters

-#### `string`
+#### string

-String expression. If `null`, the function returns `null`.
+String expression to process. If the value is `null`, the function returns `null`.

## Examples

+Removes trailing spaces from the `message` and `color` columns, then adds single quotes around the trimmed results.
+
```esql
ROW message = "   some text  ", color = " red "
| EVAL message = RTRIM(message)
@@ -21,5 +23,3 @@ ROW message = "   some text  ", color = " red "
| EVAL message = CONCAT("'", message, "'")
| EVAL color = CONCAT("'", color, "'")
```
-
-This example removes trailing whitespaces from the `message` and `color` columns, then wraps the resulting strings in single quotes.
\ No newline at end of file
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sample.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sample.txt
index c4a3b0cab4097..76e1a17a9dffc 100644
--- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sample.txt
+++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sample.txt
@@ -1,26 +1,29 @@
# SAMPLE

-The SAMPLE command selects a random fraction of rows from the table, based on the specified probability.
+The SAMPLE function collects a specified number of sample values from a field.

## Syntax

-`SAMPLE probability`
+`SAMPLE(field, limit)`

### Parameters

-#### probability
+#### field

-The probability that a row is included in the sample. The value must be between 0 and 1, exclusive.
+The field from which to collect sample values.
+
+#### limit
+
+The maximum number of values to collect.

## Examples

+Collects up to 5 sample values from the `gender` field across all employee records.
+
```esql
FROM employees
+| STATS sample = SAMPLE(gender, 5)
+```
-| KEEP emp_no
-| SAMPLE 0.05
-```
-
-Randomly select approximately 5% of the employee numbers from the employees table.
-
-## Limitations
-
-This functionality is in technical preview and may be changed or removed in a future release. Features in technical preview are not subject to the support SLA of official GA features.
\ No newline at end of file
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-scalb.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-scalb.txt
new file mode 100644
index 0000000000000..6e386ea629dd9
--- /dev/null
+++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-scalb.txt
@@ -0,0 +1,25 @@
+# SCALB
+
+The SCALB function returns the result of multiplying a number by 2 raised to the power of a scale factor, following the behavior of Java's `scalb` function. The result is rounded as if performed by a single correctly rounded floating-point multiply to a member of the double value set.
+
+## Syntax
+
+`SCALB(d, scaleFactor)`
+
+### Parameters
+
+#### d
+
+Numeric expression for the multiplier. If `null`, the function returns `null`.
+ +#### scaleFactor + +Numeric expression for the scale factor. If `null`, the function returns `null`. + +## Examples + +Calculates the value of 3.0 multiplied by 2 to the power of 10 and stores the result in a new column. + +```esql +row x = 3.0, y = 10 | eval z = scalb(x, y) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-score.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-score.txt new file mode 100644 index 0000000000000..c1bd854eb8757 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-score.txt @@ -0,0 +1,23 @@ +# SCORE + +The SCORE function calculates the relevance score of an expression. Only full text functions are scored, and it returns scores for all resulting documents. + +## Syntax + +`SCORE(query)` + +### Parameters + +#### query + +A boolean expression containing full text function(s) to be scored. + +## Examples + +Calculates the relevance score for documents where the title matches "Return" and the author matches "Tolkien", and assigns the score for the title match to a new field. + +```esql +FROM books METADATA _score +| WHERE match(title, "Return") AND match(author, "Tolkien") +| EVAL first_score = score(match(title, "Return")) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sha1.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sha1.txt new file mode 100644 index 0000000000000..a2cd0725e3a40 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sha1.txt @@ -0,0 +1,24 @@ +# SHA1 + +The SHA1 command computes the SHA1 hash of the input value. + +## Syntax + +`SHA1(input)` + +### Parameters + +#### input + +The value to hash. + +## Examples + +Calculates the SHA1 hash of the `message` column for all rows except those where the message is "Connection error", and displays both the original message and its hash. + +```esql +FROM sample_data +| WHERE message != "Connection error" +| EVAL sha1 = sha1(message) +| KEEP message, sha1 +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sha256.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sha256.txt new file mode 100644 index 0000000000000..e85f5ba5c0d02 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sha256.txt @@ -0,0 +1,24 @@ +# SHA256 + +The SHA256 command computes the SHA256 hash of the input value. + +## Syntax + +`SHA256(input)` + +### Parameters + +#### input + +The value to hash. + +## Examples + +Calculates the SHA256 hash of the `message` column for all rows where the message is not "Connection error", and displays both the original message and its hash. 
+ +```esql +FROM sample_data +| WHERE message != "Connection error" +| EVAL sha256 = sha256(message) +| KEEP message, sha256 +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-show.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-show.txt index 6368411d3c9a8..c632225da8e69 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-show.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-show.txt @@ -1,6 +1,6 @@ -## SHOW +# SHOW -The `SHOW` command provides information about the deployment and its capabilities. +The SHOW command returns information about the deployment and its capabilities. ## Syntax @@ -8,13 +8,13 @@ The `SHOW` command provides information about the deployment and its capabilitie ### Parameters -#### `item` +#### item -This parameter can only be `INFO`. +Can only be INFO. ## Examples -Retrieve the deployment’s version, build date, and hash: +Returns the deployment’s version, build date, and hash. ```esql SHOW INFO diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-signum.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-signum.txt index aabe47a38a152..3011cf4604e5d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-signum.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-signum.txt @@ -1,6 +1,6 @@ # SIGNUM -Returns the sign of a given number. It outputs `-1` for negative numbers, `0` for `0`, and `1` for positive numbers. +The SIGNUM function returns the sign of a given number. It outputs `-1` for negative numbers, `0` for zero, and `1` for positive numbers. ## Syntax @@ -8,35 +8,15 @@ Returns the sign of a given number. It outputs `-1` for negative numbers, `0` fo ### Parameters -#### `number` +#### number -Numeric expression. If `null`, the function returns `null`. +Numeric expression. If the value is `null`, the function returns `null`. ## Examples +Calculates the sign of the value in column `d` and stores the result in column `s`: + ```esql ROW d = 100.0 | EVAL s = SIGNUM(d) -``` - -This example calculates the sign of the number `100.0`. - - -## Notes - -If SORT is used right after a KEEP command, make sure it only uses column names in KEEP, -or move the SORT before the KEEP, e.g. -- not correct: KEEP date | SORT @timestamp, -- correct: SORT @timestamp | KEEP date - -By default, the sorting order is ascending. You can specify an explicit sort order by using `ASC` for ascending or `DESC` for descending. - -If two rows have the same sort key, they are considered equal. You can provide additional sort expressions to act as tie breakers. - -When sorting on multivalued columns, the lowest value is used when sorting in ascending order and the highest value is used when sorting in descending order. - -By default, `null` values are treated as being larger than any other value. This means that with an ascending sort order, `null` values are sorted last, and with a descending sort order, `null` values are sorted first. You can change this by providing `NULLS FIRST` or `NULLS LAST`. - -## Limitations -- **Multivalued Columns**: When sorting on multivalued columns, the lowest value is used for ascending order and the highest value for descending order. -- **Null Values**: By default, null values are treated as larger than any other value. 
This can be changed using `NULLS FIRST` or `NULLS LAST`. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sin.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sin.txt index 3b4ef78d1bd5b..91ac36d3df856 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sin.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sin.txt @@ -1,6 +1,6 @@ # SIN -Returns the sine of an angle. +The SIN function returns the sine of a given angle, where the angle is specified in radians. ## Syntax @@ -8,15 +8,15 @@ Returns the sine of an angle. ### Parameters -#### `angle` +#### angle -An angle, in radians. If `null`, the function returns `null`. +An angle in radians. If the value is `null`, the function returns `null`. ## Examples +Calculates the sine of the value in column `a` (which is set to 1.8 radians) and stores the result in a new column called `sin`: + ```esql ROW a=1.8 -| EVAL sin = SIN(a) +| EVAL sin=SIN(a) ``` - -Calculate the sine of the angle `1.8` radians and store the result in a new column named `sin`. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sinh.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sinh.txt index e1eaf459566b8..e5b7f39ee3690 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sinh.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sinh.txt @@ -10,13 +10,13 @@ Returns the hyperbolic sine of a number. #### number -A numeric expression. If `null`, the function returns `null`. +Numeric expression. If the value is `null`, the function returns `null`. ## Examples +Calculates the hyperbolic sine of the value in column `a` (which is set to 1.8). + ```esql ROW a=1.8 -| EVAL sinh = SINH(a) -``` - -Calculate the hyperbolic sine of the value `1.8`. \ No newline at end of file +| EVAL sinh=SINH(a) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sort.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sort.txt index ad400c7c6c26a..d0078892fb465 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sort.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sort.txt @@ -1,6 +1,6 @@ -## SORT +# SORT -The `SORT` command organizes a table by one or more columns. +The SORT command arranges the rows of a table based on one or more columns. By default, sorting is in ascending order, but you can specify ascending (ASC) or descending (DESC) order. Additional sort expressions can be used as tie breakers. When sorting multivalued columns, the lowest value is used for ascending order and the highest for descending. Null values are treated as larger than any other value by default, appearing last in ascending order and first in descending order. You can override this behavior with NULLS FIRST or NULLS LAST. ## Syntax @@ -8,15 +8,13 @@ The `SORT` command organizes a table by one or more columns. ### Parameters -#### `columnX` +#### columnX The column to sort on. ## Examples -### Basic sorting - -Sort the table by the `height` column in ascending order (default behavior): +Sorts the employees by their height in ascending order. 
```esql FROM employees @@ -24,9 +22,7 @@ FROM employees | SORT height ``` -### Explicitly sorting in descending order with `DESC` - -Sort the table by the `height` column in descending order: +Sorts the employees by their height in descending order. ```esql FROM employees @@ -34,9 +30,7 @@ FROM employees | SORT height DESC ``` -### Providing additional sort expressions to act as tie breakers - -Sort the table by `height` in descending order, and use `first_name` in ascending order as a tie breaker: +Sorts the employees by height in descending order, and uses first name in ascending order to break ties. ```esql FROM employees @@ -44,31 +38,10 @@ FROM employees | SORT height DESC, first_name ASC ``` -### Sorting `null` values first using `NULLS FIRST` - -Sort the table by `first_name` in ascending order, placing `null` values first: +Sorts the employees by first name in ascending order, placing any null values at the top of the list. ```esql FROM employees | KEEP first_name, last_name, height | SORT first_name ASC NULLS FIRST ``` - -## Notes - -If SORT is used right after a KEEP command, make sure it only uses column names in KEEP, -or move the SORT before the KEEP, e.g. -- not correct: KEEP date | SORT @timestamp, -- correct: SORT @timestamp | KEEP date) - -By default, the sorting order is ascending. You can specify an explicit sort order by using `ASC` for ascending or `DESC` for descending. - -If two rows have the same sort key, they are considered equal. You can provide additional sort expressions to act as tie breakers. - -When sorting on multivalued columns, the lowest value is used when sorting in ascending order and the highest value is used when sorting in descending order. - -By default, `null` values are treated as being larger than any other value. This means that with an ascending sort order, `null` values are sorted last, and with a descending sort order, `null` values are sorted first. You can change this by providing `NULLS FIRST` or `NULLS LAST`. - -## Limitations -- **Multivalued Columns**: When sorting on multivalued columns, the lowest value is used for ascending order and the highest value for descending order. -- **Null Values**: By default, null values are treated as larger than any other value. This can be changed using `NULLS FIRST` or `NULLS LAST`. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-space.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-space.txt index d34e58ddf1e18..9f12e3e3b09af 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-space.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-space.txt @@ -1,6 +1,6 @@ # SPACE -Returns a string composed of a specified number of spaces. +The SPACE function returns a string consisting of a specified number of space characters. ## Syntax @@ -8,14 +8,14 @@ Returns a string composed of a specified number of spaces. ### Parameters -#### `number` +#### number -The number of spaces to include in the resulting string. +Specifies the number of spaces to include in the result string. ## Examples +Creates a message by concatenating "Hello" and "World!" with a single space in between. + ```esql ROW message = CONCAT("Hello", SPACE(1), "World!"); -``` - -This example creates a string with the word "Hello," followed by a single space, and then the word "World!". 
+``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-split.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-split.txt index 27dc7d36d2bad..65018d9115307 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-split.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-split.txt @@ -1,6 +1,6 @@ # SPLIT -The `SPLIT` function splits a single-valued string into multiple strings based on a specified delimiter. +The SPLIT function divides a single string value into multiple strings using a specified delimiter. ## Syntax @@ -8,19 +8,24 @@ The `SPLIT` function splits a single-valued string into multiple strings based o ### Parameters -#### `string` +#### string -String expression. If `null`, the function returns `null`. +String expression to be split. If the value is `null`, the function returns `null`. -#### `delim` +#### delim -Delimiter used to split the string. Only single-byte delimiters are currently supported. +Delimiter used to split the string. Only single byte delimiters are supported. ## Examples +Splits the value in the `words` column into an array of strings using the semicolon as the delimiter. + ```esql ROW words="foo;bar;baz;qux;quux;corge" | EVAL word = SPLIT(words, ";") ``` +This example splits the string in the `words` column into multiple strings using the semicolon as the delimiter. + +## Limitations -This example splits the string `words` into multiple strings using the semicolon (`;`) as the delimiter. +Only single byte delimiters are supported. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sqrt.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sqrt.txt index aa3694cea87ee..2391bc027b5fa 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sqrt.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sqrt.txt @@ -1,6 +1,6 @@ # SQRT -Returns the square root of a number. The input can be any numeric value, and the return value is always a double. Square roots of negative numbers and infinities are `null`. +The SQRT function returns the square root of a numeric value. The result is always a double. If the input is a negative number or infinity, the function returns null. ## Syntax @@ -8,22 +8,15 @@ Returns the square root of a number. The input can be any numeric value, and the ### Parameters -#### `number` +#### number -Numeric expression. If `null`, the function returns `null`. +Numeric expression. If the value is null, the function returns null. ## Examples +Calculates the square root of 100.0 and stores the result in a new column. + ```esql ROW d = 100.0 | EVAL s = SQRT(d) -``` - -Calculate the square root of the value `100.0`. - -```esql -FROM employees -| KEEP first_name, last_name, height -| EVAL sqrt_height = SQRT(height) -``` -Keep only the first_name, last_name, height columns, and then create a new `sqrt_height` which equals to the square root of all the values in the height column. 
+``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_centroid_agg.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_centroid_agg.txt index 8b7a421e99e9a..c12c97a2e77db 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_centroid_agg.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_centroid_agg.txt @@ -1,6 +1,6 @@ # ST_CENTROID_AGG -Calculates the spatial centroid over a field with a spatial point geometry type. +Calculates the spatial centroid for a field containing spatial point geometry data. ## Syntax @@ -8,15 +8,15 @@ Calculates the spatial centroid over a field with a spatial point geometry type. ### Parameters -#### `field` +#### field -The field containing spatial point geometry data. +The column containing spatial point geometry values to aggregate. ## Examples +Calculates the centroid point from all airport locations in the dataset. + ```esql FROM airports -| STATS centroid = ST_CENTROID_AGG(location) +| STATS centroid=ST_CENTROID_AGG(location) ``` - -Calculate the spatial centroid of the `location` field in the `airports` dataset. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_contains.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_contains.txt index 87bd34d7aeb55..749be11e927d3 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_contains.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_contains.txt @@ -1,6 +1,6 @@ # ST_CONTAINS -Determines whether the first geometry contains the second geometry. This is the inverse of the `ST_WITHIN` function. +Returns whether the first geometry contains the second geometry. This is the inverse of the ST_WITHIN function. ## Syntax @@ -14,28 +14,15 @@ Expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_sh #### geomB -Expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If `null`, the function returns `null`. The second parameter must have the same coordinate system as the first. Combining `geo_*` and `cartesian_*` parameters is not allowed. +Expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If `null`, the function returns `null`. The second parameter must have the same coordinate system as the first, so you cannot combine `geo_*` and `cartesian_*` parameters. ## Examples -Filtering city boundaries containing a specific polygon +Check if a city boundary contains a given polygon: ```esql FROM airport_city_boundaries | WHERE ST_CONTAINS(city_boundary, TO_GEOSHAPE("POLYGON((109.35 18.3, 109.45 18.3, 109.45 18.4, 109.35 18.4, 109.35 18.3))")) | KEEP abbrev, airport, region, city, city_location ``` - -This query filters city boundaries that contain the specified polygon and keeps selected fields in the output. - -Filtering regional boundaries containing a specific polygon - -```esql -FROM regions -| WHERE ST_CONTAINS(region_boundary, TO_GEOSHAPE("POLYGON((30 10, 40 40, 20 40, 10 20, 30 10))")) -| KEEP region_name, region_code, region_boundary -``` - -## Limitations - -It's important to note that the second parameter must have the same coordinate system as the first. Therefore, it's not possible to combine `geo_*` and `cartesian_*` parameters. 
+This example filters for city boundaries that contain the specified polygon and keeps selected columns in the output. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_disjoint.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_disjoint.txt index f501fc449dd38..ffeefdd1e32b4 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_disjoint.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_disjoint.txt @@ -1,6 +1,6 @@ # ST_DISJOINT -Determines whether two geometries or geometry columns are disjoint. This is the inverse of the `ST_INTERSECTS` function. Mathematically, two geometries are disjoint if their intersection is empty: `ST_Disjoint(A, B) ⇔ A ⋂ B = ∅`. +Returns whether two geometries or geometry columns are disjoint. This is the inverse of the ST_INTERSECTS function. In mathematical terms: ST_Disjoint(A, B) ⇔ A ⋂ B = ∅ ## Syntax @@ -14,26 +14,15 @@ Expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_sh #### geomB -Expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If `null`, the function returns `null`. The second parameter must have the same coordinate system as the first. Combining `geo_*` and `cartesian_*` parameters is not supported. +Expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If `null`, the function returns `null`. The second parameter must have the same coordinate system as the first; you cannot combine `geo_*` and `cartesian_*` parameters. ## Examples -Filtering disjoint geometries - -The following query filters rows where the `city_boundary` geometry is disjoint from a specified polygon. It keeps only the specified columns in the result. +Check if the city boundary is disjoint from a given polygon: ```esql FROM airport_city_boundaries | WHERE ST_DISJOINT(city_boundary, TO_GEOSHAPE("POLYGON((-10 -60, 120 -60, 120 60, -10 60, -10 -60))")) | KEEP abbrev, airport, region, city, city_location ``` - -```esql -FROM airport_city_boundaries -| WHERE ST_DISJOINT(city_boundary, TO_GEOSHAPE("POLYGON((30 10, 40 40, 20 40, 10 20, 30 10))")) -| KEEP abbrev, airport, region, city, city_location -``` - -## Limitations - -- The two geometries must share the same coordinate system. For example, you cannot mix `geo_*` and `cartesian_*` types in the same function call. +This example filters airport city boundaries to only those that do not intersect with the specified polygon. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_distance.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_distance.txt index 059bd93aa4829..19309d4f69c0e 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_distance.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_distance.txt @@ -1,6 +1,6 @@ # ST_DISTANCE -Computes the distance between two points. For cartesian geometries, it calculates the pythagorean distance in the same units as the original coordinates. For geographic geometries, it computes the circular distance along the great circle in meters. +Computes the distance between two points. For cartesian geometries, this is the pythagorean distance in the same units as the original coordinates. 
For geographic geometries, this is the circular distance along the great circle in meters.

## Syntax
@@ -10,24 +10,19 @@ Computes the distance between two points. For cartesian geometries, it calculate

### Parameters

#### geomA

-- Expression of type `geo_point` or `cartesian_point`.
-- If `null`, the function returns `null`.
+Expression of type `geo_point` or `cartesian_point`. If `null`, the function returns `null`.

#### geomB

-- Expression of type `geo_point` or `cartesian_point`.
-- If `null`, the function returns `null`.
-- Must have the same coordinate system as `geomA`. Combining `geo_point` and `cartesian_point` parameters is not supported.
+Expression of type `geo_point` or `cartesian_point`. If `null`, the function returns `null`. The second parameter must also have the same coordinate system as the first. It is not possible to combine `geo_point` and `cartesian_point` parameters.

## Examples

-Calculating the distance between two points
+Calculate the distance between the airport location and the city location for Copenhagen Airport:

```esql
FROM airports
| WHERE abbrev == "CPH"
| EVAL distance = ST_DISTANCE(location, city_location)
| KEEP abbrev, name, location, city_location, distance
-```
-
-This example calculates the distance between the airport's location and the city's location for the airport with the abbreviation "CPH".
+```
\ No newline at end of file
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_envelope.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_envelope.txt
index a400101648a3f..969641e59f153 100644
--- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_envelope.txt
+++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_envelope.txt
@@ -1,19 +1,20 @@
# ST_ENVELOPE

-Determines the minimum bounding box of the supplied geometry.
+The ST_ENVELOPE function determines the minimum bounding box of the supplied geometry.

## Syntax

`ST_ENVELOPE(geometry)`

### Parameters

-#### `geometry`
+#### geometry

Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If `null`, the function returns `null`.

## Examples

+Finds the minimum bounding box for the city boundary of Copenhagen airport and displays the abbreviation, airport name, and the bounding box.
```esql
FROM airport_city_boundaries
| WHERE abbrev == "CPH"
@@ -21,4 +22,11 @@ FROM airport_city_boundaries
| KEEP abbrev, airport, envelope
```

-This example calculates the minimum bounding box for the `city_boundary` geometry of the airport with the abbreviation "CPH".
\ No newline at end of file
+Calculates the minimum and maximum x and y coordinates from the bounding box of Copenhagen airport's city boundary and displays them along with the abbreviation and airport name.
+```esql +FROM airport_city_boundaries +| WHERE abbrev == "CPH" +| EVAL envelope = ST_ENVELOPE(city_boundary) +| EVAL xmin = ST_XMIN(envelope), xmax = ST_XMAX(envelope), ymin = ST_YMIN(envelope), ymax = ST_YMAX(envelope) +| KEEP abbrev, airport, xmin, xmax, ymin, ymax +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_extent_agg.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_extent_agg.txt index 2f584794d0674..2282cfdb6b904 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_extent_agg.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_extent_agg.txt @@ -1,6 +1,6 @@ # ST_EXTENT_AGG -Calculates the spatial extent over a field with a geometry type, returning a bounding box for all values of the field. +Calculates the spatial extent over a field containing geometry data, returning a bounding box that encompasses all values in the field. ## Syntax @@ -8,18 +8,16 @@ Calculates the spatial extent over a field with a geometry type, returning a bou ### Parameters -#### `field` +#### field -The field containing geometry data over which the spatial extent is calculated. +The column containing geometry values for which the spatial extent (bounding box) will be calculated. ## Examples -Calculate the spatial extent of airport locations in India +Finds the bounding box that contains all airport locations in India. ```esql FROM airports | WHERE country == "India" | STATS extent = ST_EXTENT_AGG(location) -``` - -This example calculates the bounding box for the `location` field of airports in India. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_intersects.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_intersects.txt index 48eaeb7ec716d..79191b975a7e2 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_intersects.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_intersects.txt @@ -1,6 +1,6 @@ # ST_INTERSECTS -Determines whether two geometries intersect. Two geometries intersect if they share any point in common, including points along lines or within polygons. This function is the inverse of `ST_DISJOINT`. +Returns true if two geometries intersect. They intersect if they have any point in common, including their interior points (points along lines or within polygons). This is the inverse of the ST_DISJOINT function. In mathematical terms: ST_Intersects(A, B) ⇔ A ⋂ B ≠ ∅ ## Syntax @@ -8,21 +8,24 @@ Determines whether two geometries intersect. Two geometries intersect if they sh ### Parameters -#### `geomA` +#### geomA -An expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If `null`, the function returns `null`. +Expression of type `geo_point`, `cartesian_point`, `geo_shape` or `cartesian_shape`. If `null`, the function returns `null`. -#### `geomB` +#### geomB -An expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If `null`, the function returns `null`. The coordinate system of `geomB` must match that of `geomA`. Combining `geo_*` and `cartesian_*` parameters is not allowed. +Expression of type `geo_point`, `cartesian_point`, `geo_shape` or `cartesian_shape`. If `null`, the function returns `null`. 
The second parameter must also have the same coordinate system as the first. This means it is not possible to combine `geo_*` and `cartesian_*` parameters. ## Examples -Checking if a location intersects with a polygon +Check if airport locations intersect with a given polygon: ```esql FROM airports | WHERE ST_INTERSECTS(location, TO_GEOSHAPE("POLYGON((42 14, 43 14, 43 15, 42 15, 42 14))")) ``` -This example filters airports to find those whose `location` intersects with the specified polygon. +## Limitations + +- Both parameters must use the same coordinate system; mixing `geo_*` and `cartesian_*` types is not supported. +- If either parameter is `null`, the function returns `null`. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_within.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_within.txt index a150fee1b1b4d..dc3ae59213a05 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_within.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_within.txt @@ -1,6 +1,6 @@ # ST_WITHIN -Determines whether the first geometry is within the second geometry. This is the inverse of the `ST_CONTAINS` function. +Returns whether the first geometry is within the second geometry. This is the inverse of the ST_CONTAINS function. ## Syntax @@ -8,22 +8,19 @@ Determines whether the first geometry is within the second geometry. This is the ### Parameters -#### `geomA` +#### geomA -Expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If `null`, the function returns `null`. +Expression of type `geo_point`, `cartesian_point`, `geo_shape` or `cartesian_shape`. If `null`, the function returns `null`. -#### `geomB` +#### geomB -Expression of type `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. If `null`, the function returns `null`. The second parameter must have the same coordinate system as the first. Combining `geo_*` and `cartesian_*` parameters is not allowed. +Expression of type `geo_point`, `cartesian_point`, `geo_shape` or `cartesian_shape`. If `null`, the function returns `null`. The second parameter must also have the same coordinate system as the first. It is not possible to combine `geo_*` and `cartesian_*` parameters. ## Examples -Filtering rows where a city boundary is within a specified polygon - ```esql FROM airport_city_boundaries | WHERE ST_WITHIN(city_boundary, TO_GEOSHAPE("POLYGON((109.1 18.15, 109.6 18.15, 109.6 18.65, 109.1 18.65, 109.1 18.15))")) | KEEP abbrev, airport, region, city, city_location ``` - -This example filters rows where the `city_boundary` geometry is entirely within the specified polygon. It then keeps only the `abbrev`, `airport`, `region`, `city`, and `city_location` columns. +Filter airport city boundaries to only those within a specified polygon. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_x.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_x.txt index c1b19d393e5cc..68ee0ade8b732 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_x.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_x.txt @@ -1,6 +1,6 @@ # ST_X -Extracts the `x` coordinate from the supplied point. 
For points of type `geo_point`, this corresponds to the `longitude` value. +Extracts the `x` coordinate from a point. For points of type `geo_point`, this is equivalent to extracting the longitude value. ## Syntax @@ -8,15 +8,16 @@ Extracts the `x` coordinate from the supplied point. For points of type `geo_poi ### Parameters -#### `point` +#### point -Expression of type `geo_point` or `cartesian_point`. If `null`, the function returns `null`. +Expression of type `geo_point` or `cartesian_point`. If the value is `null`, the function returns `null`. ## Examples -Extract the `x` (longitude) and `y` (latitude) coordinates from a `geo_point`: +Extract the `x` (longitude) and `y` (latitude) coordinates from a geo point: ```esql ROW point = TO_GEOPOINT("POINT(42.97109629958868 14.7552534006536)") | EVAL x = ST_X(point), y = ST_Y(point) -``` \ No newline at end of file +``` +This example creates a geo point and extracts its longitude and latitude values. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_xmax.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_xmax.txt index 12775a0f427e0..47d3418a97bac 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_xmax.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_xmax.txt @@ -1,6 +1,6 @@ # ST_XMAX -Extracts the maximum value of the `x` coordinates from the supplied geometry. For geometries of type `geo_point` or `geo_shape`, this corresponds to the maximum `longitude` value. +Extracts the maximum value of the `x` coordinates from the supplied geometry. For geometries of type `geo_point` or `geo_shape`, this is equivalent to extracting the maximum `longitude` value. ## Syntax @@ -8,12 +8,14 @@ Extracts the maximum value of the `x` coordinates from the supplied geometry. Fo ### Parameters -#### `point` +#### point -Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If `null`, the function returns `null`. +Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If the value is `null`, the function returns `null`. ## Examples +Extract the minimum and maximum x and y coordinates from the city boundary of the airport with abbreviation "CPH": + ```esql FROM airport_city_boundaries | WHERE abbrev == "CPH" @@ -22,4 +24,6 @@ FROM airport_city_boundaries | KEEP abbrev, airport, xmin, xmax, ymin, ymax ``` -This example calculates the bounding box of the city boundary for the airport with the abbreviation "CPH" and extracts the minimum and maximum `x` and `y` coordinates. \ No newline at end of file +## Limitations + +- Spatial types are not supported in the `SORT` processing command. Specifying a column of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape` as a sort parameter will result in an error. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_xmin.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_xmin.txt index 0f39dbb736d59..36594faa6c37c 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_xmin.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_xmin.txt @@ -1,6 +1,6 @@ # ST_XMIN -Extracts the minimum value of the `x` coordinates from the supplied geometry. 
For geometries of type `geo_point` or `geo_shape`, this corresponds to the minimum `longitude` value. +Extracts the minimum value of the `x` coordinates from the supplied geometry. For geometries of type `geo_point` or `geo_shape`, this is equivalent to extracting the minimum longitude value. ## Syntax @@ -8,15 +8,13 @@ Extracts the minimum value of the `x` coordinates from the supplied geometry. Fo ### Parameters -#### `point` +#### point -Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If `null`, the function returns `null`. +Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If the value is `null`, the function returns `null`. ## Examples -Extracting the bounding box coordinates of a city boundary - -The following query calculates the minimum and maximum `x` and `y` coordinates of the bounding box for the city boundary of the airport with the abbreviation "CPH": +Extract the minimum and maximum x and y coordinates from the city boundary envelope for the airport with abbreviation "CPH": ```esql FROM airport_city_boundaries diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_y.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_y.txt index 08eac86b2bd14..02d50f7af5508 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_y.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_y.txt @@ -1,6 +1,6 @@ # ST_Y -Extracts the `y` coordinate from the supplied point. For points of type `geo_point`, this corresponds to the `latitude` value. +Extracts the `y` coordinate from a point. For points of type `geo_point`, this is equivalent to extracting the latitude value. ## Syntax @@ -8,15 +8,15 @@ Extracts the `y` coordinate from the supplied point. For points of type `geo_poi ### Parameters -#### `point` +#### point -Expression of type `geo_point` or `cartesian_point`. If `null`, the function returns `null`. +Expression of type `geo_point` or `cartesian_point`. If the value is `null`, the function returns `null`. ## Examples +Extract the x and y coordinates from a geo point: + ```esql ROW point = TO_GEOPOINT("POINT(42.97109629958868 14.7552534006536)") -| EVAL x = ST_X(point), y = ST_Y(point) +| EVAL x = ST_X(point), y = ST_Y(point) ``` - -This example extracts the `x` (longitude) and `y` (latitude) coordinates from a `geo_point`. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_ymax.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_ymax.txt index 09a4dbab6c04d..a7a98a0ae1b9f 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_ymax.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_ymax.txt @@ -1,6 +1,6 @@ # ST_YMAX -Extracts the maximum value of the `y` coordinates from the supplied geometry. For geometries of type `geo_point` or `geo_shape`, this corresponds to the maximum `latitude` value. +Extracts the maximum value of the `y` coordinates from the supplied geometry. For `geo_point` or `geo_shape` types, this is equivalent to extracting the maximum latitude value. ## Syntax @@ -8,15 +8,13 @@ Extracts the maximum value of the `y` coordinates from the supplied geometry. 
Fo ### Parameters -#### `point` +#### point -Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If `null`, the function returns `null`. +Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If the value is `null`, the function returns `null`. ## Examples -Extracting the bounding box coordinates of a city boundary - -The following query calculates the bounding box of the city boundary for the airport with the abbreviation "CPH" and extracts the minimum and maximum `x` and `y` coordinates: +Calculate the minimum and maximum x and y coordinates for the city boundary of the airport with abbreviation "CPH": ```esql FROM airport_city_boundaries @@ -25,3 +23,9 @@ FROM airport_city_boundaries | EVAL xmin = ST_XMIN(envelope), xmax = ST_XMAX(envelope), ymin = ST_YMIN(envelope), ymax = ST_YMAX(envelope) | KEEP abbrev, airport, xmin, xmax, ymin, ymax ``` + +**This example extracts the envelope of the city boundary geometry, then calculates the minimum and maximum x and y coordinates, including the maximum y (latitude) using `ST_YMAX`.** + +## Limitations + +Spatial types such as `geo_point`, `geo_shape`, `cartesian_point`, and `cartesian_shape` are not supported in the `SORT` processing command. Attempting to sort on these types will result in an error. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_ymin.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_ymin.txt index 37107e16564cb..1dcf2872f988f 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_ymin.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-st_ymin.txt @@ -1,6 +1,6 @@ # ST_YMIN -Extracts the minimum value of the `y` coordinates from the supplied geometry. For geometries of type `geo_point` or `geo_shape`, this corresponds to the minimum `latitude` value. +Extracts the minimum value of the `y` coordinates from the supplied geometry. For geometries of type `geo_point` or `geo_shape`, this is equivalent to extracting the minimum `latitude` value. ## Syntax @@ -8,13 +8,13 @@ Extracts the minimum value of the `y` coordinates from the supplied geometry. Fo ### Parameters -#### `point` +#### point -Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If `null`, the function returns `null`. +Expression of type `geo_point`, `geo_shape`, `cartesian_point`, or `cartesian_shape`. If the value is `null`, the function returns `null`. ## Examples -Extracting the minimum `y` coordinate from a geometry +Extract the minimum latitude and other envelope coordinates for the city boundary of the airport with abbreviation "CPH": ```esql FROM airport_city_boundaries @@ -23,5 +23,4 @@ FROM airport_city_boundaries | EVAL xmin = ST_XMIN(envelope), xmax = ST_XMAX(envelope), ymin = ST_YMIN(envelope), ymax = ST_YMAX(envelope) | KEEP abbrev, airport, xmin, xmax, ymin, ymax ``` - -This example calculates the bounding box of a city boundary for the airport with the abbreviation "CPH" and extracts the minimum and maximum `x` and `y` coordinates, keeping only the relevant fields in the output. +This example calculates the minimum and maximum x and y coordinates for the city boundary geometry of the specified airport. 
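+As a minimal sketch of how to work around the `SORT` limitation noted for these spatial functions, you can sort on an extracted numeric coordinate instead of the spatial column itself; this assumes the same `airport_city_boundaries` index used in the examples above. +```esql +FROM airport_city_boundaries +| EVAL envelope = ST_ENVELOPE(city_boundary) +| EVAL ymin = ST_YMIN(envelope) +| SORT ymin ASC +| KEEP abbrev, airport, ymin +```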
\ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-starts_with.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-starts_with.txt index 31b096518f349..87b7cc8de0d02 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-starts_with.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-starts_with.txt @@ -1,6 +1,6 @@ # STARTS_WITH -Determines whether a keyword string starts with a specified prefix and returns a boolean result. +The STARTS_WITH function returns a boolean value indicating whether a keyword string begins with a specified prefix string. ## Syntax @@ -8,20 +8,20 @@ Determines whether a keyword string starts with a specified prefix and returns a ### Parameters -#### `str` +#### str -String expression. If `null`, the function returns `null`. +The string expression to evaluate. If this value is `null`, the function returns `null`. -#### `prefix` +#### prefix -String expression. If `null`, the function returns `null`. +The string expression to check as the prefix. If this value is `null`, the function returns `null`. ## Examples +Checks whether each employee's last name begins with the letter "B" and adds the result as a new column. + ```esql FROM employees | KEEP last_name | EVAL ln_S = STARTS_WITH(last_name, "B") -``` - -This example checks if the `last_name` column values start with the letter "B". \ No newline at end of file +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-stats-by.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-stats-by.txt new file mode 100644 index 0000000000000..e7bceea067c7a --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-stats-by.txt @@ -0,0 +1,133 @@ +# STATS-BY + +The STATS command groups rows based on shared values and calculates one or more aggregated values over these groups. Each aggregated value can be filtered using a WHERE clause. If BY is omitted, the output contains a single row with aggregations over the entire dataset. Supported aggregation functions include ABSENT, AVG, COUNT, COUNT_DISTINCT, MAX, MEDIAN, MEDIAN_ABSOLUTE_DEVIATION, MIN, PERCENTILE, PRESENT, SAMPLE, ST_CENTROID_AGG, ST_EXTENT_AGG, STD_DEV, SUM, TOP, VALUES, VARIANCE, and WEIGHTED_AVG. When used with the TS source command, time series aggregation functions are also available. Supported grouping functions are BUCKET, TBUCKET, and CATEGORIZE. + +## Syntax + +`STATS [column1 =] expression1 [WHERE boolean_expression1][, ..., [columnN =] expressionN [WHERE boolean_expressionN]] [BY grouping_expression1[, ..., grouping_expressionN]]` + +### Parameters + +#### columnX + +The name for the aggregated value returned. If omitted, the name defaults to the corresponding expression. If multiple columns share the same name, only the rightmost column is kept. + +#### expressionX + +An expression that computes an aggregated value. + +#### grouping_expressionX + +An expression that determines the values to group by. If its name matches a computed column, that column will be ignored. + +#### boolean_expressionX + +(Optional) A condition that must be met for a row to be included in the evaluation of expressionX. + +## Examples + +Calculates the count of employees for each language and sorts the results by language. 
+```esql +FROM employees +| STATS count = COUNT(emp_no) BY languages +| SORT languages +``` + +Calculates the average value of the languages column across all employees. +```esql +FROM employees +| STATS avg_lang = AVG(languages) +``` + +Calculates both the average and maximum values of the languages column for all employees. +```esql +FROM employees +| STATS avg_lang = AVG(languages), max_lang = MAX(languages) +``` + +Calculates the average salary for employees born before 1960 and those born in or after 1960, grouped by gender, and sorts the results by gender. +```esql +FROM employees +| STATS avg50s = AVG(salary)::LONG WHERE birth_date < "1960-01-01", + avg60s = AVG(salary)::LONG WHERE birth_date >= "1960-01-01" + BY gender +| SORT gender +``` + +Counts the number of employees in different salary ranges: under 40K, between 40K and 60K, over 60K, and the total count. +```esql +FROM employees +| EVAL Ks = salary / 1000 +| STATS under_40K = COUNT(*) WHERE Ks < 40, + inbetween = COUNT(*) WHERE 40 <= Ks AND Ks < 60, + over_60K = COUNT(*) WHERE 60 <= Ks, + total = COUNT(*) +``` + +Calculates the average salary for each combination of hiring year and language, rounds the result, and sorts by hiring year and language. +```esql +FROM employees +| EVAL hired = DATE_FORMAT("yyyy", hire_date) +| STATS avg_salary = AVG(salary) BY hired, languages.long +| EVAL avg_salary = ROUND(avg_salary) +| SORT hired, languages.long +``` + +Demonstrates that if the grouping key is multivalued, the input row is included in all groups for each color. +```esql +ROW price = 10, color = ["blue", "pink", "yellow"] +| STATS SUM(price) BY color +``` + +Shows that if all grouping keys are multivalued, the input row is included in all possible group combinations of color and size. +```esql +ROW price = 10, color = ["blue", "pink", "yellow"], size = ["s", "m", "l"] +| STATS SUM(price) BY color, size +``` + +Illustrates that the input row is included in all groups, including group keys, when grouping by a multivalued column. +```esql +ROW color = ["blue", "pink", "yellow"] +| STATS VALUES(color) BY color +``` + +Demonstrates how to send each group key to the aggregation function by expanding the multivalued column first. +```esql +ROW color = ["blue", "pink", "yellow"] +| MV_EXPAND color +| STATS VALUES(color) BY color +``` + +Calculates the average salary change by first computing the average of salary changes per row and then averaging those values across all employees, rounding the result to 10 decimal places. +```esql +FROM employees +| STATS avg_salary_change = ROUND(AVG(MV_AVG(salary_change)), 10) +``` + +Groups employees by the first letter of their last name and counts the number of employees in each group, then sorts the results. +```esql +FROM employees +| STATS my_count = COUNT() BY LEFT(last_name, 1) +| SORT `LEFT(last_name, 1)` +``` + +Shows that if the output column name is not specified, it defaults to the aggregation expression. +```esql +FROM employees +| STATS AVG(salary) +``` + +Demonstrates how to use backticks to reference column names with special characters and round the average salary. +```esql +FROM employees +| STATS AVG(salary) +| EVAL avg_salary_rounded = ROUND(`AVG(salary)`) +``` + +## Limitations + +- STATS without any groups is significantly faster than when grouping is used. +- Grouping on a single expression is much more optimized than grouping on multiple expressions. Grouping on a single keyword column can be up to five times faster than grouping on two keyword columns. 
Combining columns with CONCAT and then grouping does not improve performance. +- Individual null values are skipped when computing aggregations. +- If a grouping expression or output column name coincides, the column will be ignored. +- Refer to elasticsearch/issues/134792 for more details on multivalued grouping behavior. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-stats.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-stats.txt index 29ff6e3876841..e78a95712bbbc 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-stats.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-stats.txt @@ -1,6 +1,6 @@ # STATS -The `STATS` command groups rows based on a common value and calculates one or more aggregated values over the grouped rows. +The STATS command groups rows by a common value and calculates one or more aggregated values over the grouped rows. You can filter which rows are included in each aggregation using a WHERE clause. If no grouping is specified, the output contains a single row with the aggregation(s) applied over the entire dataset. ## Syntax @@ -8,82 +8,79 @@ The `STATS` command groups rows based on a common value and calculates one or mo ### Parameters -#### `columnX` +#### columnX -The name by which the aggregated value is returned. If omitted, the name defaults to the corresponding expression (`expressionX`). If multiple columns have the same name, all but the rightmost column with this name are ignored. +The name by which the aggregated value is returned. If omitted, the name is equal to the corresponding expression. If multiple columns have the same name, only the rightmost column with this name is used. -#### `expressionX` +#### expressionX An expression that computes an aggregated value. -#### `grouping_expressionX` +#### grouping_expressionX An expression that outputs the values to group by. If its name coincides with one of the computed columns, that column will be ignored. -#### `boolean_expressionX` +#### boolean_expressionX -The condition that must be met for a row to be included in the evaluation of `expressionX`. +(Optional) The condition that must be met for a row to be included in the evaluation of expressionX. + +Individual `null` values are skipped when computing aggregations. ## Examples -### Calculating a statistic and grouping by the values of another column +Calculate the count of employees grouped by language: ```esql FROM employees | STATS count = COUNT(emp_no) BY languages | SORT languages ``` +Groups employees by language and counts the number in each group. -Group rows by the `languages` column and calculate the count of `emp_no` for each group. - -### Omitting `BY` to return one row with the aggregations applied over the entire dataset +Return one row with the average number of languages across all employees: ```esql FROM employees | STATS avg_lang = AVG(languages) ``` +Calculates the average value of the `languages` field for all employees. -Calculate the average number of languages across all rows. - -### Calculating multiple values +Calculate multiple aggregated values: ```esql FROM employees | STATS avg_lang = AVG(languages), max_lang = MAX(languages) ``` +Calculates both the average and maximum number of languages. -Calculate both the average and maximum number of languages. 
- -### Filtering rows that go into an aggregation using `WHERE` +Filter rows for aggregation using WHERE: ```esql FROM employees -| STATS avg50s = AVG(salary)::LONG WHERE birth_date < "1960-01-01" +| STATS avg50s = AVG(salary)::LONG WHERE birth_date < "1960-01-01", avg60s = AVG(salary)::LONG WHERE birth_date >= "1960-01-01" BY gender +| SORT gender ``` +Calculates average salary for employees born before and after 1960, grouped by gender. -Group rows by `gender` and calculate the average salary for employees born before and after 1960. - -### Mixing aggregations with and without filters, and optional grouping +Mix aggregations with and without filters, and optional grouping: ```esql FROM employees | EVAL Ks = salary / 1000 // thousands -| STATS under_40K = COUNT(*) WHERE Ks < 40, inbetween = COUNT(*) WHERE Ks >= 40 and Ks <= 60, over_60K = COUNT(*) WHERE Ks > 60, total = COUNT(*) +| STATS under_40K = COUNT(*) WHERE Ks < 40, inbetween = COUNT(*) WHERE 40 <= Ks AND Ks < 60, over_60K = COUNT(*) WHERE 60 <= Ks, total = COUNT(*) ``` +Counts employees in different salary ranges and totals. -Calculate counts for salary ranges (under 40K, between 40K and 60K, over 60K) and the total count. - -### Grouping by a multivalued key +If the grouping key is multivalued, the input row is included in all groups: ```esql ROW i=1, a=["a", "b"] | STATS MIN(i) BY a | SORT a ASC ``` +Expands the row into multiple groups for each value in `a`. -Group rows by the multivalued column `a` and calculate the minimum value of `i` for each group. - -### Grouping by multiple values +Group by multiple values: ```esql FROM employees @@ -92,60 +89,53 @@ FROM employees | EVAL avg_salary = ROUND(avg_salary) | SORT hired, languages.long ``` +Calculates average salary grouped by hire year and language. -Group rows by the year of hire and the `languages.long` column, then calculate and round the average salary for each group. - -### Grouping by multiple multivalued keys +If all grouping keys are multivalued, the input row is included in all groups: ```esql ROW i=1, a=["a", "b"], b=[2, 3] | STATS MIN(i) BY a, b | SORT a ASC, b ASC ``` +Expands the row into all combinations of `a` and `b`. -Group rows by the multivalued columns `a` and `b` and calculate the minimum value of `i` for each group. - -### Using functions in aggregating and grouping expressions +Use functions in aggregating and grouping expressions: ```esql FROM employees | STATS avg_salary_change = ROUND(AVG(MV_AVG(salary_change)), 10) ``` +Calculates the average salary change, using `MV_AVG` to handle multivalued fields. -Calculate the average salary change using the `MV_AVG` function and round the result to 10 decimal places. - -### Grouping by an expression +Group by an expression, such as the first letter of last name: ```esql FROM employees | STATS my_count = COUNT() BY LEFT(last_name, 1) | SORT `LEFT(last_name, 1)` ``` +Counts employees grouped by the first letter of their last name. -Group rows by the first letter of the `last_name` column and calculate the count for each group. - -### Specifying the output column name (optional) +Specifying the output column name is optional: ```esql FROM employees | STATS AVG(salary) ``` +Returns a column named `AVG(salary)`. -Calculate the average salary. The output column name defaults to `AVG(salary)`. 
- -### Using quoted column names in subsequent commands +Quoting column names with special characters: ```esql FROM employees | STATS AVG(salary) | EVAL avg_salary_rounded = ROUND(`AVG(salary)`) ``` - -Use the calculated column `AVG(salary)` in subsequent commands by quoting its name. +Uses backticks to reference a column with special characters in its name. ## Limitations -- Individual `null` values are skipped when computing aggregations. -- `STATS` without any groups is faster than adding a group. -- Grouping on a single expression is more optimized than grouping on multiple expressions. For example, grouping on a single `keyword` column is significantly faster than grouping on two `keyword` columns. -- Avoid combining columns with functions like `CONCAT` for grouping, as it does not improve performance. +- A bug in the STATS command may yield incorrect results when grouping by exactly two keyword fields, where the first field has high cardinality (more than 65,000 distinct values). This issue was introduced in version 8.16.0 and fixed in 8.17.9 and 8.18.7. Workarounds include switching the order of the grouping keys or reducing the cardinality by filtering before STATS. +- Grouping on a single expression is much more optimized than grouping on multiple expressions. +- STATS without any groups is much faster than adding a group. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-std_dev.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-std_dev.txt index 6950518d29a4e..2c4a2c3552316 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-std_dev.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-std_dev.txt @@ -1,6 +1,6 @@ # STD_DEV -Calculates the standard deviation of a numeric field. +Calculates the population standard deviation of a numeric field. ## Syntax @@ -10,24 +10,20 @@ Calculates the standard deviation of a numeric field. #### number -A numeric field for which the standard deviation is calculated. +The numeric column or expression for which to calculate the population standard deviation. ## Examples -Calculate the standard deviation of a field +Calculates the population standard deviation of the height field for all employees. ```esql FROM employees -| STATS STD_DEV(height) +| STATS std_dev_height = STD_DEV(height) ``` -Calculate the standard deviation of the `height` field. - -Use inline functions with STD_DEV +Calculates the population standard deviation of the maximum salary change for each employee. ```esql FROM employees | STATS stddev_salary_change = STD_DEV(MV_MAX(salary_change)) -``` - -Calculate the standard deviation of each employee’s maximum salary changes by first applying the `MV_MAX` function to determine the maximum salary change per row, and then using `STD_DEV` on the result. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-stddev_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-stddev_over_time.txt new file mode 100644 index 0000000000000..99cff8aecff8d --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-stddev_over_time.txt @@ -0,0 +1,26 @@ +# STDDEV_OVER_TIME + +Calculates the population standard deviation of a numeric field over a specified time window. 
+ +## Syntax + +`STDDEV_OVER_TIME(field)` + +### Parameters + +#### field + +The metric field to calculate the standard deviation for. + +The time window is not passed as a parameter; it is determined by the time bucketing (for example `TBUCKET`) in the enclosing `STATS ... BY` clause. + +## Examples + +Calculates the maximum standard deviation of the `network.cost` field over time, grouping results by cluster and 1-minute time buckets. + +```esql +TS k8s +| STATS max_stddev_cost=MAX(STDDEV_OVER_TIME(network.cost)) BY cluster, time_bucket = TBUCKET(1minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-substring.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-substring.txt index e76ce4936c122..4c3d99501ed2d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-substring.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-substring.txt @@ -1,6 +1,6 @@ # SUBSTRING -The `SUBSTRING` function extracts a portion of a string based on a specified start position and an optional length. +The SUBSTRING function returns a substring from a string, based on a specified start position and an optional length. ## Syntax @@ -8,50 +8,37 @@ The `SUBSTRING` function extracts a portion of a string based on a specified sta ### Parameters -#### `string` +#### string -The string expression to extract the substring from. If `null`, the function returns `null`. +The string expression to extract the substring from. If the value is `null`, the function returns `null`. -#### `start` +#### start -The starting position for the substring. A negative value is interpreted as being relative to the end of the string. +The position in the string where the substring begins. Negative values are interpreted as positions relative to the end of the string. -#### `length` (Optional) +#### length -The length of the substring to extract, starting from the `start` position. If omitted, the function returns all characters from the `start` position to the end of the string. +(Optional) The number of characters to include in the substring, starting from the start position. If omitted, all characters after the start position are returned. ## Examples -Extract the first three characters of every last name - +Extracts the first three characters from each employee's last name. ```esql FROM employees | KEEP last_name | EVAL ln_sub = SUBSTRING(last_name, 1, 3) ``` -This example extracts the first three characters from the `last_name` column. - -Extract the last three characters of every last name - +Extracts the last three characters from each employee's last name by using a negative start position. ```esql FROM employees | KEEP last_name | EVAL ln_sub = SUBSTRING(last_name, -3, 3) ``` -This example extracts the last three characters from the `last_name` column by using a negative start position. - -Extract all characters except for the first - +Extracts all characters from each employee's last name except for the first character by omitting the length parameter. ```esql FROM employees | KEEP last_name | EVAL ln_sub = SUBSTRING(last_name, 2) -``` - -This example extracts all characters from the `last_name` column starting from the second character, as the `length` parameter is omitted. - -## Limitations - -No specific limitations are mentioned for the `SUBSTRING` function.
+``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-suffix operators.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-suffix operators.txt new file mode 100644 index 0000000000000..05748c3e534a0 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-suffix operators.txt @@ -0,0 +1,30 @@ +# SUFFIX OPERATORS + +The `IS NULL` and `IS NOT NULL` operators are used to check whether a column contains a NULL value or not. + +## Syntax + +`column IS NULL` + +`column IS NOT NULL` + +### Parameters + +#### column + +The column to be checked for NULL values. + +## Examples + +Finds all employees whose `birth_date` field is NULL. +```esql +FROM employees +| WHERE birth_date IS NULL +``` + +Counts the number of employees who have a non-NULL value in the `is_rehired` field. +```esql +FROM employees +| WHERE is_rehired IS NOT NULL +| STATS COUNT(emp_no) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sum.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sum.txt index 8955e9a099eea..063f17bcb35e7 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sum.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sum.txt @@ -1,6 +1,6 @@ # SUM -The `SUM` function calculates the total of a numeric expression. +The SUM function calculates the total of a numeric expression. ## Syntax @@ -8,26 +8,20 @@ The `SUM` function calculates the total of a numeric expression. ### Parameters -#### `number` +#### number -A numeric expression to be summed. +The numeric expression to be summed. Inline functions can be used within this parameter. ## Examples -#Summing a field - +Calculates the total sum of all values in the `languages` column from the `employees` data. ```esql FROM employees | STATS SUM(languages) ``` -Calculate the total number of languages across all employees. - -#Using inline functions - +Calculates the sum of each employee’s maximum salary change by first finding the maximum value in the `salary_change` array for each employee, then summing those maximums across all employees. ```esql FROM employees | STATS total_salary_changes = SUM(MV_MAX(salary_change)) -``` - -Calculate the total of each employee’s maximum salary changes by applying the `MV_MAX` function to each row and summing the results. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sum_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sum_over_time.txt new file mode 100644 index 0000000000000..1d5a0cbadfb1c --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-sum_over_time.txt @@ -0,0 +1,26 @@ +# SUM_OVER_TIME + +Calculates the sum of a field's values over a specified time window. + +## Syntax + +`SUM_OVER_TIME(field)` + +### Parameters + +#### field + +The metric field to calculate the sum for. + +The time window is not passed as a parameter; it is determined by the time bucketing (for example `TBUCKET`) in the enclosing `STATS ... BY` clause. + +## Examples + +Calculates the sum of network cost over each 1-minute interval, grouped by cluster.
+ +```esql +TS k8s +| STATS sum_cost=SUM(SUM_OVER_TIME(network.cost)) BY cluster, time_bucket = TBUCKET(1minute) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-syntax.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-syntax.txt index bbf8063498c39..55982d344e673 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-syntax.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-syntax.txt @@ -1,156 +1,237 @@ ```markdown # ES|QL Syntax Reference -## Overview +The **Elasticsearch Query Language (ES|QL)** provides a powerful, piped syntax for filtering, transforming, and analyzing data stored in Elasticsearch. ES|QL is designed to be easy to learn and use, supporting a wide range of commands, functions, and operators for data operations such as filtering, aggregation, time-series analysis, and more. -The Elasticsearch Query Language (ES|QL) provides a powerful and flexible way to query, filter, transform, and analyze data stored in Elasticsearch. ES|QL uses a piped syntax (`|`) to chain commands and functions, enabling users to compose complex queries in a step-by-step manner. Each query starts with a source command (e.g., `FROM`) and can be followed by one or more processing commands. +--- + +## Basic Syntax + +An ES|QL query is composed of a **source command** followed by an optional series of **processing commands**, separated by a pipe character (`|`). Each command transforms the data, and the result of a query is the table produced by the final processing command. -### Basic Syntax +**Example:** -An ES|QL query is composed of: -1. A **source command**: Retrieves data from indices, data streams, or aliases. -2. A series of **processing commands**: Transform or filter the data. +```esql +FROM employees +| WHERE height > 2 +| SORT last_name +``` -Commands are separated by the pipe character (`|`), and the result of one command is passed as input to the next. For example: +You can write ES|QL queries as a single line or with each command on a new line for readability: ```esql FROM employees | WHERE height > 2 -| SORT height DESC +| SORT last_name ``` -The result of the query is the table produced by the final processing command. +--- -### Identifiers +## Identifiers -Identifiers must be quoted with backticks (`` ` ``) if: -- They don’t start with a letter, `_`, or `@`. -- They contain characters other than letters, numbers, or `_`. +- Identifiers (field or column names) must be quoted with backticks (`` ` ``) if: + - They don’t start with a letter, `_`, or `@` + - Any character is not a letter, number, or `_` + +**Example:** -For example: ```esql FROM index | KEEP `1.field` ``` -### Literals +When referencing a function alias that itself uses a quoted identifier, escape the backticks: -#### String Literals -String literals are enclosed in double quotes (`"`). If the string contains quotes, escape them with `\\` or use triple quotes (`"""`): ```esql -ROW name = """Indiana "Indy" Jones""" +FROM index +| STATS COUNT(`1.field`) +| EVAL my_count = `COUNT(``1.field``)` ``` -#### Numeric Literals -Numeric literals can be expressed in decimal or scientific notation: +--- + +## Literals + +### String Literals + +- Delimited by double quotes (`"`). +- If the string contains quotes, escape them (`\"`). +- Triple quotes (`"""`) can be used for convenience. 
+ +**Examples:** + +```esql +FROM index +| WHERE first_name == "Georgi" + +ROW name = """Indiana "Indy" Jones""" +``` + +Special characters: +- `\r` (carriage return) +- `\n` (line feed) +- `\t` (tab) + +### Numeric Literals + +- Decimal and scientific notation are supported. + +**Examples:** + +```esql +ROW a = 1969, b = 3.14, c = .1234, d = 4E5, e = 1.2e-3, f = -.1e2 +``` + +--- + +## Comments + +- Single line: `//` +- Block: `/* ... */` + +**Examples:** + ```esql // Query the employees index FROM employees | WHERE height > 2 + +FROM /* Query the employees index */ employees +| WHERE height > 2 + +FROM employees +/* Query the + * employees + * index */ +| WHERE height > 2 ``` -### Timespan Literals + +--- + +## Timespan Literals + +**Timespan literals** express datetime intervals and durations. They are a combination of a number and a temporal unit (e.g., `1 day`, `24h`, `7 weeks`). Whitespace is not significant: + +- `1day` +- `1 day` +- `1 day` + +Timespan literals are used in many ES|QL functions and commands, such as `DATE_TRUNC`, `BUCKET`, and in date math expressions. + +--- + +## Function Named Parameters + +Some functions (like `MATCH`) support named parameters using JSON-like syntax: -Timespan literals represent datetime intervals and are expressed as a combination of a number and a temporal unit (e.g., `1 day`, `24h`, `7 weeks`). They are not whitespace-sensitive: ```esql -1day -1 day -1 day +FROM library +| WHERE match(author, "Frank Herbert", {"minimum_should_match": 2, "operator": "AND"}) +| LIMIT 5 ``` -Timespan literals can be used in various commands and functions, such as `WHERE`, `DATE_TRUNC`, and `BUCKET`. - --- -## Example Queries Using Timespan Literals +# Examples: Using Timespan Literals in ES|QL + +Below are several ES|QL queries demonstrating the use of timespan literals with different commands and functions. Each example uses a different interval or unit, and combines ISO timestamps, `NOW()`, and timespan literals. -Below are five example queries showcasing the use of timespan literals in combination with different commands and functions. +--- -Filtering Logs from the Last 24 Hours -This query retrieves logs from the last 24 hours and calculates the total number of logs per hour. +### 1. Filter Events from the Last 24 Hours ```esql FROM logs-* | WHERE @timestamp >= NOW() - 24h -| EVAL hour = DATE_TRUNC(1 hour, @timestamp) -| STATS log_count = COUNT(*) BY hour -| SORT hour +| SORT @timestamp DESC +| LIMIT 10 ``` +*This query retrieves the 10 most recent log entries from the last 24 hours.* + +--- -Grouping by Weekly Buckets -This query groups employee hire dates into weekly buckets for the last 7 weeks and calculates the number of hires per week. +### 2. Aggregate Events into 1-Hour Buckets for the Last Day ```esql -FROM employees -| WHERE hire_date >= NOW() - 7 weeks -| STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, 1 week) -| SORT week +FROM logs-* +| WHERE @timestamp >= NOW() - 1 day AND @timestamp < NOW() +| STATS event_count = COUNT(*) BY hour_bucket = BUCKET(@timestamp, 1 hour) +| SORT hour_bucket ``` +*This query counts events per hour for the last day, using `BUCKET` with a `1 hour` timespan.* + +--- -Calculating Monthly Averages -This query calculates the average salary of employees grouped by monthly buckets for the year 2023. +### 3.
Calculate Average Value per Day for the Last Week ```esql -FROM employees -| WHERE hire_date >= "2023-01-01T00:00:00Z" AND hire_date < "2024-01-01T00:00:00Z" -| EVAL month = DATE_TRUNC(1 month, hire_date) -| STATS avg_salary = AVG(salary) BY month -| SORT month +FROM metrics +| WHERE timestamp >= NOW() - 7 days +| STATS avg_value = AVG(value) BY day = DATE_TRUNC(1 day, timestamp) +| SORT day ``` +*This query calculates the daily average of the `value` field for the past 7 days, using `DATE_TRUNC` with a `1 day` interval.* -Creating Hourly Buckets for the Last Day -This query creates hourly buckets for the last 1 day and calculates the total number of events in each bucket. +--- + +### 4. Group Data into Weekly Buckets Over a Custom Range ```esql -FROM events -| WHERE @timestamp >= NOW() - 1 day -| STATS event_count = COUNT(*) BY hour = BUCKET(@timestamp, 1 hour) -| SORT hour +FROM sales +| WHERE sale_date >= "2024-05-01T00:00:00Z" AND sale_date < "2024-06-01T00:00:00Z" +| STATS total_sales = SUM(amount) BY week = BUCKET(sale_date, 1 week) +| SORT week ``` +*This query sums sales amounts per week for May 2024, using `BUCKET` with a `1 week` interval.* -Filtering and Aggregating by Custom Time Range -This query filters logs within a custom time range and calculates the maximum response time for each 6-hour interval. +--- + +### 5. Find Events in the Last 3 Hours and Truncate to 30-Minute Intervals ```esql -FROM logs-* -| WHERE @timestamp >= "2023-10-01T00:00:00Z" AND @timestamp < "2023-10-02T00:00:00Z" -| EVAL interval = DATE_TRUNC(6 hours, @timestamp) -| STATS max_response_time = MAX(response_time) BY interval +FROM events +| WHERE event_time >= NOW() - 3h +| EVAL interval = DATE_TRUNC(30 minutes, event_time) +| STATS count = COUNT(*) BY interval | SORT interval ``` +*This query counts events in 30-minute intervals for the last 3 hours, using `DATE_TRUNC` with a `30 minutes` timespan.* --- -## Key Features of ES|QL Syntax +### 6. Calculate Median Value in 15-Minute Buckets for the Last 2 Hours -### Named Parameters in Functions -Some functions, like `MATCH`, support named parameters for additional options: ```esql -FROM library -| WHERE MATCH(author, "Frank Herbert", {"minimum_should_match": 2, "operator": "AND"}) -| LIMIT 5 +FROM sensor_data +| WHERE reading_time >= NOW() - 2h +| STATS median_reading = MEDIAN(reading) BY bucket = BUCKET(reading_time, 15 minutes) +| SORT bucket ``` +*This query calculates the median sensor reading in 15-minute buckets for the last 2 hours.* + +--- -### Supported Commands and Functions -ES|QL supports a wide range of commands and functions for filtering, transforming, and analyzing data. For example: -- **Commands**: `FROM`, `WHERE`, `SORT`, `STATS`, `EVAL`, `KEEP`, `DROP`, `LIMIT`, `BUCKET`, `DATE_TRUNC` -- **Functions**: `COUNT`, `AVG`, `MAX`, `MIN`, `DATE_EXTRACT`, `DATE_DIFF`, `CASE` +### 7. Filter and Aggregate Using Weeks and Days -Refer to the [Commands](#commands) and [Functions](#functions) sections for detailed descriptions and examples. +```esql +FROM activity_logs +| WHERE activity_date >= NOW() - 2 weeks +| STATS daily_count = COUNT(*) BY day = DATE_TRUNC(1 day, activity_date) +| SORT day +``` +*This query counts activities per day for the last two weeks.* --- -## Conclusion +## Summary -ES|QL provides a robust and intuitive syntax for querying and analyzing data in Elasticsearch. By leveraging its piped syntax, timespan literals, and rich set of commands and functions, users can perform complex data transformations and aggregations with ease. 
The examples above demonstrate the flexibility and power of ES|QL, making it a valuable tool for data exploration and analysis. -``` +- **ES|QL** uses a piped syntax for composing queries. +- **Timespan literals** (e.g., `1 day`, `24h`, `7 weeks`) are used for date math, bucketing, and truncation. +- **Commands and functions** such as `BUCKET`, `DATE_TRUNC`, `NOW()`, and `WHERE` support timespan literals. +- **Identifiers** and **literals** follow specific quoting and escaping rules. +- **Comments** can be added using `//` or `/* ... */`. + +For a complete list of commands, functions, and operators, refer to the [ES|QL Reference](#). + +--- +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tan.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tan.txt index 059ea12dcbf12..550bc26f8609d 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tan.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tan.txt @@ -1,6 +1,6 @@ -## TAN +# TAN -The `TAN` function calculates the tangent of a given angle. +The TAN function returns the tangent of a given angle, where the angle is specified in radians. ## Syntax @@ -8,15 +8,15 @@ The `TAN` function calculates the tangent of a given angle. ### Parameters -#### `angle` +#### angle -An angle, in radians. If `null`, the function returns `null`. +An angle in radians. If the value is `null`, the function returns `null`. ## Examples -Calculate the tangent of the angle `1.8` radians: +Calculates the tangent of the value in column `a` (which is set to 1.8 radians) and stores the result in a new column called `tan`: ```esql ROW a=1.8 -| EVAL tan = TAN(a) -``` \ No newline at end of file +| EVAL tan=TAN(a) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tanh.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tanh.txt index 0f623b83e8cfb..451b2c68bceda 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tanh.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tanh.txt @@ -8,15 +8,15 @@ Returns the hyperbolic tangent of a number. ### Parameters -#### `number` +#### number Numeric expression. If `null`, the function returns `null`. ## Examples +Calculates the hyperbolic tangent of the value in column `a` (which is set to 1.8). + ```esql ROW a=1.8 -| EVAL tanh = TANH(a) -``` - -Calculate the hyperbolic tangent of the value `1.8`. \ No newline at end of file +| EVAL tanh=TANH(a) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tau.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tau.txt index 45d8c5cdebdaa..912821de7cdcb 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tau.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tau.txt @@ -1,6 +1,6 @@ # TAU -Returns the ratio of a circle’s circumference to its radius. +The TAU function returns the ratio of a circle’s circumference to its radius, which is a mathematical constant equal to approximately 6.283185307179586. ## Syntax @@ -8,12 +8,12 @@ Returns the ratio of a circle’s circumference to its radius. ### Parameters -This function does not take any parameters. 
+This function does not require any parameters. ## Examples +Returns the value of the mathematical constant TAU as a single row. + ```esql ROW TAU() -``` - -This example returns the mathematical constant τ (tau). +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tbucket.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tbucket.txt new file mode 100644 index 0000000000000..2c4f2ee0c13df --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-tbucket.txt @@ -0,0 +1,35 @@ +# TBUCKET + +The TBUCKET function creates groups of values, known as buckets, from a `@timestamp` attribute. You must specify the size of each bucket directly, using a time duration or date period. + +## Syntax + +`TBUCKET(buckets)` + +### Parameters + +#### buckets + +The desired bucket size. This must be provided as a time duration or date period, either directly or as a string (for example, `1 hour` or `"1 hour"`). The reference point for bucketing is the epoch, which starts at `0001-01-01T00:00:00Z`. + +## Examples + +Groups the `@timestamp` values into hourly buckets and calculates the minimum and maximum timestamp in each bucket, then sorts the results by the minimum timestamp. + +```esql +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET(1 hour) +| SORT min +``` + +Groups the `@timestamp` values into hourly buckets using a string for the bucket size, calculates the minimum and maximum timestamp in each bucket, and sorts the results by the minimum timestamp. + +```esql +FROM sample_data +| STATS min = MIN(@timestamp), max = MAX(@timestamp) BY bucket = TBUCKET("1 hour") +| SORT min +``` + +## Limitations + +The bucket size must be a time duration or date period, either as a direct value or a string. The reference for bucketing is the epoch (`0001-01-01T00:00:00Z`). \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-text_embedding.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-text_embedding.txt new file mode 100644 index 0000000000000..063156f9e28e9 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-text_embedding.txt @@ -0,0 +1,27 @@ +# TEXT_EMBEDDING + +Generates dense vector embeddings from text input using a specified inference endpoint. This function is useful for creating query vectors for KNN searches against your vectorized data or for other dense vector operations. + +## Syntax + +`TEXT_EMBEDDING(text, inference_id)` + +### Parameters + +#### text + +Text string to generate embeddings from. Must be a non-null literal string value. + +#### inference_id + +Identifier of an existing inference endpoint that will generate the embeddings. The inference endpoint must have the `text_embedding` task type and should use the same model that was used to embed your indexed data. + +## Examples + +Generates a dense vector embedding for the phrase "be excellent to each other" using the 'test_dense_inference' endpoint and performs a KNN search against the `text_embedding_field`. 
+ +```esql +FROM dense_vector_text METADATA _score +| WHERE KNN(text_embedding_field, TEXT_EMBEDDING("be excellent to each other", "test_dense_inference")) +``` +This example generates a dense vector embedding for the phrase "be excellent to each other" using the 'test_dense_inference' endpoint and performs a KNN search against the `text_embedding_field`. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_aggregate_metric_double.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_aggregate_metric_double.txt new file mode 100644 index 0000000000000..92550134963cb --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_aggregate_metric_double.txt @@ -0,0 +1,31 @@ +# TO_AGGREGATE_METRIC_DOUBLE + +The TO_AGGREGATE_METRIC_DOUBLE function encodes a numeric value as an aggregate_metric_double. + +## Syntax + +`TO_AGGREGATE_METRIC_DOUBLE(number)` + +### Parameters + +#### number + +Input value to be encoded. The input can be a single- or multi-valued column or an expression. + +## Examples + +Encode a single numeric value as an aggregate_metric_double: +Encodes the numeric value 3892095203 as an aggregate_metric_double and stores it in a new column. + +```esql +ROW x = 3892095203 +| EVAL agg_metric = TO_AGGREGATE_METRIC_DOUBLE(x) +``` + +Encode multiple numeric values as an aggregate_metric_double: +Encodes the array of numeric values [5032, 11111, 40814] as an aggregate_metric_double and stores it in a new column. + +```esql +ROW x = [5032, 11111, 40814] +| EVAL agg_metric = TO_AGGREGATE_METRIC_DOUBLE(x) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_base64.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_base64.txt index e0c64eba07d58..5b1445fd7b4f9 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_base64.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_base64.txt @@ -1,15 +1,22 @@ -## TO_BASE64 +# TO_BASE64 -Encodes a string to a base64 string. +The TO_BASE64 command encodes a string into its base64 representation. -### Examples +## Syntax + +`TO_BASE64(string)` + +### Parameters + +#### string + +The string value to be encoded. + +## Examples + +Encodes the string "elastic" into its base64 format and stores the result in a new column. ```esql ROW a = "elastic" | EVAL e = TO_BASE64(a) -``` - -```esql -ROW text = "Hello, World!" -| EVAL encoded_text = TO_BASE64(text) ``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_boolean.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_boolean.txt index 1f02e8fb48210..b8156f318558c 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_boolean.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_boolean.txt @@ -1,6 +1,6 @@ # TO_BOOLEAN -Converts an input value to a boolean value. A string value of `true` will be case-insensitively converted to the Boolean `true`. For anything else, including the empty string, the function will return `false`. The numerical value of `0` will be converted to `false`, and anything else will be converted to `true`. +Converts an input value to a boolean value. 
A string value of `true` is case-insensitively converted to the Boolean `true`. Any other string, including the empty string, returns `false`. The numerical value `0` is converted to `false`, while any other number is converted to `true`. ## Syntax @@ -8,28 +8,15 @@ Converts an input value to a boolean value. A string value of `true` will be cas ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +Input value to be converted. The input can be a single- or multi-valued column or an expression. ## Examples +Converts each string in the list to its boolean equivalent, where only case-insensitive "true" becomes true and all other values become false. + ```esql ROW str = ["true", "TRuE", "false", "", "yes", "1"] | EVAL bool = TO_BOOLEAN(str) -``` - -This example converts a multi-valued string column into boolean values. For instance: -- `"true"` and `"TRuE"` are converted to `true`. -- `"false"`, `""` (empty string), and other non-`true` strings are converted to `false`. -- `"1"` is converted to `true`. - -```esql -ROW num = [0, 1, 2, -1] -| EVAL bool = TO_BOOLEAN(num) -``` - -## Notes - -- A string value of `true` is case-insensitively converted to the boolean `true`. For any other value, including an empty string, the function returns `false`. -- A numerical value of `0` is converted to `false`, while any other numerical value is converted to `true`. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_cartesianpoint.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_cartesianpoint.txt index 4e5f682fe4ac9..253a97222d270 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_cartesianpoint.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_cartesianpoint.txt @@ -1,6 +1,6 @@ # TO_CARTESIANPOINT -Converts an input value to a `cartesian_point` value. A string will only be successfully converted if it adheres to the WKT Point format. +Converts an input value to a `cartesian_point` value. A string will only be successfully converted if it follows the WKT Point format. ## Syntax @@ -8,20 +8,17 @@ Converts an input value to a `cartesian_point` value. A string will only be succ ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +Input value to be converted. The input can be a single- or multi-valued column or an expression. ## Examples -Convert WKT-formatted strings to `cartesian_point` values: +Expands a list of WKT Point strings and converts each string to a cartesian_point value: ```esql ROW wkt = ["POINT(4297.11 -1475.53)", "POINT(7580.93 2272.77)"] | MV_EXPAND wkt | EVAL pt = TO_CARTESIANPOINT(wkt) ``` - -## Limitations - -- The input string must strictly follow the WKT Point format for successful conversion. \ No newline at end of file +This example expands a list of WKT Point strings and converts each to a cartesian_point value. 
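+
+A minimal single-value sketch (assuming the same WKT Point format applies to a scalar string as to the multi-valued case above):
+
+```esql
+ROW wkt = "POINT(4297.11 -1475.53)"
+| EVAL pt = TO_CARTESIANPOINT(wkt)
+```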
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_cartesianshape.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_cartesianshape.txt index fc19a6fb45a7d..11344c5d106ea 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_cartesianshape.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_cartesianshape.txt @@ -1,6 +1,6 @@ # TO_CARTESIANSHAPE -Converts an input value to a `cartesian_shape` value. A string will only be successfully converted if it adheres to the WKT (Well-Known Text) format. +Converts an input value to a `cartesian_shape` value. The conversion is successful for strings that follow the WKT (Well-Known Text) format. ## Syntax @@ -10,21 +10,15 @@ Converts an input value to a `cartesian_shape` value. A string will only be succ #### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +Input value to be converted. The input can be a single- or multi-valued column, or an expression. ## Examples -Converting WKT strings to `cartesian_shape` +Converts a list of WKT strings into individual cartesian shape values by expanding the list and applying the conversion to each element. ```esql ROW wkt = ["POINT(4297.11 -1475.53)", "POLYGON ((3339584.72 1118889.97, 4452779.63 4865942.27, 2226389.81 4865942.27, 1113194.90 2273030.92, 3339584.72 1118889.97))"] | MV_EXPAND wkt | EVAL geom = TO_CARTESIANSHAPE(wkt) ``` - -This example converts a multi-valued column containing WKT strings into `cartesian_shape` values. - -## Notes - -- The input value can be a single or multi-valued column or an expression. -- The function will only successfully convert a string if it adheres to the WKT format. +This example expands a list of WKT strings and converts each to a cartesian shape value. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_date_nanos.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_date_nanos.txt index b4c2d8cdf21ee..c0b1ff40af2f8 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_date_nanos.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_date_nanos.txt @@ -1,6 +1,6 @@ # TO_DATE_NANOS -Converts an input to a nanosecond-resolution date value (`date_nanos`). +Converts an input value to a nanosecond-resolution date value (date_nanos). The valid range for date nanos is from 1970-01-01T00:00:00.000000000Z to 2262-04-11T23:47:16.854775807Z. Values outside this range will result in null and a warning. Integers cannot be converted to date nanos, as their range only covers about 2 seconds after the epoch. ## Syntax @@ -8,31 +8,22 @@ Converts an input to a nanosecond-resolution date value (`date_nanos`). ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +The input value to convert. This can be a single- or multi-valued column or an expression. ## Examples -Converting a timestamp string to `date_nanos` +Filters the `date_nanos` table to rows where the minimum value in the `nanos` column is before a specific nanosecond timestamp and the `millis` column is after January 1, 2000, then sorts the results by `nanos` in descending order. 
```esql -ROW timestamp = "2023-10-26T12:34:56.123456789Z" -| EVAL nanos_date = TO_DATE_NANOS(timestamp) +FROM date_nanos +| WHERE MV_MIN(nanos) < TO_DATE_NANOS("2023-10-23T12:27:28.948Z") + AND millis > "2000-01-01" +| SORT nanos DESC ``` -This example converts a timestamp string into a `date_nanos` value. - -Handling values outside the `date_nanos` range - -```esql -ROW timestamp = "2500-01-01T00:00:00.000000000Z" -| EVAL nanos_date = TO_DATE_NANOS(timestamp) -``` - -This example attempts to convert a timestamp outside the valid `date_nanos` range. The result will be `null` with a warning. - ## Limitations -- The valid range for `date_nanos` is from `1970-01-01T00:00:00.000000000Z` to `2262-04-11T23:47:16.854775807Z`. Values outside this range will result in `null` and trigger a warning. -- Integer values cannot be converted into `date_nanos` because the range of integer nanoseconds only spans approximately 2 seconds after the epoch. +- The valid range for date nanos is 1970-01-01T00:00:00.000000000Z to 2262-04-11T23:47:16.854775807Z. Values outside this range will result in null and a warning. +- Integers cannot be converted to date nanos, as their range only covers about 2 seconds after the epoch. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_dateperiod.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_dateperiod.txt index 8bcb12953dd7d..0b8d97837ef38 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_dateperiod.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_dateperiod.txt @@ -1,6 +1,6 @@ # TO_DATEPERIOD -Converts an input value into a `date_period` value. +The TO_DATEPERIOD function converts an input value into a `date_period` value. ## Syntax @@ -8,15 +8,14 @@ Converts an input value into a `date_period` value. ### Parameters -#### `field` +#### field -The input value. This must be a valid constant date period expression. +Input value. The input must be a valid constant date period expression. ## Examples +Adds and subtracts a date period of 3 days to and from a datetime value. ```esql ROW x = "2024-01-01"::datetime -| EVAL y = x + "3 DAYS"::date_period, z = x - TO_DATEPERIOD(`3 days`) -``` - -This example demonstrates how to add and subtract a `date_period` value to/from a datetime field. \ No newline at end of file +| EVAL y = x + "3 DAYS"::date_period, z = x - TO_DATEPERIOD("3 days"); +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_datetime.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_datetime.txt index 5f2d4b6e3b360..79b6825f842e3 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_datetime.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_datetime.txt @@ -1,6 +1,6 @@ # TO_DATETIME -Converts an input value to a date value. A string will only be successfully converted if it follows the format `yyyy-MM-dd'T'HH:mm:ss.SSS'Z'`. For other date formats, use the `DATE_PARSE` function. When converting from nanosecond resolution to millisecond resolution, the nanosecond date is truncated, not rounded. +The TO_DATETIME function converts an input value to a date value. Strings are only converted successfully if they match the format `yyyy-MM-dd'T'HH:mm:ss.SSS'Z'`. 
For other date formats, use the `DATE_PARSE` function. When converting from nanosecond to millisecond resolution, the nanosecond date is truncated, not rounded. ## Syntax @@ -8,43 +8,27 @@ Converts an input value to a date value. A string will only be successfully conv ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +The input value to convert. This can be a single- or multi-valued column, or an expression. ## Examples -Converting strings to datetime +Converts each string in a multi-valued column to a datetime, returning `null` for strings that do not match the required format. ```esql ROW string = ["1953-09-02T00:00:00.000Z", "1964-06-02T00:00:00.000Z", "1964-06-02 00:00:00"] | EVAL datetime = TO_DATETIME(string) ``` -In this example, the first two values in the `string` column are successfully converted to datetime values because they follow the required format. However, the last value does not match the format and is converted to `null`. When this happens, a **Warning** header is added to the response, providing details about the failure: - -``` -"Line 1:112: evaluation of [TO_DATETIME(string)] failed, treating result as null. "Only first 20 failures recorded."" -``` - -A subsequent header will include the failure reason and the problematic value: - -``` -"java.lang.IllegalArgumentException: failed to parse date field [1964-06-02 00:00:00] -with format [yyyy-MM-dd'T'HH:mm:ss.SSS'Z']" -``` - -Converting numeric values to datetime - -If the input is numeric, the value is interpreted as milliseconds since the [Unix epoch](https://en.wikipedia.org/wiki/Unix_time). For example: +Converts integer values representing milliseconds since the Unix epoch to datetime values. ```esql ROW int = [0, 1] | EVAL dt = TO_DATETIME(int) ``` -In this example, the numeric values `0` and `1` are converted to datetime values representing the Unix epoch and one millisecond after the epoch, respectively. - -## Notes +## Limitations -- A string will only be successfully converted if it’s respecting the format yyyy-MM-dd'T'HH:mm:ss.SSS'Z'. To convert dates in other formats, use DATE_PARSE. +- Only strings matching the format `yyyy-MM-dd'T'HH:mm:ss.SSS'Z'` are converted; others result in `null`. +- When converting from nanosecond to millisecond resolution, the value is truncated, not rounded. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_degrees.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_degrees.txt index 64b92f493c4c4..c46a56ff7c590 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_degrees.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_degrees.txt @@ -1,6 +1,6 @@ # TO_DEGREES -Converts a number in radians to degrees. +Converts a numeric value from radians to degrees. ## Syntax @@ -8,23 +8,14 @@ Converts a number in radians to degrees. ### Parameters -#### `number` +#### number -The input value to be converted. It can be a single- or multi-valued column or an expression. +Input value to be converted from radians to degrees. The input can be a single- or multi-valued column or an expression. ## Examples -Convert a list of radian values to degrees: - +Converts an array of radian values to their corresponding degree values and stores the result in a new column. 
```esql ROW rad = [1.57, 3.14, 4.71] | EVAL deg = TO_DEGREES(rad) -``` - -### Result - -| rad | deg | -|-------|-----------| -| 1.57 | 89.954373 | -| 3.14 | 179.908747| -| 4.71 | 269.86312 | \ No newline at end of file +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_dense_vector.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_dense_vector.txt new file mode 100644 index 0000000000000..81072728143ca --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_dense_vector.txt @@ -0,0 +1,25 @@ +# TO_DENSE_VECTOR + +The TO_DENSE_VECTOR function converts a multi-valued input of numbers or a hexadecimal string into a dense_vector. + +## Syntax + +`TO_DENSE_VECTOR(field)` + +### Parameters + +#### field + +The multi-valued input of numbers or hexadecimal string to convert. + +## Examples + +Convert an array of integers into a dense_vector: + +Takes an array of integers and converts it into a dense_vector, keeping only the resulting vector. + +```esql +ROW ints = [1, 2, 3] +| EVAL vector = TO_DENSE_VECTOR(ints) +| KEEP vector +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_double.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_double.txt index 2a802fbc1c6a0..13c5f9508a191 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_double.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_double.txt @@ -1,6 +1,6 @@ # TO_DOUBLE -The TO_DOUBLE function converts an input value into a double value. +Converts an input value to a double. If the input is a date type, its value is interpreted as milliseconds since the Unix epoch and converted to double. Boolean `true` is converted to `1.0`, and `false` to `0.0`. ## Syntax @@ -10,16 +10,15 @@ The TO_DOUBLE function converts an input value into a double value. #### field -The input value. This can be a single or multi-valued column or an expression. +The input value to convert. This can be a single- or multi-valued column, or an expression. ## Examples +Converts string and numeric values to double, including handling a non-numeric string which results in a null value and a warning. + ```esql ROW str1 = "5.20128E11", str2 = "foo" | EVAL dbl = TO_DOUBLE("520128000000"), dbl1 = TO_DOUBLE(str1), dbl2 = TO_DOUBLE(str2) ``` -## Notes - -- If the input parameter is of a date type, its value will be interpreted as milliseconds since the Unix epoch and converted to a double. -- A boolean value of true will be converted to a double value of 1.0, and false will be converted to 0.0. +This example converts a string representing a number and a numeric string in scientific notation to double, and attempts to convert a non-numeric string, which results in a `null` value and a warning header in the response. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geohash.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geohash.txt new file mode 100644 index 0000000000000..da4350527f94f --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geohash.txt @@ -0,0 +1,23 @@ +# TO_GEOHASH + +The TO_GEOHASH function converts an input value to a geohash value. 
The conversion is successful only if the input string follows the geohash format. + +## Syntax + +`TO_GEOHASH(field)` + +### Parameters + +#### field + +The input value to be converted. This can be a single- or multi-valued column, or an expression. + +## Examples + +Converts the string "u3bu" into a geohash value and stores it in a new column. + +```esql +ROW string = "u3bu" +| EVAL geohash = TO_GEOHASH(string) +``` +This example converts the string "u3bu" into a geohash value. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geohex.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geohex.txt new file mode 100644 index 0000000000000..29a2e1180b18b --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geohex.txt @@ -0,0 +1,21 @@ +# TO_GEOHEX + +Converts an input value to a `geohex` value. A string will only be successfully converted if it follows the `geohex` format, as described for the geohex grid aggregation. + +## Syntax + +`TO_GEOHEX(field)` + +### Parameters + +#### field + +Input value to be converted. The input can be a single- or multi-valued column or an expression. + +## Examples + +Converts a string column containing a geohex value into a geohex type. +```esql +ROW string = "841f059ffffffff" +| EVAL geohex = TO_GEOHEX(string) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geopoint.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geopoint.txt index 081d062e5dd0a..6f0010c847349 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geopoint.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geopoint.txt @@ -1,6 +1,6 @@ # TO_GEOPOINT -Converts an input value to a `geo_point` value. A string will only be successfully converted if it adheres to the WKT Point format. +The TO_GEOPOINT function converts an input value to a `geo_point` type. Strings are only converted successfully if they follow the WKT Point format. ## Syntax @@ -8,19 +8,14 @@ Converts an input value to a `geo_point` value. A string will only be successful ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +The input value to convert. This can be a single- or multi-valued column, or an expression. ## Examples -Convert a WKT Point string to a `geo_point` value: - +Converts a WKT Point string into a geo_point value and stores it in a new column. ```esql ROW wkt = "POINT(42.97109630194 14.7552534413725)" | EVAL pt = TO_GEOPOINT(wkt) ``` - -## Limitations - -- The input string must strictly follow the WKT Point format for successful conversion. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geoshape.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geoshape.txt index c7ae44553a42e..d37fb311530be 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geoshape.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geoshape.txt @@ -1,6 +1,6 @@ # TO_GEOSHAPE -Converts an input value to a `geo_shape` value. 
A string will only be successfully converted if it adheres to the WKT (Well-Known Text) format. +Converts an input value to a `geo_shape` value. A string will only be successfully converted if it follows the WKT (Well-Known Text) format. ## Syntax @@ -8,15 +8,15 @@ Converts an input value to a `geo_shape` value. A string will only be successful ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +The input value to convert. This can be a single- or multi-valued column, or an expression. ## Examples +Converts a WKT string representing a polygon into a geo_shape value and stores it in a new column. + ```esql ROW wkt = "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))" | EVAL geom = TO_GEOSHAPE(wkt) ``` - -This example converts a WKT string representing a polygon into a `geo_shape` value. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geotile.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geotile.txt new file mode 100644 index 0000000000000..1ca41baae750e --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_geotile.txt @@ -0,0 +1,21 @@ +# TO_GEOTILE + +Converts an input value to a `geotile` value. A string will only be successfully converted if it follows the `geotile` format, as described for the geotile grid aggregation. + +## Syntax + +`TO_GEOTILE(field)` + +### Parameters + +#### field + +Input value to be converted. The input can be a single- or multi-valued column or an expression. + +## Examples + +Converts the string "4/8/5" into a geotile value and stores it in a new column. +```esql +ROW string = "4/8/5" +| EVAL geotile = TO_GEOTILE(string) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_integer.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_integer.txt index 754599630cb6c..c45590cd436f7 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_integer.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_integer.txt @@ -1,49 +1,41 @@ # TO_INTEGER -Converts an input value to an integer. If the input is a date type, it is interpreted as milliseconds since the Unix epoch and converted to an integer. Boolean values are converted to integers: `true` becomes `1` and `false` becomes `0`. +Converts an input value to an integer. If the input is a date type, its value is interpreted as milliseconds since the Unix epoch and converted to an integer. Boolean `true` is converted to `1`, and `false` to `0`. When two arguments are provided—a string value and a whole number base—the string is parsed as an integer in the specified base. If parsing fails, a warning is generated and the result is `null`. A leading '0x' prefix is allowed for base 16. ## Syntax -`TO_INTEGER(field)` +`TO_INTEGER(field, base)` ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +The input value to convert. This can be a single- or multi-valued column or an expression. + +#### base + +(Optional) The radix or base used to convert the input value. When a base is specified, the input type must be `keyword` or `text`. 
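+
+A minimal sketch of the boolean conversion noted above (assuming `true` maps to `1` and `false` to `0`); the string and base forms are covered in the examples below:
+
+```esql
+ROW b = true
+| EVAL int = TO_INTEGER(b)
+```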
## Examples -Converting long values to integers +Converts each value in the multi-valued field `long` to an integer, returning `null` for values that cannot be converted and generating a warning. ```esql ROW long = [5013792, 2147483647, 501379200000] | EVAL int = TO_INTEGER(long) ``` -| long | int | -|----------------|--------------| -| 5013792 | 5013792 | -| 2147483647 | 2147483647 | -| 501379200000 | null | - -In this example, the first two values are successfully converted to integers. However, the last value exceeds the range of an integer, resulting in a `null` value. When such a failure occurs, a warning is added to the response. +Parses the string `str1` as a base 16 integer and `str2` as a base 13 integer, then keeps the original and converted values. -### Warning Example - -If a value cannot be converted, the response includes a warning header with details about the failure: - -``` -"Line 1:61: evaluation of [TO_INTEGER(long)] failed, treating result as null. Only first 20 failures recorded." +```esql +ROW str1 = "0x32", str2 = "31" +| EVAL int1 = TO_INTEGER(str1, 16), int2 = TO_INTEGER(str2, 13) +| KEEP str1, int1, str2, int2 ``` -Additionally, another header provides the failure reason and the problematic value: +Attempts to parse the string `str1` as an integer in base 27 (which succeeds) and in base 10 (which fails, returning `null` and generating a warning). +```esql +ROW str1 = "Kona" +| EVAL int1 = TO_INTEGER(str1, 27), fail1 = TO_INTEGER(str1, 10) ``` -"org.elasticsearch.xpack.esql.core.InvalidArgumentException: [501379200000] out of [integer] range" -``` - -## Limitations - -- Values that exceed the range of an integer will result in a `null` value. -- A warning is generated when conversion fails, and only the first 20 failures are recorded in the response headers. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_ip.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_ip.txt index 92fc3fff3a8f6..ce997f3496b91 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_ip.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_ip.txt @@ -1,39 +1,46 @@ # TO_IP -Converts an input string to an IP value. +The TO_IP function converts an input string to an IP value. If the input cannot be converted, the result is a `null` value and a warning header is added to the response with details about the failure. ## Syntax -`TO_IP(field)` +`TO_IP(field, options, leading_zeros)` ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +The input value to convert. This can be a single- or multi-valued column or an expression. + +#### options + +Optional. Additional options for the conversion. + +#### leading_zeros + +Specifies how to handle leading zeros in IPv4 addresses. This is a keyword parameter. ## Examples -Converting strings to IP values +Convert two string values to IP addresses and filter the results to include only those within the specified CIDR range: ```esql ROW str1 = "1.1.1.1", str2 = "foo" | EVAL ip1 = TO_IP(str1), ip2 = TO_IP(str2) | WHERE CIDR_MATCH(ip1, "1.0.0.0/8") ``` +This example converts two strings to IP values and filters the results by a CIDR range. If a string cannot be converted, the result is `null` and a warning is generated. 
-In this example: -- The string `"1.1.1.1"` is successfully converted to an IP value and stored in `ip1`. -- The string `"foo"` cannot be converted to an IP value, resulting in a `null` value for `ip2`. - -When a conversion fails, a *Warning* header is added to the response. The header provides details about the failure, including the source of the issue and the offending value. For instance: +Parse an IPv4 address with leading zeros as octal notation, interpreting each octet accordingly: ```esql -"Line 1:68: evaluation of [TO_IP(str2)] failed, treating result as null. Only first 20 failures recorded." +ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"octal"}) ``` +This example parses an IPv4 address with leading zeros as octal, similar to how `ping` or `ftp` handle such addresses. -The failure reason and the problematic value are also included in a subsequent header: +Parse an IPv4 address with leading zeros as decimal notation, treating each octet as a standard decimal value: ```esql -"java.lang.IllegalArgumentException: 'foo' is not an IP string literal." +ROW s = "1.1.010.1" | EVAL ip = TO_IP(s, {"leading_zeros":"decimal"}) ``` +This example parses an IPv4 address with leading zeros as decimal, following Java's `InetAddress.getByName` behavior. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_long.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_long.txt index ed0340a2e1d96..2f1498f2878dc 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_long.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_long.txt @@ -1,26 +1,41 @@ # TO_LONG -Converts an input value to a long value. If the input is of a date type, it is interpreted as milliseconds since the Unix epoch and converted to a long. Boolean values are converted as follows: `true` to `1` and `false` to `0`. +Converts the input value to a long integer. If the input is a date type, its value is interpreted as milliseconds since the Unix epoch and converted to long. Boolean `true` is converted to `1`, and `false` to `0`. When two arguments are provided—a string value and a whole number base—the string is parsed as a long in the specified base. If parsing fails, a warning is generated and the result is `null`. A leading '0x' prefix is allowed for base 16. ## Syntax -`TO_LONG(field)` +`TO_LONG(field, base)` ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +The input value to convert. This can be a single- or multi-valued column or an expression. + +#### base + +(Optional) The radix or base used to convert the input value. When a base is specified, the input type must be `keyword` or `text`. ## Examples -Converting strings to long values +Converts several string values to long integers, with invalid conversions resulting in null and a warning. ```esql ROW str1 = "2147483648", str2 = "2147483648.2", str3 = "foo" | EVAL long1 = TO_LONG(str1), long2 = TO_LONG(str2), long3 = TO_LONG(str3) ``` -- `str1` is successfully converted to a long value. -- `str2` is also converted to a long value, truncating the decimal part. -- `str3` cannot be converted, resulting in a `null` value. +Parses string values as long integers using base 16 and base 13, and keeps both the original and converted values. 
+ +```esql +ROW str1 = "0x32", str2 = "31" +| EVAL long1 = TO_LONG(str1, 16), long2 = TO_LONG(str2, 13) +| KEEP str1, long1, str2, long2 +``` + +Attempts to parse a string as a long integer in base 36 (succeeds) and base 10 (fails, returns null and warning). + +```esql +ROW str1 = "Hazelnut" +| EVAL long1 = TO_LONG(str1, 36), fail1 = TO_LONG(str1, 10) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_lower.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_lower.txt index 0f3a545d9c396..c3263dedcb89c 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_lower.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_lower.txt @@ -1,6 +1,6 @@ # TO_LOWER -Returns a new string with all characters in the input string converted to lowercase. +Returns a new string with all characters in the input string converted to lower case. ## Syntax @@ -8,15 +8,21 @@ Returns a new string with all characters in the input string converted to lowerc ### Parameters -#### `str` +#### str -String expression. If `null`, the function returns `null`. +String expression to be converted to lower case. If the value is `null`, the function returns `null`. The input can be a single-valued column or expression, or a multi-valued column or expression. ## Examples +Converts the value of the `message` column to lower case and stores it in a new column called `message_lower`: + ```esql ROW message = "Some Text" | EVAL message_lower = TO_LOWER(message) ``` -This example converts the string in the `message` column to lowercase and stores the result in a new column named `message_lower`. \ No newline at end of file +Converts each value in the array `["Some", "Text"]` to lower case and stores the result in column `v`: + +```esql +ROW v = TO_LOWER(["Some", "Text"]) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_radians.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_radians.txt index 830ba0cef6d86..7e18f9b9a0ff3 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_radians.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_radians.txt @@ -8,14 +8,13 @@ Converts a number in degrees to radians. ### Parameters -#### `number` +#### number -The input value to be converted. It can be a single- or multi-valued column or an expression. +Input value to be converted from degrees to radians. The input can be a single- or multi-valued column or an expression. ## Examples -Convert a list of degree values to radians: - +Converts an array of degree values (90.0, 180.0, 270.0) to their corresponding values in radians and stores the result in a new column. ```esql ROW deg = [90.0, 180.0, 270.0] | EVAL rad = TO_RADIANS(deg) diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_string.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_string.txt index 35734e3c99c2f..daf45e7cd9509 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_string.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_string.txt @@ -1,6 +1,6 @@ # TO_STRING -Converts an input value into a string. +The TO_STRING function converts an input value into a string. 
It can be used on single-valued or multi-valued columns, as well as expressions. ## Syntax @@ -8,21 +8,19 @@ Converts an input value into a string. ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +The input value to convert to a string. This can be a single- or multi-valued column, or an expression. ## Examples -Converting a single value to a string - +Converts the integer value in column `a` to a string and stores it in column `j`: ```esql ROW a=10 | EVAL j = TO_STRING(a) ``` -Converting a multivalued field to a string - +Converts the array of integers in column `a` to a string and stores it in column `j`: ```esql ROW a=[10, 9, 8] | EVAL j = TO_STRING(a) diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_timeduration.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_timeduration.txt index 50e4e1c71ad17..926d3e264da33 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_timeduration.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_timeduration.txt @@ -1,6 +1,6 @@ # TO_TIMEDURATION -Converts an input value into a `time_duration` value. +The TO_TIMEDURATION function converts an input value into a `time_duration` value. The input must be a valid constant time duration expression. ## Syntax @@ -8,17 +8,15 @@ Converts an input value into a `time_duration` value. ### Parameters -#### `field` +#### field -The input value. Must be a valid constant time duration expression. +The input value to convert. This should be a valid constant time duration expression. ## Examples -Adding and subtracting time durations - +Adds a 3-hour time duration to a datetime value and subtracts a 3-hour time duration using the TO_TIMEDURATION function. ```esql ROW x = "2024-01-01"::datetime -| EVAL y = x + "3 hours"::time_duration, z = x - TO_TIMEDURATION(`3 hours`) +| EVAL y = x + "3 hours"::time_duration, z = x - TO_TIMEDURATION("3 hours"); ``` - -This example demonstrates how to add and subtract a time duration (`3 hours`) to and from a datetime value (`2024-01-01`). +This example adds and subtracts a 3-hour time duration to and from a datetime value. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_unsigned_long.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_unsigned_long.txt index 5922a3cb741bd..ea797bbc360e0 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_unsigned_long.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_unsigned_long.txt @@ -1,6 +1,6 @@ # TO_UNSIGNED_LONG -The TO_UNSIGNED_LONG function converts an input value into an unsigned long value. +Converts an input value to an unsigned long value. If the input is a date type, its value is interpreted as milliseconds since the Unix epoch and converted to unsigned long. Boolean `true` is converted to `1`, and `false` to `0`. ## Syntax @@ -10,22 +10,15 @@ The TO_UNSIGNED_LONG function converts an input value into an unsigned long valu #### field -The input value. This can be a single or multi-valued column or an expression. +Input value to be converted. The input can be a single- or multi-valued column or an expression. 
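+
+A small sketch of the date conversion described above (date values are read as milliseconds since the Unix epoch); the string examples follow below:
+
+```esql
+ROW date1 = TO_DATETIME("2023-12-02T11:00:00.000Z")
+| EVAL long_date1 = TO_UNSIGNED_LONG(date1)
+```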
## Examples -The following example demonstrates the use of the TO_UNSIGNED_LONG function: +Converts string values to unsigned long, including handling a non-numeric string which results in a `null` value and a warning. ```esql ROW str1 = "2147483648", str2 = "2147483648.2", str3 = "foo" | EVAL long1 = TO_UNSIGNED_LONG(str1), long2 = TO_ULONG(str2), long3 = TO_UL(str3) ``` -```esql -ROW date1 = TO_DATETIME("2023-12-02T11:00:00.000Z"), date2 = TO_DATETIME("2023-12-02T11:00:00.001Z") -| EVAL long_date1 = TO_UNSIGNED_LONG(date1), long_date2 = TO_UNSIGNED_LONG(date2) -``` - -## Notes - -If the input parameter is of a date type, its value will be interpreted as milliseconds since the Unix epoch and then converted to an unsigned long. A boolean value of true will be converted to an unsigned long value of 1, and false will be converted to 0. +This example converts two string values to unsigned long, and attempts to convert a non-numeric string, which results in a `null` value and a warning header in the response. diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_upper.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_upper.txt index 6a58bf44d3dca..955f9c58c17ac 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_upper.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_upper.txt @@ -1,6 +1,6 @@ # TO_UPPER -Converts the input string to upper case and returns the result. +Returns a new string with all characters in the input string converted to upper case. ## Syntax @@ -8,13 +8,13 @@ Converts the input string to upper case and returns the result. ### Parameters -#### `str` +#### str -String expression. If `null`, the function returns `null`. +String expression to be converted to upper case. If the value is `null`, the function returns `null`. The input can be a single-valued or multi-valued column or expression. ## Examples -Convert a string to upper case: +Converts the contents of the `message` column to upper case and stores the result in a new column called `message_upper`: ```esql ROW message = "Some Text" diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_version.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_version.txt index 965b9bd07d26e..ca37af591979a 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_version.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-to_version.txt @@ -1,6 +1,6 @@ # TO_VERSION -Converts an input string to a version value. +The TO_VERSION function converts an input string to a version value. ## Syntax @@ -8,14 +8,13 @@ Converts an input string to a version value. ### Parameters -#### `field` +#### field -The input value to be converted. This can be a single- or multi-valued column or an expression. +The input value to convert. This can be a single- or multi-valued column, or an expression. ## Examples +Converts the string "1.2.3" into a version value and assigns it to the column `v`. ```esql ROW v = TO_VERSION("1.2.3") -``` - -Convert the string `"1.2.3"` into a version value. 
+``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-top.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-top.txt index 39cb12ca5baf5..47729c8d7543c 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-top.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-top.txt @@ -1,30 +1,34 @@ -## TOP +# TOP -The `TOP` function collects the top values for a specified field, including repeated values. +The TOP function collects the top values for a specified field, including repeated values. ## Syntax -`TOP(field, limit, order)` +`TOP(field, limit, order, outputField)` ### Parameters -#### `field` +#### field The field to collect the top values for. -#### `limit` +#### limit The maximum number of values to collect. -#### `order` +#### order -The order in which to calculate the top values. Can be either `asc` (ascending) or `desc` (descending). +The order to calculate the top values. Either `asc` or `desc`. If omitted, defaults to `asc`. + +#### outputField + +Optional. If present, this field will be the output of the TOP call instead of `field`. ## Examples -Collecting top salaries +Retrieves the top 3 highest salary values from the employees index in descending order and also calculates the maximum salary. + ```esql FROM employees | STATS top_salaries = TOP(salary, 3, "desc"), top_salary = MAX(salary) -``` -This example collects the top three salaries in descending order and calculates the maximum salary. +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-top_snippets.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-top_snippets.txt new file mode 100644 index 0000000000000..3c13f9a249f2e --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-top_snippets.txt @@ -0,0 +1,46 @@ +# TOP_SNIPPETS + +The TOP_SNIPPETS function extracts the best matching snippets from a text field based on a given query string. It works with fields from the text family, such as text and semantic_text, and allows you to control the number and length of returned snippets. + +## Syntax + +`TOP_SNIPPETS(field, query, options, num_words, num_snippets)` + +### Parameters + +#### field + +The input field to extract snippets from. + +#### query + +The input text containing only query terms for snippet extraction. Lucene query syntax, operators, and wildcards are not allowed. + +#### options + +(Optional) Additional options for snippet extraction, provided as function named parameters. + +#### num_words + +(integer) The maximum number of words to return in each snippet. This helps control inference costs by limiting the size of tokens per snippet. + +#### num_snippets + +(integer) The maximum number of matching snippets to return. + +## Examples + +Extracts the best matching snippets from the `description` field for records in the `books` index that are relevant to the query "Tolkien". + +```esql +FROM books +| EVAL snippets = TOP_SNIPPETS(description, "Tolkien") +``` + +Filters books with "Return" in the title, then extracts up to 3 snippets of up to 25 words each from the `description` field for the query "Tolkien". 
+ +```esql +FROM books +| WHERE MATCH(title, "Return") +| EVAL snippets = TOP_SNIPPETS(description, "Tolkien", { "num_snippets": 3, "num_words": 25 }) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-trange.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-trange.txt new file mode 100644 index 0000000000000..206eebc1f2943 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-trange.txt @@ -0,0 +1,67 @@ +# TRANGE + +The TRANGE function filters data for a specified time range using the `@timestamp` attribute. It can be used with either a single offset from the current time or with explicit start and end times. + +## Syntax + +`TRANGE(start_time_or_offset, end_time)` + +### Parameters + +#### start_time_or_offset + +Offset from NOW when used in single parameter mode. In two parameter mode, this is the start time, which can be provided as a date string, date, date_nanos, or epoch milliseconds. + +#### end_time + +Explicit end time for the range. This can be a date string, date, date_nanos, or epoch milliseconds. + +## Examples + +Filters data from the last hour based on the `@timestamp` field. + +```esql +FROM k8s +| WHERE TRANGE(1h) +| KEEP @timestamp +``` + +Filters data between two specific ISO8601 timestamps and returns the first 10 results sorted by `@timestamp`. + +```esql +FROM k8s +| WHERE TRANGE("2024-05-10T00:17:14.000Z", "2024-05-10T00:18:33.000Z") +| SORT @timestamp +| KEEP @timestamp +| LIMIT 10 +``` + +Filters data between two specific times by converting ISO8601 strings to datetime objects, sorts by `@timestamp`, and limits the output to 10 results. + +```esql +FROM k8s +| WHERE TRANGE(to_datetime("2024-05-10T00:17:14Z"), to_datetime("2024-05-10T00:18:33Z")) +| SORT @timestamp +| KEEP @timestamp +| LIMIT 10 +``` + +Filters data between two specific times with millisecond precision by converting ISO8601 strings to datetime objects, sorts by `@timestamp`, and limits the output to 10 results. + +```esql +FROM k8s +| WHERE TRANGE(to_datetime("2024-05-10T00:17:14.000Z"), to_datetime("2024-05-10T00:18:33.000Z")) +| SORT @timestamp +| KEEP @timestamp +| LIMIT 10 +``` + +Filters data between two specific points in time using epoch milliseconds, sorts by `@timestamp`, and limits the output to 10 results. + +```esql +FROM k8s +| WHERE TRANGE(1715300236000, 1715300282000) +| SORT @timestamp +| KEEP @timestamp +| LIMIT 10 +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-trim.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-trim.txt index 8211a67a99764..086cb653a4d82 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-trim.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-trim.txt @@ -8,16 +8,16 @@ Removes leading and trailing whitespaces from a string. ### Parameters -#### `string` +#### string -String expression. If `null`, the function returns `null`. +String expression to be trimmed. If the value is `null`, the function returns `null`. ## Examples +Removes leading and trailing spaces from the `message` and `color` columns in a single row. 
+ ```esql -ROW message = " some text ", color = " red " +ROW message = " some text ", color = " red " | EVAL message = TRIM(message) | EVAL color = TRIM(color) ``` - -This example removes leading and trailing whitespaces from the `message` and `color` columns. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ts.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ts.txt new file mode 100644 index 0000000000000..39db6b6d7e53b --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-ts.txt @@ -0,0 +1,61 @@ +# TS + +The TS source command enables time series semantics and allows the use of time series aggregation functions within the STATS command, such as AVG_OVER_TIME() or RATE. These functions are evaluated per time series and then aggregated by group using a secondary aggregation function. TS is similar to the FROM source command, but specifically targets time series indices and supports time series aggregations. + +## Syntax + +`TS index_pattern [METADATA fields]` + +### Parameters + +#### index_pattern + +A list of indices, data streams, or aliases to query. Supports wildcards and date math. + +#### fields + +A comma-separated list of metadata fields to retrieve. This parameter is optional. + +## Examples + +Calculates the total rate of search requests per host and hour for the past hour. + +```esql +TS metrics + | WHERE @timestamp >= now() - 1 hour + | STATS SUM(RATE(search_requests)) BY TBUCKET(1 hour), host +``` + +Returns the average of the most recent memory usage values for each time series. + +```esql +TS metrics | STATS AVG(memory_usage) +``` + +Returns the average of the most recent memory usage values for each time series, explicitly using the LAST_OVER_TIME function. + +```esql +TS metrics | STATS AVG(LAST_OVER_TIME(memory_usage)) +``` + +Calculates the average memory usage by first averaging per time series and then averaging those results. + +```esql +TS metrics | STATS AVG(AVG_OVER_TIME(memory_usage)) +``` + +Calculates the sum of average memory usage per host and hourly bucket for the last day. + +```esql +TS metrics +| WHERE @timestamp >= now() - 1 day +| STATS SUM(AVG_OVER_TIME(memory_usage)) BY host, TBUCKET(1 hour) +``` + +## Limitations + +- TS can only be used with time series indices. +- Time series aggregation functions must be wrapped inside a regular aggregation function in STATS. Using a time series aggregation function as the outer function causes an error. +- TS cannot be combined with certain operations (such as FORK) before the STATS command is applied. +- Avoid aggregating multiple metrics with different dimensional cardinalities in the same query, as this can result in null values for some dimension combinations. +- It is recommended to add a time range filter on `@timestamp` to limit the data volume scanned and improve query performance. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-unary operators.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-unary operators.txt new file mode 100644 index 0000000000000..99ac5548746ab --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-unary operators.txt @@ -0,0 +1,36 @@ +# UNARY OPERATORS + +The only unary operator is negation (`-`). Negation is used to invert the sign of a numeric value. 
+ +## Syntax + +`-number` + +### Parameters + +#### number + +The numeric value whose sign you want to invert. + +## Examples + +Negates the value 5 and stores the result in column `b`. + +```esql +ROW a = 5 +| EVAL b = -a +``` + +Negates the value -3 and stores the result in column `b`. + +```esql +ROW a = -3 +| EVAL b = -a +``` + +Negates the values in the `value` column from the `numbers` index and stores the result in a new column `negated_value`. + +```esql +FROM numbers +| EVAL negated_value = -value +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-url_decode.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-url_decode.txt new file mode 100644 index 0000000000000..e21af2b60444d --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-url_decode.txt @@ -0,0 +1,22 @@ +# URL_DECODE + +The URL_DECODE function decodes a URL-encoded string. If the input cannot be decoded, it returns `null` and adds a warning header to the response. + +## Syntax + +`URL_DECODE(string)` + +### Parameters + +#### string + +The URL-encoded string to decode. + +## Examples + +Decodes a URL-encoded string representing a URL with query parameters and returns the decoded version. + +```esql +ROW u = "https%3A%2F%2Fexample.com%2F%3Fx%3Dfoo%20bar%26y%3Dbaz" +| EVAL u = URL_DECODE(u) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-url_encode.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-url_encode.txt new file mode 100644 index 0000000000000..409987fcd0813 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-url_encode.txt @@ -0,0 +1,20 @@ +# URL_ENCODE + +The URL_ENCODE function encodes a string for safe use in URLs. All characters are percent-encoded except for alphanumerics, `.`, `-`, `_`, and `~`. Spaces are encoded as `+`. + +## Syntax + +`URL_ENCODE(string)` + +### Parameters + +#### string + +The string value to encode as a URL. + +## Examples + +Encodes a URL string by converting spaces to `+` and special characters to their percent-encoded representations. +```esql +ROW u = "https://example.com/?x=foo bar&y=baz" | EVAL u = URL_ENCODE(u) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-url_encode_component.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-url_encode_component.txt new file mode 100644 index 0000000000000..b386c5baad650 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-url_encode_component.txt @@ -0,0 +1,22 @@ +# URL_ENCODE_COMPONENT + +The URL_ENCODE_COMPONENT function encodes a string for safe inclusion in a URL component. All characters are percent-encoded except for alphanumerics, `.`, `-`, `_`, and `~`. Spaces are encoded as `%20`. + +## Syntax + +`URL_ENCODE_COMPONENT(string)` + +### Parameters + +#### string + +The URL or string to encode. + +## Examples + +Encodes a URL string containing spaces and special characters so it can be safely used as a URL component. 
+ +```esql +ROW u = "https://example.com/?x=foo bar&y=baz" +| EVAL u = URL_ENCODE_COMPONENT(u) +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_cosine.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_cosine.txt new file mode 100644 index 0000000000000..62fb39b03b0b6 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_cosine.txt @@ -0,0 +1,28 @@ +# V_COSINE + +Calculates the cosine similarity between two dense_vectors. + +## Syntax + +`V_COSINE(left, right)` + +### Parameters + +#### left + +The first dense_vector used to calculate cosine similarity. + +#### right + +The second dense_vector used to calculate cosine similarity. + +## Examples + +Calculates the cosine similarity between each row's `rgb_vector` and the vector `[0, 255, 255]`, filters out rows where the color is "black", and sorts the results by similarity (descending) and color (ascending): + +```esql +from colors +| where color != "black" +| eval similarity = v_cosine(rgb_vector, [0, 255, 255]) +| sort similarity desc, color asc +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_dot_product.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_dot_product.txt new file mode 100644 index 0000000000000..6c7487543589a --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_dot_product.txt @@ -0,0 +1,27 @@ +# V_DOT_PRODUCT + +Calculates the dot product between two dense_vectors. + +## Syntax + +`V_DOT_PRODUCT(left, right)` + +### Parameters + +#### left + +The first dense_vector used to calculate the dot product similarity. + +#### right + +The second dense_vector used to calculate the dot product similarity. + +## Examples + +Calculates the dot product similarity between the `rgb_vector` column and the vector `[0, 255, 255]`, then sorts the results by similarity in descending order and color in ascending order. + +```esql +from colors +| eval similarity = v_dot_product(rgb_vector, [0, 255, 255]) +| sort similarity desc, color asc +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_hamming.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_hamming.txt new file mode 100644 index 0000000000000..08bfaa354417c --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_hamming.txt @@ -0,0 +1,27 @@ +# V_HAMMING + +Calculates the Hamming distance between two dense vectors. + +## Syntax + +`V_HAMMING(left, right)` + +### Parameters + +#### left + +First dense_vector to use to calculate the Hamming distance. + +#### right + +Second dense_vector to use to calculate the Hamming distance. + +## Examples + +Calculates the Hamming distance between each row's `rgb_byte_vector` and the vector `[0, 127, 127]`, storing the result as `similarity`, and then sorts the output by descending similarity and ascending color. 
+ +```esql +from colors +| eval similarity = v_hamming(rgb_byte_vector, [0, 127, 127]) +| sort similarity desc, color asc +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_l1_norm.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_l1_norm.txt new file mode 100644 index 0000000000000..7416823a2602d --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_l1_norm.txt @@ -0,0 +1,27 @@ +# V_L1_NORM + +Calculates the l1 norm (Manhattan distance) between two dense_vectors. + +## Syntax + +`V_L1_NORM(left, right)` + +### Parameters + +#### left + +First dense_vector to calculate l1 norm similarity. + +#### right + +Second dense_vector to calculate l1 norm similarity. + +## Examples + +Calculates the l1 norm similarity between the `rgb_vector` column and the vector `[0, 255, 255]`, then sorts the results by similarity in descending order and color in ascending order. + +```esql +from colors +| eval similarity = v_l1_norm(rgb_vector, [0, 255, 255]) +| sort similarity desc, color asc +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_l2_norm.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_l2_norm.txt new file mode 100644 index 0000000000000..0587f8b848a33 --- /dev/null +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-v_l2_norm.txt @@ -0,0 +1,27 @@ +# V_L2_NORM + +Calculates the l2 norm between two dense_vectors, which is a measure of similarity or distance between them. + +## Syntax + +`V_L2_NORM(left, right)` + +### Parameters + +#### left + +The first dense_vector to calculate l2 norm similarity. + +#### right + +The second dense_vector to calculate l2 norm similarity. + +## Examples + +Calculates the l2 norm similarity between the `rgb_vector` column and the vector `[0, 255, 255]`, adds the result as a new column called `similarity`, and sorts the results by similarity in descending order and color in ascending order: + +```esql +from colors +| eval similarity = v_l2_norm(rgb_vector, [0, 255, 255]) +| sort similarity desc, color asc +``` \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-values.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-values.txt index 24a0116733aec..0f541bf20e660 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-values.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-values.txt @@ -1,6 +1,6 @@ # VALUES -The `VALUES` function retrieves all values in a group as a multivalued field. The order of the returned values is not guaranteed. To ensure the values are returned in order, use the `MV_SORT` function. +The VALUES function returns the unique values from a field as a multivalued field. The order of the returned values is not guaranteed. If you need the values in a specific order, use the `MV_SORT` function. ## Syntax @@ -8,14 +8,13 @@ The `VALUES` function retrieves all values in a group as a multivalued field. Th ### Parameters -#### `field` +#### field -The field from which to retrieve all values. +The field from which to extract unique values. 
## Examples

-Retrieve and sort first names by their first letter
-The following query extracts the first letter of each employee's first name, groups the data by this letter, and retrieves all first names in each group as a multivalued field. The `MV_SORT` function is used to sort the names within each group.
+Extracts the first letter from each employee's first name, then groups employees by this letter and collects all unique first names for each group, sorting them alphabetically.

```esql
FROM employees
@@ -24,7 +23,6 @@ FROM employees
| SORT first_letter
```

-## Notes
+## Limitations

-- This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features.
-- The `VALUES` function can consume a significant amount of memory. ES|QL does not currently support growing aggregations beyond available memory. If the aggregation collects more values than can fit into memory, the query will fail with a Circuit Breaker Error.
+This function can use a significant amount of memory. Aggregations do not grow beyond available memory, so if the number of collected values exceeds memory limits, the query will fail with a Circuit Breaker Error. If you need to keep repeated values, use the `TOP` function instead.
\ No newline at end of file
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-variance.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-variance.txt
new file mode 100644
index 0000000000000..b863779951bd2
--- /dev/null
+++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-variance.txt
@@ -0,0 +1,22 @@
+# VARIANCE
+
+The VARIANCE function calculates the population variance of a numeric field.
+
+## Syntax
+
+`VARIANCE(number)`
+
+### Parameters
+
+#### number
+
+The numeric field for which to calculate the population variance.
+
+## Examples
+
+Calculates the population variance of the `height` field across all employees.
+
+```esql
+FROM employees
+| STATS var_height = VARIANCE(height)
+```
\ No newline at end of file
diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-variance_over_time.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-variance_over_time.txt
new file mode 100644
index 0000000000000..4baa5b2e20864
--- /dev/null
+++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-variance_over_time.txt
@@ -0,0 +1,22 @@
+# VARIANCE_OVER_TIME
+
+Calculates the population variance of a numeric field over time. The time window is not passed as an argument; it is determined by the time bucketing (for example, `TBUCKET`) in the enclosing STATS ... BY clause.
+
+## Syntax
+
+`VARIANCE_OVER_TIME(field)`
+
+### Parameters
+
+#### field
+
+The metric field to calculate the variance for.
+
+## Examples
+
+Calculates the average variance of the `network.cost` field for each cluster and 1-minute time bucket.
+ +```esql +TS k8s +| STATS avg_var_cost=AVG(VARIANCE_OVER_TIME(network.cost)) BY cluster, time_bucket = TBUCKET(1minute) +``` diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-weighted_avg.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-weighted_avg.txt index 1ed6aa3dc65b1..93917a069b2a1 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-weighted_avg.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-weighted_avg.txt @@ -1,6 +1,6 @@ -## WEIGHTED_AVG +# WEIGHTED_AVG -Calculates the weighted average of a numeric expression. +Calculates the weighted average of a numeric expression using specified weights. ## Syntax @@ -8,16 +8,18 @@ Calculates the weighted average of a numeric expression. ### Parameters -#### `number` +#### number -A numeric value. +A numeric value to be averaged. -#### `weight` +#### weight -A numeric weight. +A numeric value representing the weight for each corresponding number. ## Examples +Calculates the weighted average salary for each language group, using height as the weight, rounds the result, and displays the weighted average alongside the language, sorted by language. + ```esql FROM employees | STATS w_avg = WEIGHTED_AVG(salary, height) BY languages @@ -25,5 +27,3 @@ FROM employees | KEEP w_avg, languages | SORT languages ``` - -This example calculates the weighted average of employee salaries using their heights as weights, grouped by languages. The result is rounded and sorted by language. \ No newline at end of file diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-where.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-where.txt index cbe2d7befb1cc..40377b2354f62 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-where.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/esql_docs/esql-where.txt @@ -1,6 +1,6 @@ -## WHERE +# WHERE -The `WHERE` command filters rows from the input table, returning only those for which the specified condition evaluates to `true`. +The WHERE command filters rows from the input table based on a condition that evaluates to true. Fields with null values are excluded from results when using value exclusions, meaning WHERE field != "value" is interpreted as WHERE field != "value" AND field IS NOT NULL. ## Syntax @@ -8,125 +8,168 @@ The `WHERE` command filters rows from the input table, returning only those for ### Parameters -#### `expression` +#### expression -A boolean expression that determines which rows are included in the output. +A boolean expression used to filter rows. ## Examples -### Basic Usage - -Filter rows where the `still_hired` field is `true`: +Filter employees who are still hired: +Returns only employees whose `still_hired` field is true, showing their first and last names. ```esql FROM employees | KEEP first_name, last_name, still_hired | WHERE still_hired == true ``` -If `still_hired` is a boolean field, the query can be simplified to: +Simplify the condition when the field is boolean: +Filters employees to include only those who are still hired, using the boolean field directly. 
```esql FROM employees | KEEP first_name, last_name, still_hired | WHERE still_hired ``` -### Using Date Math - -Retrieve rows from the last hour of logs: +Retrieve logs from the last hour using date math: +Returns log entries from the last hour by comparing the `@timestamp` field to the current time minus one hour. ```esql FROM sample_data | WHERE @timestamp > NOW() - 1 hour ``` -### Using Functions - -Filter rows where the length of the `first_name` field is less than 4: +Filter rows using a function, such as LENGTH: +Shows employees whose first names are shorter than four characters. ```esql FROM employees | KEEP first_name, last_name, height | WHERE LENGTH(first_name) < 4 ``` -### NULL Comparison - -Filter rows where the `birth_date` field is `NULL`: +Check for NULL values using IS NULL: +Finds employees whose `birth_date` field is missing or null. ```esql FROM employees | WHERE birth_date IS NULL -| KEEP first_name, last_name -| SORT first_name -| LIMIT 3 ``` -Filter rows where the `is_rehired` field is not `NULL`: +Check for non-NULL values and count employees: +Counts the number of employees who have a non-null value in the `is_rehired` field. ```esql FROM employees | WHERE is_rehired IS NOT NULL | STATS COUNT(emp_no) ``` -### Using LIKE for String Patterns +Match text using the MATCH function: -Filter rows based on string patterns using wildcards. The following wildcard characters are supported: -- `*` matches zero or more characters. -- `?` matches one character. +Finds books where the `author` field matches the term "Faulkner". +```esql +FROM books +| WHERE MATCH(author, "Faulkner") +``` -Filter rows where `first_name` matches the pattern `?b*`: +Match text with multiple terms and keep the title: +Returns book titles that match all the terms "Hobbit", "Back", and "Again" in the `title` field. +```esql +FROM books +| WHERE MATCH(title, "Hobbit Back Again", {"operator": "AND"}) +| KEEP title; +``` + +Use the LIKE operator to filter based on string patterns: + +Finds employees whose first names match the pattern "?b*", where "?" is any character followed by "b" and any characters after. ```esql FROM employees | WHERE first_name LIKE """?b*""" | KEEP first_name, last_name ``` -To match the exact characters `*` or `.`, escape them using a backslash (`\\`). For example: +Escape special characters in LIKE patterns: +Filters rows where the `message` field exactly matches "foo * bar", escaping the asterisk. ```esql ROW message = "foo * bar" | WHERE message LIKE "foo \\* bar" ``` -To simplify escaping, use triple-quoted strings: +Reduce escaping overhead with triple quotes in LIKE: +Filters rows where the `message` field matches "foo * bar", using triple quotes to simplify escaping. ```esql ROW message = "foo * bar" | WHERE message LIKE """foo \* bar""" ``` -### Using RLIKE for Regular Expressions +Match against multiple LIKE patterns: -Filter rows based on regular expressions. For example, filter rows where `first_name` matches the pattern `.leja.*`: +Returns rows where the `message` field matches either the pattern "foo*" or "bar?". +```esql +ROW message = "foobar" +| WHERE message like ("foo*", "bar?") +``` + +Use REST query placeholders with LIKE: +Filters employees whose first names match a pattern provided as a REST query parameter. +```esql +FROM employees +| WHERE first_name LIKE ?pattern +| KEEP first_name, last_name +``` + +Filter using RLIKE with regular expressions: + +Finds employees whose first names match the regular expression ".leja.*". 
```esql FROM employees | WHERE first_name RLIKE """.leja.*""" | KEEP first_name, last_name ``` -Escape special characters in regular expressions using a backslash (`\\`). For example: +Escape special characters in RLIKE patterns: +Filters rows where the `message` field matches the regular expression "foo \( bar", escaping the parenthesis. ```esql ROW message = "foo ( bar" | WHERE message RLIKE "foo \\( bar" ``` -To simplify escaping, use triple-quoted strings: +Reduce escaping overhead with triple quotes in RLIKE: +Filters rows where the `message` field matches the regular expression "foo \( bar", using triple quotes for easier escaping. ```esql ROW message = "foo ( bar" | WHERE message RLIKE """foo \( bar""" ``` -### Using IN Operator +Match against multiple RLIKE patterns: + +Returns rows where the `message` field matches either the regular expression "foo.*" or "bar.". +```esql +ROW message = "foobar" +| WHERE message RLIKE ("foo.*", "bar.") +``` + +Use REST query placeholders with RLIKE: + +Filters employees whose first names match a regular expression provided as a REST query parameter. +```esql +FROM employees +| WHERE first_name RLIKE ?pattern +| KEEP first_name, last_name +``` -Filter rows where an expression matches any value in a list of literals, fields, or expressions: +Test membership in a list using IN: +Keeps rows where the value of `c-a` is in the list (3, b / 2, a). ```esql ROW a = 1, b = 4, c = 3 | WHERE c-a IN (3, b / 2, a) diff --git a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/prompts/syntax.txt b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/prompts/syntax.txt index b3a1deaa7d2fe..204615c555260 100644 --- a/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/prompts/syntax.txt +++ b/x-pack/platform/plugins/shared/inference/server/tasks/nl_to_esql/prompts/syntax.txt @@ -15,9 +15,10 @@ Source commands select a data source. - - FROM: Selects one or multiple indices, data streams or aliases to use as source. - - ROW: Produces a row with one or more columns with values that you specify. + - FROM: selects one or multiple indices, data streams or aliases to use as source. + - ROW: produces a row with one or more columns with values that you specify. - SHOW: returns information about the deployment. + - TS: selects one or multiple data streams with support for time series semantics and time series aggregation functions @@ -39,101 +40,135 @@ - [preview] SAMPLE: samples a fraction of the table rows - [preview] COMPLETION: send prompts and context to an LLM - [preview] CHANGE_POINT: detects spikes, dips, and change points in a metric. + - [preview] FORK creates multiple execution branches to operate +on the same input data and combines the results in a single output table. A discriminator column (`_fork`) is added to identify which branch each row came from. + - [preview] FUSE merges rows from multiple result sets and assigns +new relevance scores. `FUSE` enables hybrid search to combine and score results from multiple queries, together with the `FORK` command. 
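+
+For example, one minimal sketch of combining the two (illustrative only; it assumes a hypothetical `books` index with `title` and `author` fields):
+
+```esql
+FROM books METADATA _id, _index, _score
+| FORK
+    ( WHERE MATCH(title, "Shakespeare") | SORT _score DESC | LIMIT 5 )
+    ( WHERE MATCH(author, "Shakespeare") | SORT _score DESC | LIMIT 5 )
+| FUSE
+| KEEP title, author, _score
+| SORT _score DESC
+```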
+ - BUCKET: Create groups of values out of a datetime or numeric input - CATEGORIZE: Organize textual data into groups of similar format + BUCKET: Creates groups of values - buckets - out of a datetime or numeric input + TBUCKET: Creates groups of values - buckets - out of a @timestamp attribute + CATEGORIZE: Groups text messages into categories of similarly formatted text values - AVG: calculate the average of a numeric field - COUNT: return the total number of input values - COUNT_DISTINCT: return the number of distinct values in a field - MAX: calculate the maximum value of a field - MEDIAN: calculate the median value of a numeric field - MEDIAN_ABSOLUTE_DEVIATION: calculate the median absolute deviation of a numeric field - MIN: calculate the minimum value of a field - PERCENTILE: calculate a specified percentile of a numeric field - STD_DEV: calculate the standard deviation of a numeric field - SUM: calculate the total sum of a numeric expression - TOP: collect the top values for a specified field - VALUES: return all values in a group as a multivalued field - WEIGHTED_AVG: calculate the weighted average of a numeric expression + ABSENT: Returns true if the input expression yields no non-null values within the current aggregation context + AVG: The average of a numeric field. + COUNT: Returns the total number (count) of input values. + COUNT_DISTINCT: Returns the approximate number of distinct values + MAX: The maximum value of a field. + MEDIAN: The value that is greater than half of all values and less than half of all values, also known as the 50% PERCENTILE + MEDIAN_ABSOLUTE_DEVIATION: Returns the median absolute deviation, a measure of variability + MIN: The minimum value of a field. + PERCENTILE: Returns the value at which a certain percentage of observed values occur + PRESENT: Returns true if the input expression yields any non-null values within the current aggregation context + SAMPLE: Collects sample values for a field. + ST_CENTROID_AGG: Calculate the spatial centroid over a field with spatial point geometry type. + ST_EXTENT_AGG: Calculate the spatial extent over a field with geometry type + STD_DEV: The population standard deviation of a numeric field. + SUM: The sum of a numeric expression. + TOP: Collects the top values for a field + VALUES: Returns unique values as a multivalued field + VARIANCE: The population variance of a numeric field. + WEIGHTED_AVG: The weighted average of a numeric expression. - CASE: accept pairs of conditions and values and return the value for the first true condition - COALESCE: return the first non-null argument from the list of provided arguments - GREATEST: return the maximum value from multiple columns - LEAST: return the smallest value from multiple columns + CASE: Accepts pairs of conditions and values + COALESCE: Returns the first of its arguments that is not null + GREATEST: Returns the maximum value from multiple columns + LEAST: Returns the minimum value from multiple columns + CLAMP: Limits (or clamps) the values of all samples to have a lower limit of min and an upper limit of max. + CLAMP_MIN: Limits (or clamps) all input sample values to a lower bound of min + CLAMP_MAX: Limits (or clamps) all input sample values to an upper bound of max + DECAY: Calculates a relevance score that decays based on the distance of a numeric, spatial or date type value from a target origin, using configurable decay functions + KQL: perform a KQL query. KQL is a simple text-based query language for filtering data. 
It can also be used to search for patterns in complete Event MATCH: Use MATCH to perform a match query on the specified field + MATCH_PHRASE: Use MATCH_PHRASE to perform a match_phrase on the specified field MATCH: execute a match query on a specified field - equivalent to match query for Elasticsearch Query DSL - QSTR: perform a Lucene query string query. It can also be used to search for patterns in complete Event. - KQL: perform a KQL query. KQL is a simple text-based query language for filtering data. It can also be used to search for patterns in complete Event - LOOKUP JOIN: combines data from a query results table with matching records from a specified lookup index + QSTR: perform a Lucene query string query. It can also be used to search for patterns in complete Event. SCORE: Scores an expression + TOP_SNIPPETS: Use TOP_SNIPPETS to extract the best snippets for a given query string from a text field - DATE_DIFF: calculate the difference between two timestamps in a given unit - DATE_EXTRACT: extract a specific part of a date - DATE_FORMAT: return a string representation of a date using the provided format - DATE_PARSE: convert a date string into a date - DATE_TRUNC: round down a date to the nearest specified interval - NOW: return the current date and time + DATE_DIFF: Subtracts the startTimestamp from the endTimestamp and returns the difference in multiples of unit + DATE_EXTRACT: Extracts parts of a date, like year, month, day, hour. + DATE_FORMAT: Returns a string representation of a date, in the provided format. + DATE_PARSE: Returns a date by parsing the second argument using the format specified in the first argument. + DATE_TRUNC: Rounds down a date to the closest interval since epoch, which starts at 0001-01-01T00:00:00Z. + DAY_NAME: Returns the name of the weekday for date based on the configured Locale. + MONTH_NAME: Returns the month name for the provided date based on the configured Locale. + NOW: Returns current date and time. + TRANGE: Filters data for the given time range using the @timestamp attribute. 
- BIT_LENGTH: calculate the bit length of a string - BYTE_LENGTH: calculate the byte length of a string - CONCAT: combine two or more strings - ENDS_WITH: check if a given string ends with a specified suffix - FROM_BASE64: decode a base64 string - HASH: compute the hash of a given input using a specified algorithm - LEFT: extract a specified number of characters from the start of a string - LENGTH: calculate the character length of a given string - LOCATE: return the position of a specified substring within a string - LTRIM: remove leading whitespaces from a string - REPEAT: generate a string by repeating a specified string a certain number of times - REPLACE: substitute any match of a regular expression within a string with a replacement string - REVERSE: reverse a string - RIGHT: extract a specified number of characters from the end of a string - RTRIM: remove trailing whitespaces from a string - SPACE: create a string composed of a specific number of spaces - SPLIT: split a single valued string into multiple strings based on a delimiter - STARTS_WITH: check if a given string begins with another specified string - SUBSTRING: extract a portion of a string - TO_BASE64: encode a string to b64 - TO_LOWER: convert a string to lowercase - TO_UPPER: convert a string to uppercase - TRIM: remove leading and trailing whitespaces from a string + BIT_LENGTH: Returns the bit length of a string + BYTE_LENGTH: Returns the byte length of a string + CHUNK: Use CHUNK to split a text field into smaller chunks + CONCAT: Concatenates two or more strings. + CONTAINS: Returns a boolean that indicates whether a keyword substring is within another string + ENDS_WITH: Returns a boolean that indicates whether a keyword string ends with another string. + FROM_BASE64: Decode a base64 string. + HASH: Computes the hash of the input using various algorithms such as MD5, SHA, SHA-224, SHA-256, SHA-384, SHA-512. + LEFT: Returns the substring that extracts *length* chars from *string* starting from the left. + LENGTH: Returns the character length of a string + LOCATE: Returns an integer that indicates the position of a keyword substring within another string + LTRIM: Removes leading whitespaces from a string. + MD5: Computes the MD5 hash of the input (if the MD5 hash is available on the JVM). + REPEAT: Returns a string constructed by concatenating string with itself the specified number of times. + REPLACE: The function substitutes in the string str any match of the regular expression regex with the replacement string newStr. + REVERSE: Returns a new string representing the input string in reverse order. + RIGHT: Return the substring that extracts *length* chars from *str* starting from the right. + RTRIM: Removes trailing whitespaces from a string. + SHA1: Computes the SHA1 hash of the input. + SHA256: Computes the SHA256 hash of the input. + SPACE: Returns a string made of number spaces. + SPLIT: Split a single valued string into multiple strings. + STARTS_WITH: Returns a boolean that indicates whether a keyword string starts with another string. + SUBSTRING: Returns a substring of a string, specified by a start position and an optional length. + TO_BASE64: Encode a string to a base64 string. + TO_LOWER: Returns a new string representing the input string converted to lower case. + TO_UPPER: Returns a new string representing the input string converted to upper case. + TRIM: Removes leading and trailing whitespaces from a string. 
+ URL_ENCODE: URL-encodes the input (spaces are encoded as `+`)
+ URL_ENCODE_COMPONENT: URL-encodes the input for safe use within a URL component (spaces are encoded as `%20`)
+ URL_DECODE: URL-decodes the input, or returns null and adds a warning header to the response if the input cannot be decoded.
- CIDR_MATCH: checks if an IP address falls within specified network blocks
- IP_PREFIX: truncates an IP address to a specified prefix length
+ CIDR_MATCH: Returns true if the provided IP is contained in one of the provided CIDR blocks.
+ IP_PREFIX: Truncates an IP to a given prefix length.
- TO_BOOLEAN
- TO_CARTESIANPOINT
- TO_CARTESIANSHAPE
- TO_DATETIME (prefer DATE_PARSE to convert strings to datetime)
- TO_DATEPERIOD
- TO_DEGREES
- TO_DOUBLE
- TO_GEOPOINT
- TO_GEOSHAPE
- TO_INTEGER
- TO_IP
- TO_LONG
- TO_RADIANS
- TO_STRING
- TO_TIMEDURATION
- TO_UNSIGNED_LONG
- TO_VERSION
+ TO_AGGREGATE_METRIC_DOUBLE: Encode a numeric to an aggregate_metric_double.
+ TO_BOOLEAN: Converts an input value to a boolean value
+ TO_CARTESIANPOINT: Converts an input value to a cartesian_point value
+ TO_CARTESIANSHAPE: Converts an input value to a cartesian_shape value
+ TO_DATEPERIOD: Converts an input value into a date_period value.
+ TO_DATETIME: Converts an input value to a date value
+ TO_DATE_NANOS: Converts an input to a nanosecond-resolution date value (aka date_nanos)
+ TO_DEGREES: Converts a number in radians to degrees.
+ TO_DENSE_VECTOR: Converts a multi-valued input of numbers, or a hexadecimal string, to a dense_vector.
+ TO_DOUBLE: Converts an input value to a double value
+ TO_GEOHASH: Converts an input value to a geohash value
+ TO_GEOHEX: Converts an input value to a geohex value
+ TO_GEOPOINT: Converts an input value to a geo_point value
+ TO_GEOSHAPE: Converts an input value to a geo_shape value
+ TO_GEOTILE: Converts an input value to a geotile value
+ TO_INTEGER: Converts an input value to an integer value
+ TO_IP: Converts an input string to an IP value.
+ TO_LONG: Converts the input value to a long
+ TO_RADIANS: Converts a number in degrees to radians.
+ TO_STRING: Converts an input value into a string.
+ TO_TIMEDURATION: Converts an input value into a time_duration value.
+ TO_UNSIGNED_LONG: Converts an input value to an unsigned long value
+ TO_VERSION: Converts an input string to a version value.
diff --git a/x-pack/platform/plugins/shared/inference/tsconfig.json b/x-pack/platform/plugins/shared/inference/tsconfig.json
index 27b65305f47b3..25e4e8bc81c49 100644
--- a/x-pack/platform/plugins/shared/inference/tsconfig.json
+++ b/x-pack/platform/plugins/shared/inference/tsconfig.json
@@ -43,6 +43,6 @@
     "@kbn/inference-tracing",
     "@kbn/core-ui-settings-common",
     "@kbn/setup-node-env",
-    "@kbn/es-errors"
+    "@kbn/es-errors",
  ]
}