diff --git a/docs/en/stack/data-frames/ecommerce-example.asciidoc b/docs/en/stack/data-frames/ecommerce-example.asciidoc new file mode 100644 index 000000000..195ab657b --- /dev/null +++ b/docs/en/stack/data-frames/ecommerce-example.asciidoc @@ -0,0 +1,252 @@ +[role="xpack"] +[testenv="basic"] +[[ecommerce-dataframes]] +== Transforming your data with {dataframes} +++++ +Transforming your data +++++ + +beta[] + +<> enable you to retrieve information from an +{es} index, transform it, and store it in another index. Let's use the +{kibana-ref}/add-sample-data.html[{kib} sample data] to demonstrate how you can +pivot and summarize your data with {dataframe-transforms}. + + +. If the {es} {security-features} are enabled, obtain a user ID with sufficient +privileges to complete these steps. ++ +-- +You need `manage_data_frame_transforms` cluster privileges to preview and create +{dataframe-transforms}. Members of the built-in `data_frame_transforms_admin` +role have these privileges. + +You also need `read` and `view_index_metadata` index privileges on the source +index and `read`, `create_index`, and `index` privileges on the destination +index. + +For more information, see <> and <>. +-- + +. Choose your _source index_. ++ +-- +In this example, we'll use the eCommerce orders sample data. If you're not +already familiar with the `kibana_sample_data_ecommerce` index, use the +*Revenue* dashboard in {kib} to explore the data. Consider what insights you +might want to derive from this eCommerce data. +-- + +. Play with various options for grouping and aggregating the data. ++ +-- +For example, you might want to group the data by product ID and calculate the +total number of sales for each product and its average price. Alternatively, you +might want to look at the behavior of individual customers and calculate how +much each customer spent in total and how many different categories of products +they purchased. Or you might want to take the currencies or geographies into +consideration. What are the most interesting ways you can transform and +interpret this data? + +_Pivoting_ your data involves using at least one field to group it and applying +at least one aggregation. You can preview what the transformed data will look +like, so go ahead and play with it! + +For example, go to *Machine Learning* > *Data Frames* in {kib} and use the +wizard to create a {dataframe}: + +[role="screenshot"] +image::images/ecommerce-pivot1.jpg["Creating a simple {dataframe} in {kib}"] + +In this case, we grouped the data by customer ID and calculated the sum of +products each customer purchased. + +Let's add some more aggregations to learn more about our customers' orders. For +example, let's calculate the total sum of their purchases, the maximum number of +products that they purchased in a single order, and their total number of orders. +We'll accomplish this by using the +{ref}/search-aggregations-metrics-sum-aggregation.html[`sum` aggregation] on the +`taxless_total_price` field, the +{ref}/search-aggregations-metrics-max-aggregation.html[`max` aggregation] on the +`total_quantity` field, and the +{ref}/search-aggregations-metrics-cardinality-aggregation.html[`cardinality` aggregation] +on the `order_id` field: + +[role="screenshot"] +image::images/ecommerce-pivot2.jpg["Adding multiple aggregations to a {dataframe} in {kib}"] + +TIP: If you're interested in a subset of the data, you can optionally include a +{ref}/search-request-query.html[query] element. In this example, we've filtered +the data so that we're only looking at orders with a `currency` of `EUR`. +Alternatively, we could group the data by that field too. If you want to use +more complex queries, you can create your {dataframe} from a +{kibana-ref}/save-open-search.html[saved search]. + +If you prefer, you can use the +{ref}/preview-data-frame-transform.html[preview {dataframe-transforms} API]: + +[source,js] +-------------------------------------------------- +POST _data_frame/transforms/_preview +{ + "source": { + "index": "kibana_sample_data_ecommerce", + "query": { + "bool": { + "filter": { + "term": {"currency": "EUR"} + } + } + } + }, + "pivot": { + "group_by": { + "customer_id": { + "terms": { + "field": "customer_id" + } + } + }, + "aggregations": { + "total_quantity.sum": { + "sum": { + "field": "total_quantity" + } + }, + "taxless_total_price.sum": { + "sum": { + "field": "taxless_total_price" + } + }, + "total_quantity.max": { + "max": { + "field": "total_quantity" + } + }, + "order_id.cardinality": { + "cardinality": { + "field": "order_id" + } + } + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[skip:set up sample data] +-- + +. When you are satisfied with what you see in the preview, create the +{dataframe-transform}. ++ +-- +Supply a job ID and the name of the target (or _destination_) index. In {kib}, +you can choose to create and start the job, create it, or copy it to the +clipboard. + +If you prefer, you can use the +{ref}/put-data-frame-transform.html[create {dataframe-transforms} API]. For +example: + +[source,js] +-------------------------------------------------- +PUT _data_frame/transforms/ecommerce-customer-transform +{ + "source": { + "index": [ + "kibana_sample_data_ecommerce" + ], + "query": { + "bool": { + "filter": { + "term": { + "currency": "EUR" + } + } + } + } + }, + "dest": { + "index": "ecommerce-customers" + }, + "pivot": { + "group_by": { + "customer_id": { + "terms": { + "field": "customer_id" + } + } + }, + "aggregations": { + "total_quantity.sum": { + "sum": { + "field": "total_quantity" + } + }, + "taxless_total_price.sum": { + "sum": { + "field": "taxless_total_price" + } + }, + "total_quantity.max": { + "max": { + "field": "total_quantity" + } + }, + "order_id.cardinality": { + "cardinality": { + "field": "order_id" + } + } + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[skip:setup kibana sample data] +-- + +. Start the {dataframe-transform}. ++ +-- + +TIP: Even though resource utilization is automatically adjusted based on the +cluster load, a {dataframe-transform} increases search and indexing load on your +cluster while it runs. When it reaches the end of the data in your index, it +stops automatically. If you're experiencing an excessive load, however, you can +stop it sooner. + +You can start, stop, and manage {dataframe} jobs in {kib}: + +[role="screenshot"] +image::images/dataframe-jobs.jpg["Managing {dataframe} jobs in {kib}"] + +Alternatively, you can use the +{ref}/start-data-frame-transform.html[start {dataframe-transforms}] and +{ref}/stop-data-frame-transform.html[stop {dataframe-transforms}] APIs. For +example: + +[source,js] +-------------------------------------------------- +POST _data_frame/transforms/ecommerce-customer-transform/_start +-------------------------------------------------- +// CONSOLE +// TEST[skip:setup kibana sample data] + +-- + +. Explore the data in your new index. ++ +-- +For example, use the *Discover* application in {kib}: + +[role="screenshot"] +image::images/ecommerce-results.jpg["Exploring the new index in {kib}"] + +-- + +TIP: If you do not want to keep the {dataframe-transform}, you can delete it in +{kib} or use the +{ref}/delete-data-frame-transform.html[delete {dataframe-transform} API]. When +you delete a {dataframe-transform}, its destination index remains. diff --git a/docs/en/stack/data-frames/index.asciidoc b/docs/en/stack/data-frames/index.asciidoc index 57e90871e..426173092 100644 --- a/docs/en/stack/data-frames/index.asciidoc +++ b/docs/en/stack/data-frames/index.asciidoc @@ -1,3 +1,3 @@ include::dataframes.asciidoc[] -//include::{es-repo-dir}/data-frames/pivoting.asciidoc[] +include::ecommerce-example.asciidoc[] include::api-quickref.asciidoc[] \ No newline at end of file diff --git a/docs/en/stack/images/dataframe-jobs.jpg b/docs/en/stack/images/dataframe-jobs.jpg new file mode 100644 index 000000000..c833e87ee Binary files /dev/null and b/docs/en/stack/images/dataframe-jobs.jpg differ diff --git a/docs/en/stack/images/ecommerce-pivot1.jpg b/docs/en/stack/images/ecommerce-pivot1.jpg new file mode 100644 index 000000000..7d5bf983b Binary files /dev/null and b/docs/en/stack/images/ecommerce-pivot1.jpg differ diff --git a/docs/en/stack/images/ecommerce-pivot2.jpg b/docs/en/stack/images/ecommerce-pivot2.jpg new file mode 100644 index 000000000..b8557c81d Binary files /dev/null and b/docs/en/stack/images/ecommerce-pivot2.jpg differ diff --git a/docs/en/stack/images/ecommerce-results.jpg b/docs/en/stack/images/ecommerce-results.jpg new file mode 100644 index 000000000..435a24670 Binary files /dev/null and b/docs/en/stack/images/ecommerce-results.jpg differ