diff --git a/.scalafmt.conf b/.scalafmt.conf new file mode 100644 index 0000000000..f2f74c17b4 --- /dev/null +++ b/.scalafmt.conf @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Scalafmt is used to reformat Gatling benchmarks from the benchmarks/ directory + +version = 3.9.3 +runner.dialect = scala213 + +maxColumn = 100 + +preset = default +align.preset = some + +assumeStandardLibraryStripMargin = true +align.stripMargin = true + +rewrite.rules = [ + AvoidInfix + RedundantBraces + RedundantParens + SortModifiers + PreferCurlyFors + Imports +] + +rewrite.imports.sort = original +docstrings.style = Asterisk +docstrings.wrap = fold diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000000..de8c183ed4 --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,210 @@ + + +# Polaris Benchmarks + +This repository contains benchmarks for the Polaris service using Gatling. + +## Available Benchmarks + +### Dataset Creation Benchmark + +The CreateTreeDataset benchmark creates a test dataset with a specific structure. 
It exists in two variants: + +- `org.apache.polaris.benchmarks.simulations.CreateTreeDatasetSequential`: Creates entities one at a time +- `org.apache.polaris.benchmarks.simulations.CreateTreeDatasetConcurrent`: Creates up to 50 entities simultaneously + +These are write-only workloads designed to populate the system for subsequent benchmarks. + +### Read/Update Benchmark + +The ReadUpdateTreeDataset benchmark tests read and update operations on an existing dataset. It exists in two variants: + +- `org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDatasetSequential`: Performs read/update operations one at a time +- `org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDatasetConcurrent`: Performs up to 20 read/update operations simultaneously + +These benchmarks can only be run after using CreateTreeDataset to populate the system. + +## Parameters + +### Dataset Structure Parameters + +These parameters must be consistent across all benchmarks: + +- `NUM_CATALOGS`: Number of catalogs to create (default: 1) +- `NAMESPACE_WIDTH`: Width of the namespace tree (default: 2) +- `NAMESPACE_DEPTH`: Depth of the namespace tree (default: 4) +- `NUM_TABLES_PER_NAMESPACE`: Tables per namespace (default: 5) +- `NUM_VIEWS_PER_NAMESPACE`: Views per namespace (default: 3) +- `NUM_COLUMNS`: Columns per table (default: 10) +- `DEFAULT_BASE_LOCATION`: Base location for datasets (default: file:///tmp/polaris) + +### Workload Parameters + +These parameters can vary between benchmarks: + +- `CLIENT_ID`: Required OAuth2 client ID +- `CLIENT_SECRET`: Required OAuth2 client secret +- `BASE_URL`: Service URL (default: http://localhost:8181) +- `READ_WRITE_RATIO`: Ratio of read to write operations (for ReadUpdateTreeDataset only) + +## Running the Benchmarks + +Required environment variables: +```bash +export CLIENT_ID=your_client_id +export CLIENT_SECRET=your_client_secret +export BASE_URL=http://your-polaris-instance:8181 +``` + +To run the sequential dataset creation benchmark: 
+```bash +./gradlew gatlingRun-org.apache.polaris.benchmarks.simulations.CreateTreeDatasetSequential +``` + +To run the concurrent dataset creation benchmark: +```bash +./gradlew gatlingRun-org.apache.polaris.benchmarks.simulations.CreateTreeDatasetConcurrent +``` + +To run the sequential read/update benchmark: +```bash +export READ_WRITE_RATIO=0.8 # 80% reads, 20% writes +./gradlew gatlingRun-org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDatasetSequential +``` + +To run the concurrent read/update benchmark: +```bash +export READ_WRITE_RATIO=0.8 # 80% reads, 20% writes +./gradlew gatlingRun-org.apache.polaris.benchmarks.simulations.ReadUpdateTreeDatasetConcurrent +``` + +A message will show the location of the Gatling report: +``` +Reports generated in: ./benchmarks/build/reports/gatling//index.html +``` + +### Example Polaris server startup + +For repeated testing and benchmarking purposes it's convenient to have fixed client-ID + client-secret combinations. **The following example is ONLY for testing and benchmarking against an airgapped Polaris instance** + +```bash +# Start Polaris with the fixed client-ID/secret admin/admin +# DO NEVER EVER USE THE FOLLOWING FOR ANY NON-AIRGAPPED POLARIS INSTANCE !! +./gradlew :polaris-quarkus-server:quarkusBuild && java \ + -Dpolaris.bootstrap.credentials=POLARIS,admin,admin \ + -Djava.security.manager=allow \ + -jar quarkus/server/build/quarkus-app/quarkus-run.jar +``` + +With the above you can run the benchmarks with the environment variables `CLIENT_ID=admin` and `CLIENT_SECRET=admin` - meant only for convenience in a fully airgapped system. +# Test Dataset + +The benchmarks use synthetic procedural datasets that are generated deterministically at runtime. This means that given the same input parameters, the exact same dataset structure will always be generated. 
This approach allows generating large volumes of test data without having to store it, while ensuring reproducible benchmark results across different runs. + +The diagrams below describe the data sets that are used in benchmarks. Note that the benchmark dataset may not cover all Polaris features. + +## Generation rules + +The dataset has a tree shape. At the root of the tree is a Polaris realm that must exist before the dataset is created. + +An arbitrary number of catalogs can be created under the realm. However, only the first catalog (`C_0`) is used for the rest of the dataset. + +The namespaces part of the dataset is a complete `N`-ary tree. That is, it starts with a root namespace (`NS_0`) and then, each namespace contains exactly `0` or `N` children namespaces. The width as well as the depth of the namespaces tree are configurable. The total number of namespaces can easily be calculated with the following formulae, where `N` is the tree arity and `D` is the total tree depth, including the root: + +$$\text{Total number of namespaces} = +\begin{cases} + \frac{N^{D} - 1}{N - 1} & \mbox{if } N \gt 1 \\ + D & \mbox{if } N = 1 +\end{cases}$$ + +The tables are created under the leaves of the tree. That is, they are put under the namespaces with no child namespace. The number of tables that is created under each leaf namespace is configurable. The total number of tables can easily be calculated with the following formulae, where `N` is the tree arity, `D` is the total tree depth, and `T` is the number of tables per leaf namespace: + +$$\text{Total number of tables} = N^{D-1} \times T$$ + +The views are created alongside the tables. The number of views that is created under each leaf namespace is also configurable.
The total number of views can easily be calculated with the following formulae, where `N` is the tree arity, `D` is the total tree depth, `V` is the number of views per leaf namespace: + +$$\text{Total number of views} = N^{D-1} \times V$$ + +## Binary tree example + +The diagram below shows an example of a test dataset with the following properties: + +- Number of catalogs: `3` +- Namespace tree width (`N`): `2` (a binary tree) +- Namespace tree depth (`D`): `3` +- Tables per namespace (`T`): `5` +- Views per namespace (`V`): `3` + +![Binary tree dataset example with width 2, depth 3, and 5 tables per namespace](docs/dataset-shape-2-3-5.svg) + +Using the formula from the previous section, we can calculate the total number of namespaces and the total number of tables as follows: + +$$\text{Total number of namespaces} = \frac{2^{3} - 1}{2 - 1} = 7$$ + +$$\text{Total number of tables} = 2^{3-1} \times 5 = 20$$ + +## 10-ary tree example + +The diagram below shows an example of a test dataset with the following properties: + +- Number of catalogs: `1` +- Namespace tree width (`N`): `10` +- Namespace tree depth (`D`): `2` +- Tables per namespace (`T`): `3` +- Views per namespace (`V`): `3` + +![10-ary tree dataset example with width 10, depth 2, and 3 tables per namespace](docs/dataset-shape-10-2-3.svg) + +Using the formula from the previous section, we can calculate the total number of namespaces and the total number of tables as follows: + +$$\text{Total number of namespaces} = \frac{10^{2} - 1}{10 - 1} = 11$$ + +$$\text{Total number of tables} = 10^{2-1} \times 3 = 30$$ + +## 1-ary tree example + +The diagram below shows an example of a test dataset with the following properties: + +- Number of catalogs: `1` +- Namespace tree width (`N`): `1` +- Namespace tree depth (`D`): `1000` +- Tables per namespace (`T`): `7` +- Views per namespace (`V`): `4` + +![1-ary tree dataset example with width 1, depth 1000, and 7 tables per namespace](docs/dataset-shape-1-1000-7.svg) + +Using the formula from the previous 
section, we can calculate the total number of namespaces and the total number of tables as follows: + +$$\text{Total number of namespaces} = 1000$$ + +$$\text{Total number of tables} = 1^{1000-1} \times 7 = 7$$ + +## Size + +The data set size can be adjusted as well. Each namespace is associated with an arbitrary number of dummy properties. Similarly, each table is associated with an arbitrary number of dummy columns and properties. + +The diagram below shows sample catalog, namespace and table definition given the following properties: + +- Default base location: `file:///tmp/polaris` +- Number of namespace properties: `100` +- Number of columns per table: `999` +- Number of table properties: `59` + +![Dataset size example showing catalog, namespace, and table definitions](docs/dataset-size.png) diff --git a/benchmarks/build.gradle.kts b/benchmarks/build.gradle.kts new file mode 100644 index 0000000000..c302d818de --- /dev/null +++ b/benchmarks/build.gradle.kts @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +plugins { + id("scala") + id("io.gatling.gradle") version "3.13.4.1" + id("com.diffplug.spotless") +} + +dependencies { + gatling("com.typesafe.play:play-json_2.13:2.9.4") +} + +description = "Polaris Iceberg REST API performance tests" + +spotless { + scala { + // Use scalafmt for Scala formatting + scalafmt("3.9.3").configFile("../.scalafmt.conf") + } +} diff --git a/benchmarks/docs/dataset-shape-1-1000-7.puml b/benchmarks/docs/dataset-shape-1-1000-7.puml new file mode 100644 index 0000000000..0bb7813b82 --- /dev/null +++ b/benchmarks/docs/dataset-shape-1-1000-7.puml @@ -0,0 +1,64 @@ +/' + ' Licensed to the Apache Software Foundation (ASF) under one + ' or more contributor license agreements. See the NOTICE file + ' distributed with this work for additional information + ' regarding copyright ownership. The ASF licenses this file + ' to you under the Apache License, Version 2.0 (the + ' "License"); you may not use this file except in compliance + ' with the License. You may obtain a copy of the License at + ' + ' http://www.apache.org/licenses/LICENSE-2.0 + ' + ' Unless required by applicable law or agreed to in writing, + ' software distributed under the License is distributed on an + ' "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ' KIND, either express or implied. See the License for the + ' specific language governing permissions and limitations + ' under the License. +'/ +@startuml + +rectangle Realm +rectangle "C 0" as C0 #AntiqueWhite +Realm -- C0 + +rectangle "NS 0" as Ns0 #Lavender +C0 -- Ns0 + +rectangle "NS 1" as Ns1 #Lavender +Ns0 -- Ns1 + +rectangle "NS 2" as Ns2 #Lavender +Ns1 -- Ns2 + +rectangle "..." 
as Ns3 #Lavender +Ns2 -[dotted]- Ns3 + +rectangle "NS 999" as Ns999 #Lavender +Ns3 -[dotted]- Ns999 + +rectangle "T 0" as T0 #LightCyan +rectangle "T 1" as T1 #LightCyan +rectangle "T 2" as T2 #LightCyan +rectangle "T 3" as T3 #LightCyan +rectangle "T 4" as T4 #LightCyan +rectangle "T 5" as T5 #LightCyan +rectangle "T 6" as T6 #LightCyan +Ns999 -- T0 +Ns999 -- T1 +Ns999 -- T2 +Ns999 -- T3 +Ns999 -- T4 +Ns999 -- T5 +Ns999 -- T6 + +rectangle "V 0" as V0 #FloralWhite +rectangle "V 1" as V1 #FloralWhite +rectangle "V 2" as V2 #FloralWhite +rectangle "V 3" as V3 #FloralWhite +T0 -- V0 +T1 -- V1 +T2 -- V2 +T3 -- V3 + +@enduml diff --git a/benchmarks/docs/dataset-shape-1-1000-7.svg b/benchmarks/docs/dataset-shape-1-1000-7.svg new file mode 100644 index 0000000000..558e4e4d1b --- /dev/null +++ b/benchmarks/docs/dataset-shape-1-1000-7.svg @@ -0,0 +1 @@ +RealmC 0NS 0NS 1NS 2...NS 999T 0T 1T 2T 3T 4T 5T 6V 0V 1V 2V 3 \ No newline at end of file diff --git a/benchmarks/docs/dataset-shape-10-2-3.puml b/benchmarks/docs/dataset-shape-10-2-3.puml new file mode 100644 index 0000000000..0dd17520dc --- /dev/null +++ b/benchmarks/docs/dataset-shape-10-2-3.puml @@ -0,0 +1,108 @@ +/' + ' Licensed to the Apache Software Foundation (ASF) under one + ' or more contributor license agreements. See the NOTICE file + ' distributed with this work for additional information + ' regarding copyright ownership. The ASF licenses this file + ' to you under the Apache License, Version 2.0 (the + ' "License"); you may not use this file except in compliance + ' with the License. You may obtain a copy of the License at + ' + ' http://www.apache.org/licenses/LICENSE-2.0 + ' + ' Unless required by applicable law or agreed to in writing, + ' software distributed under the License is distributed on an + ' "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ' KIND, either express or implied. See the License for the + ' specific language governing permissions and limitations + ' under the License. 
+'/ +@startuml +skinparam linetype polyline + +rectangle Realm +rectangle "C 0" as C0 #AntiqueWhite +Realm -- C0 + +rectangle "NS 0" as Ns0 #Lavender +C0 -- Ns0 + +rectangle "NS 1" as Ns1 #Lavender +rectangle "NS 2" as Ns2 #Lavender +rectangle "NS 3" as Ns3 #Lavender +rectangle "NS 4" as Ns4 #Lavender +rectangle "NS 5" as Ns5 #Lavender +rectangle "NS 6" as Ns6 #Lavender +rectangle "NS 7" as Ns7 #Lavender +rectangle "NS 8" as Ns8 #Lavender +rectangle "NS 9" as Ns9 #Lavender +rectangle "NS 10" as Ns10 #Lavender +Ns0 -- Ns1 +Ns0 -- Ns2 +Ns0 -- Ns3 +Ns0 -- Ns4 +Ns0 -- Ns5 +Ns0 -- Ns6 +Ns0 -- Ns7 +Ns0 -- Ns8 +Ns0 -- Ns9 +Ns0 -- Ns10 + +rectangle "T 0" as T0 #LightCyan +rectangle "T 1" as T1 #LightCyan +rectangle "T 2" as T2 #LightCyan +rectangle "..." as T2n #LightCyan +rectangle "..." as T3n #LightCyan +rectangle "..." as T4n #LightCyan +rectangle "..." as T5n #LightCyan +rectangle "..." as T6n #LightCyan +rectangle "..." as T7n #LightCyan +rectangle "..." as T8n #LightCyan +rectangle "..." as T9n #LightCyan +rectangle "T 27" as T27 #LightCyan +rectangle "T 28" as T28 #LightCyan +rectangle "T 29" as T29 #LightCyan +Ns1 -- T0 +Ns1 -- T1 +Ns1 -- T2 +Ns2 -[dotted]- T2n +Ns3 -[dotted]- T3n +Ns4 -[dotted]- T4n +Ns5 -[dotted]- T5n +Ns6 -[dotted]- T6n +Ns7 -[dotted]- T7n +Ns8 -[dotted]- T8n +Ns9 -[dotted]- T9n +Ns10 -- T27 +Ns10 -- T28 +Ns10 -- T29 + +rectangle "V 0" as V0 #FloralWhite +rectangle "V 1" as V1 #FloralWhite +rectangle "V 2" as V2 #FloralWhite +rectangle "..." as V2n #FloralWhite +rectangle "..." as V3n #FloralWhite +rectangle "..." as V4n #FloralWhite +rectangle "..." as V5n #FloralWhite +rectangle "..." as V6n #FloralWhite +rectangle "..." as V7n #FloralWhite +rectangle "..." as V8n #FloralWhite +rectangle "..." 
as V9n #FloralWhite +rectangle "V 27" as V27 #FloralWhite +rectangle "V 28" as V28 #FloralWhite +rectangle "V 29" as V29 #FloralWhite +T0 -- V0 +T1 -- V1 +T2 -- V2 +T2n -[dotted]- V2n +T3n -[dotted]- V3n +T4n -[dotted]- V4n +T5n -[dotted]- V5n +T6n -[dotted]- V6n +T7n -[dotted]- V7n +T8n -[dotted]- V8n +T9n -[dotted]- V9n +T27 -- V27 +T28 -- V28 +T29 -- V29 + +@enduml diff --git a/benchmarks/docs/dataset-shape-10-2-3.svg b/benchmarks/docs/dataset-shape-10-2-3.svg new file mode 100644 index 0000000000..5348b133bf --- /dev/null +++ b/benchmarks/docs/dataset-shape-10-2-3.svg @@ -0,0 +1 @@ +RealmC 0NS 0NS 1NS 2NS 3NS 4NS 5NS 6NS 7NS 8NS 9NS 10T 0T 1T 2........................T 27T 28T 29V 0V 1V 2........................V 27V 28V 29 \ No newline at end of file diff --git a/benchmarks/docs/dataset-shape-2-3-5.puml b/benchmarks/docs/dataset-shape-2-3-5.puml new file mode 100644 index 0000000000..effad5ba4e --- /dev/null +++ b/benchmarks/docs/dataset-shape-2-3-5.puml @@ -0,0 +1,88 @@ +/' + ' Licensed to the Apache Software Foundation (ASF) under one + ' or more contributor license agreements. See the NOTICE file + ' distributed with this work for additional information + ' regarding copyright ownership. The ASF licenses this file + ' to you under the Apache License, Version 2.0 (the + ' "License"); you may not use this file except in compliance + ' with the License. You may obtain a copy of the License at + ' + ' http://www.apache.org/licenses/LICENSE-2.0 + ' + ' Unless required by applicable law or agreed to in writing, + ' software distributed under the License is distributed on an + ' "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ' KIND, either express or implied. See the License for the + ' specific language governing permissions and limitations + ' under the License. 
+'/ +@startuml + +rectangle Realm +rectangle "C 0" as C0 #AntiqueWhite +rectangle "C 1" as C1 #AntiqueWhite +rectangle "C 2" as C2 #AntiqueWhite +Realm -- C0 +Realm -- C1 +Realm -- C2 + +rectangle "NS 0" as Ns0 #Lavender +C0 -- Ns0 + +rectangle "NS 1" as Ns1 #Lavender +rectangle "NS 2" as Ns2 #Lavender +Ns0 -- Ns1 +Ns0 -- Ns2 + +rectangle "NS 3" as Ns3 #Lavender +rectangle "NS 4" as Ns4 #Lavender +rectangle "NS 5" as Ns5 #Lavender +rectangle "NS 6" as Ns6 #Lavender +Ns1 -- Ns3 +Ns1 -- Ns4 +Ns2 -- Ns5 +Ns2 -- Ns6 + +rectangle "T 0" as T0 #LightCyan +rectangle "T 1" as T1 #LightCyan +rectangle "T 2" as T2 #LightCyan +rectangle "T 3" as T3 #LightCyan +rectangle "T 4" as T4 #LightCyan +rectangle "..." as T5n #LightCyan +rectangle "..." as T6n #LightCyan +rectangle "T 15" as T15 #LightCyan +rectangle "T 16" as T16 #LightCyan +rectangle "T 17" as T17 #LightCyan +rectangle "T 18" as T18 #LightCyan +rectangle "T 19" as T19 #LightCyan +Ns3 -- T0 +Ns3 -- T1 +Ns3 -- T2 +Ns3 -- T3 +Ns3 -- T4 +Ns4 -[dotted]- T5n +Ns5 -[dotted]- T6n +Ns6 -- T15 +Ns6 -- T16 +Ns6 -- T17 +Ns6 -- T18 +Ns6 -- T19 + +rectangle "V 0" as V0 #FloralWhite +rectangle "V 1" as V1 #FloralWhite +rectangle "V 2" as V2 #FloralWhite +rectangle "..." as V5n #FloralWhite +rectangle "..." 
as V6n #FloralWhite +rectangle "V 15" as V15 #FloralWhite +rectangle "V 16" as V16 #FloralWhite +rectangle "V 17" as V17 #FloralWhite +T0 -- V0 +T1 -- V1 +T2 -- V2 +T5n -[dotted]- V5n +T6n -[dotted]- V6n +T15 -- V15 +T16 -- V16 +T17 -- V17 + +@enduml diff --git a/benchmarks/docs/dataset-shape-2-3-5.svg b/benchmarks/docs/dataset-shape-2-3-5.svg new file mode 100644 index 0000000000..fa804ff80e --- /dev/null +++ b/benchmarks/docs/dataset-shape-2-3-5.svg @@ -0,0 +1 @@ +RealmC 0C 1C 2NS 0NS 1NS 2NS 3NS 4NS 5NS 6T 0T 1T 2T 3T 4......T 15T 16T 17T 18T 19V 0V 1V 2......V 15V 16V 17 \ No newline at end of file diff --git a/benchmarks/docs/dataset-size.puml b/benchmarks/docs/dataset-size.puml new file mode 100644 index 0000000000..2ab840c8c9 --- /dev/null +++ b/benchmarks/docs/dataset-size.puml @@ -0,0 +1,59 @@ +/' + ' Licensed to the Apache Software Foundation (ASF) under one + ' or more contributor license agreements. See the NOTICE file + ' distributed with this work for additional information + ' regarding copyright ownership. The ASF licenses this file + ' to you under the Apache License, Version 2.0 (the + ' "License"); you may not use this file except in compliance + ' with the License. You may obtain a copy of the License at + ' + ' http://www.apache.org/licenses/LICENSE-2.0 + ' + ' Unless required by applicable law or agreed to in writing, + ' software distributed under the License is distributed on an + ' "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ' KIND, either express or implied. See the License for the + ' specific language governing permissions and limitations + ' under the License. 
+'/ +@startuml + +map C_0 { + catalogName => C_0 + defaultBaseLocation => file:///tmp/polaris/C_0 +} + +json NS_4 { + "namespace": ["NS_0", "NS_1", "NS_4"], + "properties": { + "Attribute_0": "Value_0", + "Attribute_1": "Value_1", + "...": "...", + "Attribute_100": "Value_100" + } +} + +json T_15 { + "name": "T_15", + "schema": { + "type": "struct", + "fields": [ + {"id": 0, "name": "column0", "type": "int", "required": true}, + {"id": 1, "name": "column1", "type": "int", "required": true}, + "...", + {"id": 999, "name": "column999", "type": "int", "required": true} + ], + "identifier-field-ids": [0] + }, + "properties": { + "Attribute_0": "Value_0", + "Attribute_1": "Value_1", + "...": "...", + "Attribute_59": "Value_59" + } +} + +C_0 -[hidden]- NS_4 +NS_4 -[hidden]- T_15 + +@enduml diff --git a/benchmarks/docs/dataset-size.svg b/benchmarks/docs/dataset-size.svg new file mode 100644 index 0000000000..c7da075c46 --- /dev/null +++ b/benchmarks/docs/dataset-size.svg @@ -0,0 +1 @@ +C_0catalogNameC_0defaultBaseLocationfile:///tmp/polaris/C_0NS_4namespaceNS_0NS_1NS_4propertiesAttribute_0Value_0Attribute_1Value_1......Attribute_100Value_100T_15nameT_15schematypestructfieldsid0namecolumn0typeintrequiredtrueid1namecolumn1typeintrequiredtrue...id999namecolumn999typeintrequiredtrueidentifier-field-ids0propertiesAttribute_0Value_0Attribute_1Value_1......Attribute_59Value_59 \ No newline at end of file diff --git a/benchmarks/docs/gatling-report.png b/benchmarks/docs/gatling-report.png new file mode 100644 index 0000000000..77688de8cc Binary files /dev/null and b/benchmarks/docs/gatling-report.png differ diff --git a/benchmarks/src/gatling/resources/logback-test.xml b/benchmarks/src/gatling/resources/logback-test.xml new file mode 100644 index 0000000000..c9227d3eaf --- /dev/null +++ b/benchmarks/src/gatling/resources/logback-test.xml @@ -0,0 +1,39 @@ + + + + + + %d{HH:mm:ss.SSS} [%-5level] %logger{15} - %msg%n%rEx + + false + + + + + + + + + + + + diff --git 
a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/NAryTreeBuilder.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/NAryTreeBuilder.scala new file mode 100644 index 0000000000..b270230aa8 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/NAryTreeBuilder.scala @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks + +case class NAryTreeBuilder(nsWidth: Int, nsDepth: Int) { + + /** + * Computes the path from the root node to the given ordinal. + * + * @param ordinal the ordinal of the node + * @param acc the accumulator for the path + * @return the path from the root node (included) to the given ordinal (included) + */ + @scala.annotation.tailrec + final def pathToRoot(ordinal: Int, acc: List[Int] = Nil): List[Int] = + if (ordinal == 0) { + ordinal :: acc + } else { + val parent = (ordinal - 1) / nsWidth + pathToRoot(parent, ordinal :: acc) + } + + /** + * Calculates the depth of a node in the n-ary tree based on its ordinal. + * + * @param ordinal The ordinal of the node. + * @return The depth of the node in the tree. 
+ */ + def depthOf(ordinal: Int): Int = { + if (ordinal == 0) return 0 + if (nsWidth == 1) return ordinal + + // Using the formula: floor(log_n((x * (n-1)) + 1)) + val numerator = (ordinal * (nsWidth - 1)) + 1 + (math.log(numerator) / math.log(nsWidth)).floor.toInt + } + + /** + * Calculate the total number of nodes in a complete n-ary tree. + * + * @return The total number of nodes in the tree. + */ + val numberOfNodes: Int = + // The sum of nodes from level 0 to level d is (n^(d+1) - 1) / (n - 1) + ((math.pow(nsWidth, nsDepth) - 1) / (nsWidth - 1)).toInt + + /** + * Returns a range of ordinals for the nodes on the last level of a complete n-ary tree. + * + * @return The range of ordinals for the nodes on the last level of the tree. + */ + val lastLevelOrdinals: Range = { + val lastLevel = nsDepth - 1 + if (nsWidth == 1) { + // For a unary tree, the only node at depth d is simply the node with ordinal d. + Range.inclusive(lastLevel, lastLevel) + } else { + // The sum of nodes from level 0 to level d-1 is (n^d - 1) / (n - 1) + val start = ((math.pow(nsWidth, lastLevel) - 1) / (nsWidth - 1)).toInt + // The sum of nodes from level 0 to level d is (n^(d+1) - 1) / (n - 1) + // Therefore, the last ordinal at depth d is: + val end = (((math.pow(nsWidth, lastLevel + 1) - 1) / (nsWidth - 1)).toInt) - 1 + Range.inclusive(start, end) + } + } + + val numberOfLastLevelElements: Int = { + val lastLevel = nsDepth - 1 + math.pow(nsWidth, lastLevel).toInt + } +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/RetryOnHttpCodes.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/RetryOnHttpCodes.scala new file mode 100644 index 0000000000..93656151c5 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/RetryOnHttpCodes.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks + +import io.gatling.core.Predef._ +import io.gatling.core.structure.ChainBuilder +import io.gatling.http.Predef._ +import io.gatling.http.request.builder.HttpRequestBuilder +import org.slf4j.LoggerFactory + +object RetryOnHttpCodes { + private val logger = LoggerFactory.getLogger(getClass) + + implicit class HttpRequestBuilderWithStatusSave(val httpRequestBuilder: HttpRequestBuilder) { + def saveHttpStatusCode(): HttpRequestBuilder = + httpRequestBuilder.check(status.saveAs("lastHttpStatusCode")) + } + + def retryOnHttpStatus(maxRetries: Int, statusCodes: Set[Int], counterName: String = "httpRetry")( + httpRequestBuilder: HttpRequestBuilder + ): ChainBuilder = + exec(session => session.set(counterName, 0)) + .exec(session => session.set("lastHttpStatusCode", -1)) + .asLongAs(session => + session(counterName).as[Int] == 0 || ( + session(counterName).as[Int] < maxRetries && + statusCodes.contains(session("lastHttpStatusCode").as[Int]) + ) + ) { + exec(session => session.set(counterName, session(counterName).as[Int] + 1)) + .exec(httpRequestBuilder) + } + .doIf(session => + session(counterName).as[Int] >= maxRetries && statusCodes.contains( + session("lastHttpStatusCode").as[Int] + ) + ) { + exec { session => + logger.warn( + 
s"""Max retries (${maxRetries}) attempted for chain "${counterName}". Last HTTP status code: ${session( + "lastHttpStatusCode" + ).as[Int]}""" + ) + session + } + } + .exec(session => session.removeAll("lastHttpStatusCode", counterName)) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/AuthenticationActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/AuthenticationActions.scala new file mode 100644 index 0000000000..760d2652b5 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/AuthenticationActions.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.actions + +import io.gatling.core.Predef._ +import io.gatling.core.feeder.Feeder +import io.gatling.core.structure.ChainBuilder +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.RetryOnHttpCodes.{ + retryOnHttpStatus, + HttpRequestBuilderWithStatusSave +} +import org.apache.polaris.benchmarks.parameters.ConnectionParameters +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.AtomicReference + +/** + * Actions for performance testing authentication operations. 
This class provides methods to + * authenticate and manage access tokens for API requests. + * + * @param cp Connection parameters containing client credentials + * @param accessToken Reference to the authentication token shared across actions + * @param maxRetries Maximum number of retry attempts for failed operations + * @param retryableHttpCodes HTTP status codes that should trigger a retry + */ +case class AuthenticationActions( + cp: ConnectionParameters, + accessToken: AtomicReference[String], + maxRetries: Int = 10, + retryableHttpCodes: Set[Int] = Set(500) +) { + private val logger = LoggerFactory.getLogger(getClass) + + /** + * Creates a Gatling Feeder that provides authentication credentials. The feeder continuously + * supplies client ID and client secret from the connection parameters for use in authentication + * requests. + * + * @return An iterator providing client credentials + */ + def feeder(): Feeder[String] = Iterator.continually( + Map( + "clientId" -> cp.clientId, + "clientSecret" -> cp.clientSecret + ) + ) + + /** + * Authenticates using client credentials and saves the access token as a session attribute. The + * credentials are defined in the [[AuthenticationActions.feeder]]. This operation performs an + * OAuth2 client credentials flow, requesting full principal roles, and stores the received access + * token in both the Gatling session and the shared AtomicReference. + * + * There is no limit to the maximum number of users that can authenticate concurrently. 
+ */ + val authenticateAndSaveAccessToken: ChainBuilder = + retryOnHttpStatus(maxRetries, retryableHttpCodes, "Authenticate")( + http("Authenticate") + .post("/api/catalog/v1/oauth/tokens") + .header("Content-Type", "application/x-www-form-urlencoded") + .formParam("grant_type", "client_credentials") + .formParam("client_id", "#{clientId}") + .formParam("client_secret", "#{clientSecret}") + .formParam("scope", "PRINCIPAL_ROLE:ALL") + .saveHttpStatusCode() + .check(status.is(200)) + .check(jsonPath("$.access_token").saveAs("accessToken")) + ) + .exec { session => + accessToken.set(session("accessToken").as[String]) + session + } + + /** + * Restores the current access token from the shared reference into the Gatling session. This + * operation is useful when a scenario needs to reuse an authentication token from a previous + * scenario. + */ + val restoreAccessTokenInSession: ChainBuilder = + exec(session => session.set("accessToken", accessToken.get())) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/CatalogActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/CatalogActions.scala new file mode 100644 index 0000000000..8878b5a133 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/CatalogActions.scala @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.actions + +import io.gatling.core.Predef._ +import io.gatling.core.feeder.Feeder +import io.gatling.core.structure.ChainBuilder +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.RetryOnHttpCodes.{ + retryOnHttpStatus, + HttpRequestBuilderWithStatusSave +} +import org.apache.polaris.benchmarks.parameters.DatasetParameters +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.AtomicReference + +/** + * Actions for performance testing catalog operations in Apache Iceberg. This class provides methods + * to create and fetch catalogs. + * + * @param dp Dataset parameters controlling the dataset generation + * @param accessToken Reference to the authentication token for API requests + * @param maxRetries Maximum number of retry attempts for failed operations + * @param retryableHttpCodes HTTP status codes that should trigger a retry + */ +case class CatalogActions( + dp: DatasetParameters, + accessToken: AtomicReference[String], + maxRetries: Int = 10, + retryableHttpCodes: Set[Int] = Set(409, 500) +) { + private val logger = LoggerFactory.getLogger(getClass) + + /** + * Creates a Gatling Feeder that generates catalog names and their default storage locations. Each + * catalog will be named "C_n" where n is a sequential number, and will have a corresponding + * storage location under the configured base path. 
+ * + * @return An iterator providing catalog names and their storage locations + */ + def feeder(): Feeder[String] = Iterator + .from(0) + .map { i => + val catalogName = s"C_$i" + Map( + "catalogName" -> catalogName, + "defaultBaseLocation" -> s"${dp.defaultBaseLocation}/$catalogName" + ) + } + .take(dp.numCatalogs) + + /** + * Creates a new Iceberg catalog with FILE storage type. The catalog is created as an INTERNAL + * type with a name and a default base location that are defined in the [[CatalogActions.feeder]]. + * This represents the fundamental operation of establishing a new catalog in an Iceberg + * deployment. + * + * There is no limit to the number of users that can create catalogs concurrently. + */ + val createCatalog: ChainBuilder = + retryOnHttpStatus(maxRetries, retryableHttpCodes, "Create catalog")( + http("Create Catalog") + .post("/api/management/v1/catalogs") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + """{ + | "catalog": { + | "type": "INTERNAL", + | "name": "#{catalogName}", + | "properties": { + | "default-base-location": "#{defaultBaseLocation}" + | }, + | "storageConfigInfo": { + | "storageType": "FILE" + | } + | } + |}""".stripMargin + ) + ) + .saveHttpStatusCode() + .check(status.is(201)) + ) + + /** + * Retrieves details of a specific Iceberg catalog by name. The catalog name is defined in the + * [[CatalogActions.feeder]]. Some basic properties are verified, like the catalog type, storage + * settings, and base location. + * + * There is no limit to the number of users that can fetch catalogs concurrently. 
+ */ + val fetchCatalog: ChainBuilder = exec( + http("Fetch Catalog") + .get("/api/management/v1/catalogs/#{catalogName}") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .check(status.is(200)) + .check(jsonPath("$.type").is("INTERNAL")) + .check(jsonPath("$.name").is("#{catalogName}")) + .check(jsonPath("$.properties.default-base-location").is("#{defaultBaseLocation}")) + .check(jsonPath("$.storageConfigInfo.storageType").is("FILE")) + .check(jsonPath("$.storageConfigInfo.allowedLocations[0]").is("#{defaultBaseLocation}")) + ) + + /** + * Lists all available Iceberg catalogs in the deployment. This operation does not rely on any + * feeder data. + */ + val fetchAllCatalogs: ChainBuilder = exec( + http("Fetch all Catalogs") + .get("/api/management/v1/catalogs") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .check(status.is(200)) + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/EntityProperties.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/EntityProperties.scala new file mode 100644 index 0000000000..962651d583 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/EntityProperties.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.actions + +import play.api.libs.json.Json + +object EntityProperties { + def filterMapByPrefix(json: String, prefix: String): Map[String, String] = + Json.parse(json).as[Map[String, String]].filter { case (k, _) => + k.startsWith(prefix) + } +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/NamespaceActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/NamespaceActions.scala new file mode 100644 index 0000000000..b33a9912cb --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/NamespaceActions.scala @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.benchmarks.actions + +import io.gatling.core.Predef._ +import io.gatling.core.feeder.Feeder +import io.gatling.core.structure.ChainBuilder +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.RetryOnHttpCodes.{ + retryOnHttpStatus, + HttpRequestBuilderWithStatusSave +} +import org.apache.polaris.benchmarks.parameters.DatasetParameters +import org.slf4j.LoggerFactory +import play.api.libs.json.Json + +import java.util.concurrent.atomic.AtomicReference + +/** + * Actions for performance testing authentication operations. This class provides methods to + * authenticate and manage access tokens for API requests. + * + * @param dp Dataset parameters controlling the dataset generation + * @param accessToken Reference to the authentication token shared across actions + * @param maxRetries Maximum number of retry attempts for failed operations + * @param retryableHttpCodes HTTP status codes that should trigger a retry + */ +case class NamespaceActions( + dp: DatasetParameters, + accessToken: AtomicReference[String], + maxRetries: Int = 10, + retryableHttpCodes: Set[Int] = Set(409, 500) +) { + private val logger = LoggerFactory.getLogger(getClass) + + /** + * Creates a Gatling Feeder that generates namespace hierarchies. Each row is associated with a + * distinct namespace. Namespaces are named "NS_n" where n is derived from the n-ary tree + * position. The feeder provides catalog name, namespace id and namespace path. 
+ * + * @return An iterator providing namespace details + */ + def namespaceIdentityFeeder(): Feeder[Any] = Iterator + .from(0) + .map { tableId => + val namespaceId = tableId + val namespacePath: Seq[String] = dp.nAryTree + .pathToRoot(namespaceId) + .map(ordinal => s"NS_$ordinal") + Map( + "catalogName" -> "C_0", + "namespaceId" -> tableId, + "namespacePath" -> namespacePath, + "namespaceJsonPath" -> Json.toJson(namespacePath).toString(), + "namespaceMultipartPath" -> namespacePath.mkString("%1F") + ) + } + .take(dp.nAryTree.numberOfNodes) + + /** + * Creates a Gatling Feeder that generates namespace hierarchies. Each row is associated with a + * distinct namespace and leverages the [[NamespaceActions.namespaceIdentityFeeder()]]. Additional + * attributes are added to each row, like namespace properties. + * + * @return An iterator providing namespace details and their properties + */ + def namespaceCreationFeeder(): Feeder[Any] = namespaceIdentityFeeder() + .map { row => + val properties: Map[String, String] = (0 until dp.numNamespaceProperties) + .map(id => s"InitialAttribute_$id" -> s"$id") + .toMap + row ++ Map( + "initialProperties" -> properties, + "initialJsonProperties" -> Json.toJson(properties).toString() + ) + } + + /** + * Creates a Gatling Feeder that generates expected namespace attributes. Each row is associated + * with a distinct namespace and leverages the [[NamespaceActions.namespaceCreationFeeder]]. + * Additional attributes are added to each row so that the payload returned by the server can be + * verified. The initial properties from the namespace creation feeder are used to verify the + * namespace properties. 
+ * + * @return An iterator providing namespace details and their properties + */ + def namespaceFetchFeeder(): Feeder[Any] = namespaceCreationFeeder() + .map { row => + val catalogName = row("catalogName").asInstanceOf[String] + val namespaceUnixPath = row("namespacePath").asInstanceOf[Seq[String]].mkString("/") + val location = Map( + "location" -> s"${dp.defaultBaseLocation}/$catalogName/$namespaceUnixPath" + ) + row ++ Map( + "location" -> location + ) + } + + def namespacePropertiesUpdateFeeder(): Feeder[Any] = namespaceIdentityFeeder() + .flatMap { row => + (0 until dp.numNamespacePropertyUpdates).map { updateId => + val updates = Map(s"UpdatedAttribute_$updateId" -> s"$updateId") + row ++ Map( + "jsonPropertyUpdates" -> Json.toJson(updates).toString() + ) + } + } + + /** + * Creates a new namespace in a specified catalog. The namespace is created with a full path and + * properties that are defined in the [[NamespaceActions.namespaceCreationFeeder]]. + * + * Namespaces have a dependency on the existence of their parent namespaces. As a result, the + * namespace creation operation is expected to fail if too many concurrent users are run. It is + * possible that a user tries to create a namespace for which the parent has not been fully + * created yet. + * + * Therefore, the number of concurrent users should start with 1 and increase gradually. + * Typically, start 1 user and increase by 1 user every second until some arbitrary maximum value. 
+ */ + val createNamespace: ChainBuilder = + retryOnHttpStatus(maxRetries, retryableHttpCodes, "Create namespace")( + http("Create Namespace") + .post("/api/catalog/v1/#{catalogName}/namespaces") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + """{ + | "namespace": #{namespaceJsonPath}, + | "properties": #{initialJsonProperties} + |}""".stripMargin + ) + ) + .saveHttpStatusCode() + .check(status.is(200)) + .check(jsonPath("$.namespace").is("#{namespaceJsonPath}")) + ) + + /** + * Retrieves details of a specific namespace by its path. The namespace path and properties are + * verified against the values defined in the [[NamespaceActions.namespaceFetchFeeder]]. This + * operation validates the namespace existence and its configuration. + * + * There is no limit to the number of users that can fetch namespaces concurrently. + */ + val fetchNamespace: ChainBuilder = exec( + http("Fetch Namespace") + .get("/api/catalog/v1/#{catalogName}/namespaces/#{namespaceMultipartPath}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(200)) + .check(jsonPath("$.namespace").is("#{namespaceJsonPath}")) + .check( + jsonPath("$.properties") + .transform(str => EntityProperties.filterMapByPrefix(str, "InitialAttribute_")) + .is("#{initialProperties}") + ) + .check( + jsonPath("$.properties") + .transform(str => EntityProperties.filterMapByPrefix(str, "location")) + .is("#{location}") + ) + ) + + /** + * Checks if a specific namespace exists by its path. + */ + val checkNamespaceExists: ChainBuilder = exec( + http("Check Namespace Exists") + .head("/api/catalog/v1/#{catalogName}/namespaces/#{namespaceMultipartPath}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(204)) + ) + + /** + * Lists all child namespaces under a specific parent namespace. 
This operation retrieves the + * immediate children of a given namespace, supporting the hierarchical nature of the namespace + * structure. + */ + val fetchAllChildrenNamespaces: ChainBuilder = exec( + http("Fetch all Namespaces under specific parent") + .get("/api/catalog/v1/#{catalogName}/namespaces?parent=#{namespaceMultipartPath}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(200)) + ) + + val updateNamespaceProperties: ChainBuilder = + retryOnHttpStatus(maxRetries, retryableHttpCodes, "Update namespace properties")( + http("Update Namespace Properties") + .post("/api/catalog/v1/#{catalogName}/namespaces/#{namespaceMultipartPath}/properties") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + """{ + | "removals": [], + | "updates": #{jsonPropertyUpdates} + |}""".stripMargin + ) + ) + .saveHttpStatusCode() + .check(status.is(200)) + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/TableActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/TableActions.scala new file mode 100644 index 0000000000..fcfbe343bd --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/TableActions.scala @@ -0,0 +1,256 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.actions + +import io.gatling.core.Predef._ +import io.gatling.core.feeder.Feeder +import io.gatling.core.structure.ChainBuilder +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.RetryOnHttpCodes.{ + retryOnHttpStatus, + HttpRequestBuilderWithStatusSave +} +import org.apache.polaris.benchmarks.parameters.DatasetParameters +import org.slf4j.LoggerFactory +import play.api.libs.json.Format.GenericFormat +import play.api.libs.json.OFormat.oFormatFromReadsAndOWrites +import play.api.libs.json.{Format, Json} + +import java.util.concurrent.atomic.AtomicReference + +/** + * Actions for performance testing table operations. This class provides methods to create and + * manage tables within namespaces. + * + * @param dp Dataset parameters controlling the dataset generation + * @param accessToken Reference to the authentication token shared across actions + * @param maxRetries Maximum number of retry attempts for failed operations + * @param retryableHttpCodes HTTP status codes that should trigger a retry + */ +case class TableActions( + dp: DatasetParameters, + accessToken: AtomicReference[String], + maxRetries: Int = 10, + retryableHttpCodes: Set[Int] = Set(409, 500) +) { + private val logger = LoggerFactory.getLogger(getClass) + + /** + * Creates a base Gatling Feeder that generates table identities. Each row contains the basic + * information needed to identify a table: catalog name, namespace path, and table name. 
Tables + * are named "T_n" where n is derived from the namespace and table position. + * + * @return An iterator providing table identity details + */ + def tableIdentityFeeder(): Feeder[Any] = dp.nAryTree.lastLevelOrdinals.iterator + .flatMap { namespaceId => + val positionInLevel = namespaceId - dp.nAryTree.lastLevelOrdinals.head + val parentNamespacePath: Seq[String] = dp.nAryTree + .pathToRoot(namespaceId) + .map(ordinal => s"NS_$ordinal") + Range(0, dp.numTablesPerNs) + .map { j => + val tableId = positionInLevel * dp.numTablesPerNs + j + Map( + "catalogName" -> "C_0", + "parentNamespacePath" -> parentNamespacePath, + "multipartNamespace" -> parentNamespacePath.mkString("%1F"), + "tableName" -> s"T_$tableId" + ) + } + } + .take(dp.numTables) + + /** + * Creates a Gatling Feeder that generates table creation details. Each row builds upon the table + * identity information of [[TableActions.tableIdentityFeeder]] and adds schema and property + * details needed for table creation. + * + * @return An iterator providing table creation details + */ + def tableCreationFeeder(): Feeder[Any] = tableIdentityFeeder() + .map { row => + // The field identifiers start at 1 because if they start at 0, they will be overwritten by Iceberg. + // See https://github.com/apache/iceberg/issues/10084 + val fields: Seq[TableField] = (1 to dp.numColumns) + .map(id => TableField(id = id, name = s"column$id", `type` = "int", required = true)) + val properties: Map[String, String] = (0 until dp.numTableProperties) + .map(id => s"InitialAttribute_$id" -> s"$id") + .toMap + row ++ Map( + "schemasType" -> "struct", + "schemasIdentifierFieldIds" -> "[1]", + "fieldsStr" -> Json.toJson(fields).toString(), + "fields" -> fields, + "initialJsonProperties" -> Json.toJson(properties).toString() + ) + } + + /** + * Creates a Gatling Feeder that generates table property updates. Each row contains a single + * property update targeting a specific table. 
+ * + * @return An iterator providing table property update details + */ + def propertyUpdateFeeder(): Feeder[Any] = tableIdentityFeeder() + .flatMap(row => + Range(0, dp.numTablePropertyUpdates) + .map(k => row + ("newProperty" -> s"""{"NewAttribute_$k": "NewValue_$k"}""")) + ) + + /** + * Creates a Gatling Feeder that generates table details. Each row builds upon the table creation + * information and adds schema expectations for fetch verification. The details should be used to + * verify the table schema and properties after creation. + * + * @return An iterator providing table fetch details with expected response values + */ + def tableFetchFeeder(): Feeder[Any] = tableIdentityFeeder() + .map { row => + val catalogName: String = row("catalogName").asInstanceOf[String] + val parentNamespacePath: Seq[String] = row("parentNamespacePath").asInstanceOf[Seq[String]] + val tableName: String = row("tableName").asInstanceOf[String] + val initialProperties: Map[String, String] = (0 until dp.numTableProperties) + .map(id => s"InitialAttribute_$id" -> s"$id") + .toMap + row ++ Map( + "initialProperties" -> initialProperties, + "location" -> s"${dp.defaultBaseLocation}/$catalogName/${parentNamespacePath.mkString("/")}/$tableName" + ) + } + + /** + * Creates a new table in a specified namespace. The table is created with a name, schema + * definition, and properties that are defined in the [[TableActions.tableCreationFeeder]]. The + * operation includes retry logic for handling transient failures. + * + * There is no limit to the number of users that can create tables concurrently. 
+ */ + val createTable: ChainBuilder = retryOnHttpStatus(maxRetries, retryableHttpCodes, "Create table")( + http("Create Table") + .post("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/tables") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + """{ + | "name": "#{tableName}", + | "stage-create": false, + | "schema": { + | "type": "#{schemasType}", + | "fields": #{fieldsStr}, + | "identifier-field-ids": #{schemasIdentifierFieldIds} + | }, + | "properties": #{initialJsonProperties} + |}""".stripMargin + ) + ) + .saveHttpStatusCode() + .check(status.is(200)) + ) + + /** + * Retrieves details of a specific table by its name and namespace path. The table location, + * schema, and properties are verified against the values defined in the + * [[TableActions.tableFetchFeeder]]. This operation validates the table existence and its + * configuration. + * + * There is no limit to the number of users that can fetch tables concurrently. + */ + val fetchTable: ChainBuilder = exec( + http("Fetch Table") + .get("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/tables/#{tableName}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(200)) + .check(jsonPath("$.metadata.table-uuid").saveAs("tableUuid")) + .check(jsonPath("$.metadata.location").is("#{location}")) + .check( + jsonPath("$.metadata.properties") + .transform(str => EntityProperties.filterMapByPrefix(str, "InitialAttribute_")) + .is("#{initialProperties}") + ) + ) + + /** + * Checks if a specific table exists by its name and namespace path. + */ + val checkTableExists: ChainBuilder = exec( + http("Check Table Exists") + .head("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/tables/#{tableName}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(204)) + ) + + /** + * Lists all tables under a specific namespace. 
This operation retrieves all tables within the + * given namespace, supporting bulk retrieval of table metadata. + */ + val fetchAllTables: ChainBuilder = exec( + http("Fetch all Tables under parent namespace") + .get("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/tables") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(200)) + ) + + /** + * Updates the properties of a specific table by its name and namespace path. The table properties + * are updated with new values defined in the [[TableActions.propertyUpdateFeeder]]. + * + * There is no limit to the number of users that can update table properties concurrently. + */ + val updateTable: ChainBuilder = + retryOnHttpStatus(maxRetries, retryableHttpCodes, "Update table metadata")( + http("Update table metadata") + .post("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/tables/#{tableName}") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + s"""{ + | "updates": [{ + | "action": "set-properties", + | "updates": #{newProperty} + | }] + |}""".stripMargin + ) + ) + .saveHttpStatusCode() + .check(status.is(200).saveAs("lastStatus")) + ) +} + +/** + * This object provides JSON serialization for the table field schema so that it can be used in + * Gatling response checks. + */ +object TableField { + implicit val format: Format[TableField] = Json.format[TableField] + + def fromList(json: String): Seq[TableField] = Json.parse(json).as[Seq[TableField]] +} + +/** + * A case class representing a table field schema. 
+ * @param id Field identifier + * @param name Field name + * @param `type` Field type + * @param required Field requirement + */ +case class TableField(id: Int, name: String, `type`: String, required: Boolean) diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/ViewActions.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/ViewActions.scala new file mode 100644 index 0000000000..7af810f5b0 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/actions/ViewActions.scala @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.benchmarks.actions + +import io.gatling.core.Predef._ +import io.gatling.core.feeder.Feeder +import io.gatling.core.structure.ChainBuilder +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.RetryOnHttpCodes.{ + retryOnHttpStatus, + HttpRequestBuilderWithStatusSave +} +import org.apache.polaris.benchmarks.parameters.DatasetParameters +import org.slf4j.LoggerFactory +import play.api.libs.json.{Format, Json} + +import java.time.Instant +import java.util.concurrent.atomic.AtomicReference + +case class ViewActions( + dp: DatasetParameters, + accessToken: AtomicReference[String], + maxRetries: Int = 10, + retryableHttpCodes: Set[Int] = Set(409, 500) +) { + private val logger = LoggerFactory.getLogger(getClass) + + /** + * Creates a base Gatling Feeder that generates view identities. Each row contains the basic + * information needed to identify a view: catalog name, namespace path, and view name. + * + * @return An iterator providing view identity details + */ + def viewIdentityFeeder(): Feeder[Any] = dp.nAryTree.lastLevelOrdinals.iterator + .flatMap { namespaceId => + val catalogId = 0 + val parentNamespacePath: Seq[String] = dp.nAryTree + .pathToRoot(namespaceId) + .map(ordinal => s"NS_$ordinal") + val positionInLevel = namespaceId - dp.nAryTree.lastLevelOrdinals.head + + Range(0, dp.numViewsPerNs) + .map { j => + // Ensure the view ID matches that of the associated table + val viewId = positionInLevel * dp.numTablesPerNs + j + Map( + "catalogName" -> s"C_$catalogId", + "parentNamespacePath" -> parentNamespacePath, + "multipartNamespace" -> parentNamespacePath.mkString("%1F"), + "viewName" -> s"V_$viewId", + "viewId" -> viewId + ) + } + } + .take(dp.numViews) + + /** + * Creates a Gatling Feeder that generates view creation details. Each row builds upon the view + * identity information and adds schema and query details needed for view creation. 
+ * + * @return An iterator providing view creation details + */ + def viewCreationFeeder(): Feeder[Any] = viewIdentityFeeder() + .map { row => + val viewId = row("viewId").asInstanceOf[Int] + val tableName = s"T_$viewId" + val fields: Seq[ViewField] = (1 to dp.numColumns) + .map(id => ViewField(id = id, name = s"column$id", `type` = "int", required = true)) + val properties: Map[String, String] = (0 until dp.numTableProperties) + .map(id => s"InitialAttribute_$id" -> s"$id") + .toMap + row ++ Map( + "tableName" -> tableName, // Reference the table at the same index as the view + "timestamp" -> Instant.now().toEpochMilli.toString, + "fieldsStr" -> Json.toJson(fields).toString(), + "fields" -> fields, + "sqlQuery" -> s"SELECT * FROM $tableName", + "initialJsonProperties" -> Json.toJson(properties).toString() + ) + } + + /** + * Creates a Gatling Feeder that generates view property updates. Each row contains a single + * property update targeting a specific view. + * + * @return An iterator providing view property update details + */ + def propertyUpdateFeeder(): Feeder[Any] = viewIdentityFeeder() + .flatMap(row => + Range(0, dp.numViewPropertyUpdates) + .map(k => row + ("newProperty" -> s"""{"NewAttribute_$k": "NewValue_$k"}""")) + ) + + /** + * Creates a Gatling Feeder that generates view details. Each row builds upon the view creation + * information and adds schema expectations for fetch verification. The details should be used to + * verify the view schema and properties after creation. 
+ * + * @return An iterator providing view fetch details with expected response values + */ + def viewFetchFeeder(): Feeder[Any] = viewCreationFeeder() + .map { row => + val catalogName: String = row("catalogName").asInstanceOf[String] + val parentNamespacePath: Seq[String] = row("parentNamespacePath").asInstanceOf[Seq[String]] + val viewName: String = row("viewName").asInstanceOf[String] + val initialProperties: Map[String, String] = (0 until dp.numTableProperties) + .map(id => s"InitialAttribute_$id" -> s"$id") + .toMap + row ++ Map( + "initialProperties" -> initialProperties, + "location" -> s"${dp.defaultBaseLocation}/$catalogName/${parentNamespacePath.mkString("/")}/$viewName" + ) + } + + val createView: ChainBuilder = retryOnHttpStatus(maxRetries, retryableHttpCodes, "Create view")( + http("Create View") + .post("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/views") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body(StringBody("""{ + | "name": "#{viewName}", + | "view-version": { + | "version-id": 1, + | "timestamp-ms": #{timestamp}, + | "schema-id": 0, + | "summary": { + | "engine-version": "3.5.5", + | "app-id": "gatling-#{timestamp}", + | "engine-name": "spark", + | "iceberg-version": "Apache Iceberg 1.7.0" + | }, + | "default-namespace": ["#{multipartNamespace}"], + | "representations": [ + | { + | "type": "sql", + | "sql": "#{sqlQuery}", + | "dialect": "spark" + | } + | ] + | }, + | "schema": { + | "type": "struct", + | "schema-id": 0, + | "fields": #{fieldsStr} + | }, + | "properties": #{initialJsonProperties} + |}""".stripMargin)) + .check(status.is(200)) + ) + + val fetchView: ChainBuilder = exec( + http("Fetch View") + .get("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/views/#{viewName}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(200)) + .check(jsonPath("$.metadata.view-uuid").saveAs("viewUuid")) + 
.check(jsonPath("$.metadata.location").is("#{location}")) + .check( + jsonPath("$.metadata.properties") + .transform(str => EntityProperties.filterMapByPrefix(str, "InitialAttribute_")) + .is("#{initialProperties}") + ) + ) + + /** + * Checks if a specific view exists by its name and namespace path. + */ + val checkViewExists: ChainBuilder = exec( + http("Check View Exists") + .head("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/views/#{viewName}") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(204)) + ) + + val fetchAllViews: ChainBuilder = exec( + http("Fetch all Views under parent namespace") + .get("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/views") + .header("Authorization", "Bearer #{accessToken}") + .check(status.is(200)) + ) + + /** + * Updates the properties of a specific view by its name and namespace path. The view properties + * are updated with new values defined in the [[ViewActions.propertyUpdateFeeder]]. + * + * There is no limit to the number of users that can update table properties concurrently. + */ + val updateView: ChainBuilder = + retryOnHttpStatus(maxRetries, retryableHttpCodes, "Update View metadata")( + http("Update View metadata") + .post("/api/catalog/v1/#{catalogName}/namespaces/#{multipartNamespace}/views/#{viewName}") + .header("Authorization", "Bearer #{accessToken}") + .header("Content-Type", "application/json") + .body( + StringBody( + s"""{ + | "updates": [{ + | "action": "set-properties", + | "updates": #{newProperty} + | }] + |}""".stripMargin + ) + ) + .saveHttpStatusCode() + .check(status.is(200).saveAs("lastStatus")) + ) +} + +/** + * This object provides JSON serialization for the view field schema so that it can be used in + * Gatling response checks. 
+ */ +object ViewField { + implicit val format: Format[ViewField] = Json.format[ViewField] + + def fromList(json: String): Seq[ViewField] = Json.parse(json).as[Seq[ViewField]] +} + +/** + * A case class representing a view field schema. + * @param id Field identifier + * @param name Field name + * @param `type` Field type + * @param required Field requirement + */ +case class ViewField(id: Int, name: String, `type`: String, required: Boolean) diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ConnectionParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ConnectionParameters.scala new file mode 100644 index 0000000000..2a097b0922 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/ConnectionParameters.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.parameters + +/** + * Case class to hold the connection parameters for the benchmark. + * + * @param clientId The client ID for authentication. + * @param clientSecret The client secret for authentication. + * @param baseUrl The base URL of the Polaris service. 
+ */ +case class ConnectionParameters(clientId: String, clientSecret: String, baseUrl: String) + extends Explainable { + override def explanations: List[String] = List( + s"The connection parameters point to the server at $baseUrl with client ID $clientId and client secret $clientSecret." + ) +} + +/** + * Object holding the `connectionParameters` instance, populated from environment variables. The + * `CLIENT_ID` and `CLIENT_SECRET` environment variables must be provided, otherwise authentication + * will fail. By default, the `BASE_URL` is "http://localhost:8181". + */ +object ConnectionParameters { + val connectionParameters: ConnectionParameters = ConnectionParameters( + sys.env("CLIENT_ID"), + sys.env("CLIENT_SECRET"), + sys.env.getOrElse("BASE_URL", "http://localhost:8181") + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/DatasetParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/DatasetParameters.scala new file mode 100644 index 0000000000..b2ab84267c --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/DatasetParameters.scala @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.benchmarks.parameters + +import org.apache.polaris.benchmarks.NAryTreeBuilder + +/** + * Case class to hold the dataset parameters for the benchmark. + * + * @param numCatalogs The number of catalogs to create. + * @param defaultBaseLocation The default base location for the datasets. + * @param nsWidth The width of the namespace n-ary tree. + * @param nsDepth The depth of the namespace n-ary tree. + * @param numNamespaceProperties The number of namespace properties to create. + * @param numNamespacePropertyUpdates The number of namespace property updates to perform. + * @param numTablesPerNs The number of tables per namespace to create. + * @param numTablesMax The maximum number of tables to create. If set to -1, all tables are created. + * @param numColumns The number of columns per table to create. + * @param numTableProperties The number of table properties to create. + * @param numTablePropertyUpdates The number of table property updates to perform. + * @param numViewsPerNs The number of views per namespace to create. + * @param numViewsMax The maximum number of views to create. If set to -1, all views are created. + * @param numViewPropertyUpdates The number of view property updates to perform. 
+ */ +case class DatasetParameters( + numCatalogs: Int, + defaultBaseLocation: String, + nsWidth: Int, + nsDepth: Int, + numNamespaceProperties: Int, + numNamespacePropertyUpdates: Int, + numTablesPerNs: Int, + numTablesMax: Int, + numColumns: Int, + numTableProperties: Int, + numTablePropertyUpdates: Int, + numViewsPerNs: Int, + numViewsMax: Int, + numViewPropertyUpdates: Int +) extends Explainable { + val nAryTree: NAryTreeBuilder = NAryTreeBuilder(nsWidth, nsDepth) + val numTables: Int = if (numTablesMax <= 0) { + nAryTree.numberOfLastLevelElements * numTablesPerNs + } else { + numTablesMax + } + + val numViews: Int = if (numViewsMax <= 0) { + nAryTree.numberOfLastLevelElements * numViewsPerNs + } else { + numViewsMax + } + + override def explanations: List[String] = List( + s"The dataset parameters describe $numCatalogs catalogs with a default base location of $defaultBaseLocation.", + s"In the first catalog, there is a complete n-ary tree of namespaces with width $nsWidth and depth $nsDepth, for a total of ${nAryTree.numberOfNodes} namespaces.", + s"Each namespace has $numNamespaceProperties properties and $numNamespacePropertyUpdates property updates.", + s"Each of the ${nAryTree.numberOfLastLevelElements} leaf namespaces has $numTablesPerNs tables, for a total of $numTables tables.", + s"Each of the ${nAryTree.numberOfLastLevelElements} leaf namespaces has $numViewsPerNs views, for a total of $numViews views.", + s"Each table has $numColumns columns and $numTableProperties properties.", + s"There are $numTablePropertyUpdates table property updates for each table, for a total of ${numTablePropertyUpdates * numTables} updates.", + s"There are $numViewPropertyUpdates view property updates for each view, for a total of ${numViewPropertyUpdates * numViews} updates." + ) +} + +/** + * Object holding the `datasetParameters` instance, populated from environment variables. 
Multiple + * catalogs can be created, but namespaces and other entities will only be created in the first + * catalog. Namespaces will be created in a complete n-ary tree structure. By default, we create one + * catalog and a complete n-ary tree of namespaces with width 2 and depth 4. Each namespace has 10 + * properties. Leaf namespaces have 5 tables with 10 columns and 10 properties each. + */ +object DatasetParameters { + val datasetParameters: DatasetParameters = DatasetParameters( + sys.env.getOrElse("NUM_CATALOGS", "1").toInt, + sys.env.getOrElse("DEFAULT_BASE_LOCATION", "file:///tmp/polaris"), + sys.env.getOrElse("NAMESPACE_WIDTH", "2").toInt, + sys.env.getOrElse("NAMESPACE_DEPTH", "4").toInt, + sys.env.getOrElse("NUM_NAMESPACE_PROPERTIES", "10").toInt, + sys.env.getOrElse("NUM_NAMESPACE_PROPERTY_UPDATES", "5").toInt, + sys.env.getOrElse("NUM_TABLES_PER_NAMESPACE", "5").toInt, + sys.env.getOrElse("NUM_TABLES_MAX", "-1").toInt, + sys.env.getOrElse("NUM_COLUMNS", "10").toInt, + sys.env.getOrElse("NUM_TABLE_PROPERTIES", "10").toInt, + sys.env.getOrElse("NUM_TABLE_PROPERTY_UPDATES", "10").toInt, + sys.env.getOrElse("NUM_VIEWS_PER_NAMESPACE", "3").toInt, + sys.env.getOrElse("NUM_VIEWS_MAX", "-1").toInt, + sys.env.getOrElse("NUM_VIEW_PROPERTY_UPDATES", "1").toInt + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/Explainable.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/Explainable.scala new file mode 100644 index 0000000000..49d63fe3cb --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/Explainable.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.parameters + +/** + * Trait that provides a method to describe the parameters of a class. + */ +trait Explainable { + def explanations: List[String] +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala new file mode 100644 index 0000000000..78407ce0b9 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/parameters/WorkloadParameters.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.benchmarks.parameters + +case class WorkloadParameters(readWriteRatio: Double) extends Explainable { + override def explanations: List[String] = List( + s"When applicable, the read/write ratio is ${gatlingReadRatio.round}% reads and ${gatlingWriteRatio.round}% writes." + ) + + val gatlingReadRatio: Double = readWriteRatio * 100 + val gatlingWriteRatio: Double = (1 - readWriteRatio) * 100 +} + +object WorkloadParameters { + val workloadParameters: WorkloadParameters = WorkloadParameters( + sys.env("READ_WRITE_RATIO").toDouble + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDataset.scala new file mode 100644 index 0000000000..14d71782fb --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDataset.scala @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.core.structure.ScenarioBuilder +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.actions._ +import org.apache.polaris.benchmarks.parameters.ConnectionParameters.connectionParameters +import org.apache.polaris.benchmarks.parameters.DatasetParameters.datasetParameters +import org.apache.polaris.benchmarks.parameters.{ConnectionParameters, DatasetParameters} +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} +import scala.concurrent.duration._ + +/** + * This simulation is a 100% write workload that creates a tree dataset in Polaris. It is intended + * to be used against an empty Polaris instance. + */ +class CreateTreeDataset extends Simulation { + private val logger = LoggerFactory.getLogger(getClass) + + // -------------------------------------------------------------------------------- + // Load parameters + // -------------------------------------------------------------------------------- + val cp: ConnectionParameters = connectionParameters + val dp: DatasetParameters = datasetParameters + cp.explanations.foreach(logger.info) + dp.explanations.foreach(logger.info) + + // -------------------------------------------------------------------------------- + // Helper values + // -------------------------------------------------------------------------------- + private val numNamespaces: Int = dp.nAryTree.numberOfNodes + private val accessToken: AtomicReference[String] = new AtomicReference() + + private val authenticationActions = AuthenticationActions(cp, accessToken, 5, Set(500)) + private val catalogActions = CatalogActions(dp, accessToken, 0, Set()) + private val namespaceActions = NamespaceActions(dp, accessToken, 5, Set(500)) + private val tableActions = TableActions(dp, accessToken, 0, Set()) + private val viewActions = ViewActions(dp, accessToken, 0, Set()) + + private val 
createdCatalogs = new AtomicInteger() + private val createdNamespaces = new AtomicInteger() + private val createdTables = new AtomicInteger() + private val createdViews = new AtomicInteger() + + // -------------------------------------------------------------------------------- + // Workload: Authenticate and store the access token for later use + // -------------------------------------------------------------------------------- + val authenticate: ScenarioBuilder = scenario("Authenticate using the OAuth2 REST API endpoint") + .feed(authenticationActions.feeder()) + .exec(authenticationActions.authenticateAndSaveAccessToken) + + // -------------------------------------------------------------------------------- + // Workload: Create catalogs + // -------------------------------------------------------------------------------- + val createCatalogs: ScenarioBuilder = + scenario("Create catalogs using the Polaris Management REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + createdCatalogs.getAndIncrement() < dp.numCatalogs && session.contains("accessToken") + )( + feed(catalogActions.feeder()) + .exec(catalogActions.createCatalog) + ) + + // -------------------------------------------------------------------------------- + // Workload: Create namespaces + // -------------------------------------------------------------------------------- + val createNamespaces: ScenarioBuilder = scenario("Create namespaces using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + createdNamespaces.getAndIncrement() < numNamespaces && session.contains("accessToken") + )( + feed(namespaceActions.namespaceCreationFeeder()) + .exec(namespaceActions.createNamespace) + ) + + // -------------------------------------------------------------------------------- + // Workload: Create tables + // -------------------------------------------------------------------------------- + val createTables: 
ScenarioBuilder = scenario("Create tables using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + createdTables.getAndIncrement() < dp.numTables && session.contains("accessToken") + )( + feed(tableActions.tableCreationFeeder()) + .exec(tableActions.createTable) + ) + + // -------------------------------------------------------------------------------- + // Workload: Create views + // -------------------------------------------------------------------------------- + val createViews: ScenarioBuilder = scenario("Create views using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + createdViews.getAndIncrement() < dp.numViews && session.contains("accessToken") + )( + feed(viewActions.viewCreationFeeder()) + .exec(viewActions.createView) + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetConcurrent.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetConcurrent.scala new file mode 100644 index 0000000000..0f9dfd5e11 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetConcurrent.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.http.Predef._ +import org.slf4j.LoggerFactory + +import scala.concurrent.duration._ + +/** + * This simulation is a 100% write workload that creates a tree dataset in Polaris. It is intended + * to be used against an empty Polaris instance. It is a concurrent version of CreateTreeDataset, + * i.e. up to 50 requests are sent simultaneously. + */ +class CreateTreeDatasetConcurrent extends CreateTreeDataset { + private val logger = LoggerFactory.getLogger(getClass) + + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + setUp( + authenticate + .inject(atOnceUsers(1)) + .andThen(createCatalogs.inject(atOnceUsers(50))) + .andThen( + createNamespaces.inject( + constantUsersPerSec(1).during(1.seconds), + constantUsersPerSec(dp.nsWidth - 1).during(dp.nsDepth.seconds) + ) + ) + .andThen(createTables.inject(atOnceUsers(50))) + .andThen(createViews.inject(atOnceUsers(50))) + ).protocols(httpProtocol) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetSequential.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetSequential.scala new file mode 100644 index 0000000000..5818ee3c5f --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/CreateTreeDatasetSequential.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.actions._ +import org.apache.polaris.benchmarks.parameters.ConnectionParameters.connectionParameters +import org.apache.polaris.benchmarks.parameters.DatasetParameters.datasetParameters +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} +import scala.concurrent.duration._ + +/** + * This simulation is a 100% write workload that creates a tree dataset in Polaris. It is intended + * to be used against an empty Polaris instance. It is a sequential version of CreateTreeDataset, + * i.e. only one request is sent at a time. 
+ */ +class CreateTreeDatasetSequential extends CreateTreeDataset { + private val logger = LoggerFactory.getLogger(getClass) + + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + setUp( + authenticate + .inject(atOnceUsers(1)) + .andThen(createCatalogs.inject(atOnceUsers(1))) + .andThen(createNamespaces.inject(atOnceUsers(1))) + .andThen(createTables.inject(atOnceUsers(1))) + .andThen(createViews.inject(atOnceUsers(1))) + ).protocols(httpProtocol) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadTreeDataset.scala new file mode 100644 index 0000000000..4e364a01b1 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadTreeDataset.scala @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.actions.{ + AuthenticationActions, + CatalogActions, + NamespaceActions, + TableActions, + ViewActions +} +import org.apache.polaris.benchmarks.parameters.ConnectionParameters.connectionParameters +import org.apache.polaris.benchmarks.parameters.DatasetParameters.datasetParameters +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.{AtomicInteger, AtomicReference} + +/** + * This simulation is a 100% read workload that fetches a tree dataset in Polaris. It is intended to + * be used against a Polaris instance with a pre-existing tree dataset. It has no side effect on the + * dataset and therefore can be executed multiple times without any issue. + */ +class ReadTreeDataset extends Simulation { + private val logger = LoggerFactory.getLogger(getClass) + + // -------------------------------------------------------------------------------- + // Load parameters + // -------------------------------------------------------------------------------- + private val cp = connectionParameters + private val dp = datasetParameters + cp.explanations.foreach(logger.info) + dp.explanations.foreach(logger.info) + + // -------------------------------------------------------------------------------- + // Helper values + // -------------------------------------------------------------------------------- + private val numNamespaces: Int = dp.nAryTree.numberOfNodes + private val accessToken: AtomicReference[String] = new AtomicReference() + + private val authenticationActions = AuthenticationActions(cp, accessToken) + private val catalogActions = CatalogActions(dp, accessToken) + private val namespaceActions = NamespaceActions(dp, accessToken) + private val tableActions = TableActions(dp, accessToken) + private val viewActions = ViewActions(dp, accessToken) + + private val verifiedCatalogs = new 
AtomicInteger() + private val verifiedNamespaces = new AtomicInteger() + private val verifiedTables = new AtomicInteger() + private val verifiedViews = new AtomicInteger() + + // -------------------------------------------------------------------------------- + // Workload: Authenticate and store the access token for later use + // -------------------------------------------------------------------------------- + private val authenticate = scenario("Authenticate using the OAuth2 REST API endpoint") + .feed(authenticationActions.feeder()) + .tryMax(5) { + exec(authenticationActions.authenticateAndSaveAccessToken) + } + + // -------------------------------------------------------------------------------- + // Workload: Verify each catalog + // -------------------------------------------------------------------------------- + private val verifyCatalogs = scenario("Verify catalogs using the Polaris Management REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + verifiedCatalogs.getAndIncrement() < dp.numCatalogs && session.contains("accessToken") + )( + feed(catalogActions.feeder()) + .exec(catalogActions.fetchCatalog) + ) + + // -------------------------------------------------------------------------------- + // Workload: Verify namespaces + // -------------------------------------------------------------------------------- + private val verifyNamespaces = scenario("Verify namespaces using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + verifiedNamespaces.getAndIncrement() < numNamespaces && session.contains("accessToken") + )( + feed(namespaceActions.namespaceFetchFeeder()) + .exec(namespaceActions.fetchAllChildrenNamespaces) + .exec(namespaceActions.checkNamespaceExists) + .exec(namespaceActions.fetchNamespace) + ) + + // -------------------------------------------------------------------------------- + // Workload: Verify tables + // 
-------------------------------------------------------------------------------- + private val verifyTables = scenario("Verify tables using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + verifiedTables.getAndIncrement() < dp.numTables && session.contains("accessToken") + )( + feed(tableActions.tableFetchFeeder()) + .exec(tableActions.fetchAllTables) + .exec(tableActions.checkTableExists) + .exec(tableActions.fetchTable) + ) + + // -------------------------------------------------------------------------------- + // Workload: Verify views + // -------------------------------------------------------------------------------- + private val verifyViews = scenario("Verify views using the Iceberg REST API") + .exec(authenticationActions.restoreAccessTokenInSession) + .asLongAs(session => + verifiedViews.getAndIncrement() < dp.numViews && session.contains("accessToken") + )( + feed(viewActions.viewFetchFeeder()) + .exec(viewActions.fetchAllViews) + .exec(viewActions.checkViewExists) + .exec(viewActions.fetchView) + ) + + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + setUp( + authenticate + .inject(atOnceUsers(1)) + .andThen(verifyCatalogs.inject(atOnceUsers(1))) + .andThen(verifyNamespaces.inject(atOnceUsers(dp.nsDepth))) + .andThen(verifyTables.inject(atOnceUsers(50))) + .andThen(verifyViews.inject(atOnceUsers(50))) + ) + .protocols(httpProtocol) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala new file mode 100644 index 
0000000000..13bbb0d94e --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDataset.scala @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.core.structure.ScenarioBuilder +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.actions._ +import org.apache.polaris.benchmarks.parameters.{ + ConnectionParameters, + DatasetParameters, + WorkloadParameters +} +import org.apache.polaris.benchmarks.parameters.ConnectionParameters.connectionParameters +import org.apache.polaris.benchmarks.parameters.DatasetParameters.datasetParameters +import org.apache.polaris.benchmarks.parameters.WorkloadParameters.workloadParameters +import org.apache.polaris.benchmarks.util.CircularIterator +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.AtomicReference +import scala.concurrent.duration.DurationInt + +class ReadUpdateTreeDataset extends Simulation { + private val logger = LoggerFactory.getLogger(getClass) + + // -------------------------------------------------------------------------------- + // Load parameters + // 
-------------------------------------------------------------------------------- + val cp: ConnectionParameters = connectionParameters + val dp: DatasetParameters = datasetParameters + val wp: WorkloadParameters = workloadParameters + cp.explanations.foreach(logger.info) + dp.explanations.foreach(logger.info) + wp.explanations.foreach(logger.info) + + // -------------------------------------------------------------------------------- + // Helper values + // -------------------------------------------------------------------------------- + private val numNamespaces: Int = dp.nAryTree.numberOfNodes + private val accessToken: AtomicReference[String] = new AtomicReference() + + private val authActions = AuthenticationActions(cp, accessToken) + private val catActions = CatalogActions(dp, accessToken) + private val nsActions = NamespaceActions(dp, accessToken) + private val tblActions = TableActions(dp, accessToken) + private val viewActions = ViewActions(dp, accessToken) + + // -------------------------------------------------------------------------------- + // Workload: Authenticate and store the access token for later use + // -------------------------------------------------------------------------------- + val authenticate: ScenarioBuilder = scenario("Authenticate using the OAuth2 REST API endpoint") + .feed(authActions.feeder()) + .tryMax(5) { + exec(authActions.authenticateAndSaveAccessToken) + } + + private val nsListFeeder = new CircularIterator(nsActions.namespaceIdentityFeeder) + private val nsExistsFeeder = new CircularIterator(nsActions.namespaceIdentityFeeder) + private val nsFetchFeeder = new CircularIterator(nsActions.namespaceFetchFeeder) + private val nsUpdateFeeder = new CircularIterator(nsActions.namespacePropertiesUpdateFeeder) + + private val tblListFeeder = new CircularIterator(tblActions.tableIdentityFeeder) + private val tblExistsFeeder = new CircularIterator(tblActions.tableIdentityFeeder) + private val tblFetchFeeder = new 
CircularIterator(tblActions.tableFetchFeeder) + private val tblUpdateFeeder = new CircularIterator(tblActions.propertyUpdateFeeder) + + private val viewListFeeder = new CircularIterator(viewActions.viewIdentityFeeder) + private val viewExistsFeeder = new CircularIterator(viewActions.viewIdentityFeeder) + private val viewFetchFeeder = new CircularIterator(viewActions.viewFetchFeeder) + private val viewUpdateFeeder = new CircularIterator(viewActions.propertyUpdateFeeder) + + // -------------------------------------------------------------------------------- + // Workload: Randomly read and write entities + // -------------------------------------------------------------------------------- + val readWriteScenario: ScenarioBuilder = + scenario("Read and write entities using the Iceberg REST API") + .exec(authActions.restoreAccessTokenInSession) + .randomSwitch( + wp.gatlingReadRatio -> group("Read")( + uniformRandomSwitch( + exec(feed(nsListFeeder).exec(nsActions.fetchAllChildrenNamespaces)), + exec(feed(nsExistsFeeder).exec(nsActions.checkNamespaceExists)), + exec(feed(nsFetchFeeder).exec(nsActions.fetchNamespace)), + exec(feed(tblListFeeder).exec(tblActions.fetchAllTables)), + exec(feed(tblExistsFeeder).exec(tblActions.checkTableExists)), + exec(feed(tblFetchFeeder).exec(tblActions.fetchTable)), + exec(feed(viewListFeeder).exec(viewActions.fetchAllViews)), + exec(feed(viewExistsFeeder).exec(viewActions.checkViewExists)), + exec(feed(viewFetchFeeder).exec(viewActions.fetchView)) + ) + ), + wp.gatlingWriteRatio -> group("Write")( + uniformRandomSwitch( + exec(feed(nsUpdateFeeder).exec(nsActions.updateNamespaceProperties)), + exec(feed(tblUpdateFeeder).exec(tblActions.updateTable)), + exec(feed(viewUpdateFeeder).exec(viewActions.updateView)) + ) + ) + ) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetConcurrent.scala 
b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetConcurrent.scala new file mode 100644 index 0000000000..35e219ab2b --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetConcurrent.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.http.Predef._ + +import scala.concurrent.duration.DurationInt + +class ReadUpdateTreeDatasetConcurrent extends ReadUpdateTreeDataset { + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + setUp( + authenticate + .inject(atOnceUsers(1)) + .andThen(readWriteScenario.inject(constantUsersPerSec(100).during(5.minutes).randomized)) + ).protocols(httpProtocol) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetSequential.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetSequential.scala new file mode 100644 index 0000000000..a88887140a --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/simulations/ReadUpdateTreeDatasetSequential.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.simulations + +import io.gatling.core.Predef._ +import io.gatling.http.Predef._ +import org.apache.polaris.benchmarks.actions._ +import org.apache.polaris.benchmarks.parameters.ConnectionParameters.connectionParameters +import org.apache.polaris.benchmarks.parameters.DatasetParameters.datasetParameters +import org.apache.polaris.benchmarks.util.CircularIterator +import org.slf4j.LoggerFactory + +import java.util.concurrent.atomic.AtomicReference +import scala.concurrent.duration.DurationInt + +class ReadUpdateTreeDatasetSequential extends ReadUpdateTreeDataset { + // -------------------------------------------------------------------------------- + // Build up the HTTP protocol configuration and set up the simulation + // -------------------------------------------------------------------------------- + private val httpProtocol = http + .baseUrl(cp.baseUrl) + .acceptHeader("application/json") + .contentTypeHeader("application/json") + + setUp( + authenticate + .inject(atOnceUsers(1)) + .andThen(readWriteScenario.inject(constantUsersPerSec(1).during(5.minutes))) + ).protocols(httpProtocol) +} diff --git a/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/util/CircularIterator.scala b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/util/CircularIterator.scala new file mode 100644 index 0000000000..8ee3514c29 --- /dev/null +++ b/benchmarks/src/gatling/scala/org/apache/polaris/benchmarks/util/CircularIterator.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.polaris.benchmarks.util + +import scala.util.Random + +class CircularIterator[T](builder: () => Iterator[T]) extends Iterator[T] { + private var currentIterator: Iterator[T] = builder() + + override def hasNext: Boolean = true + + override def next(): T = synchronized { + if (!currentIterator.hasNext) { + currentIterator = builder() + } + currentIterator.next() + } +} + +class BufferedRandomIterator[T](underlying: CircularIterator[T], bufferSize: Int) + extends Iterator[T] { + private var buffer: Iterator[T] = populateAndShuffle() + + private def populateAndShuffle(): Iterator[T] = + Random.shuffle((1 to bufferSize).map(_ => underlying.next()).toList).iterator + + override def hasNext: Boolean = true + + override def next(): T = synchronized { + if (!buffer.hasNext) { + buffer = populateAndShuffle() + } + buffer.next() + } +} diff --git a/getting-started/assets/polaris/create-catalog.sh b/getting-started/assets/polaris/create-catalog.sh index f069c66376..e133034d61 100755 --- a/getting-started/assets/polaris/create-catalog.sh +++ b/getting-started/assets/polaris/create-catalog.sh @@ -50,7 +50,7 @@ curl -s -H "Authorization: Bearer ${token}" \ "storageConfigInfo": { "storageType": "FILE", "allowedLocations": [ - "file:///tmp" + "file:///tmp/polaris/" ] } } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 4e569e27dd..9eed984efb 100644 
--- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -100,3 +100,4 @@ jandex = { id = "org.kordamp.gradle.jandex", version = "2.1.0" } openapi-generator = { id = "org.openapi.generator", version = "7.12.0" } quarkus = { id = "io.quarkus", version.ref = "quarkus" } rat = { id = "org.nosphere.apache.rat", version = "0.8.1" } +gatling = { id = "io.gatling.gradle", version = "3.13.4.1" } diff --git a/gradle/projects.main.properties b/gradle/projects.main.properties index 88fbcd0fa4..6183842ddb 100644 --- a/gradle/projects.main.properties +++ b/gradle/projects.main.properties @@ -36,6 +36,7 @@ aggregated-license-report=aggregated-license-report polaris-immutables=tools/immutables polaris-container-spec-helper=tools/container-spec-helper polaris-version=tools/version +polaris-benchmarks=benchmarks polaris-config-docs-annotations=tools/config-docs/annotations polaris-config-docs-generator=tools/config-docs/generator