@@ -20,7 +20,7 @@ dag_content:
first_insert:
config:
record_size: 1000
num_partitions_insert: 5
num_partitions_insert: 50
repeat_count: 1
num_records_insert: 1000
type: InsertNode
@@ -36,19 +36,19 @@ dag_content:
third_insert:
config:
record_size: 1000
num_partitions_insert: 2
num_partitions_insert: 50
repeat_count: 1
num_records_insert: 300
deps: second_insert
type: InsertNode
first_upsert:
config:
record_size: 1000
num_partitions_insert: 2
num_partitions_insert: 50
num_records_insert: 300
repeat_count: 1
num_records_upsert: 100
num_partitions_upsert: 1
num_partitions_upsert: 50
type: UpsertNode
deps: third_insert
first_delete:
@@ -20,7 +20,7 @@ dag_content:
first_insert:
config:
record_size: 1000
num_partitions_insert: 5
num_partitions_insert: 50
repeat_count: 1
num_records_insert: 1000
type: InsertNode
@@ -36,19 +36,19 @@ dag_content:
third_insert:
config:
record_size: 1000
num_partitions_insert: 2
num_partitions_insert: 50
repeat_count: 1
num_records_insert: 300
deps: second_insert
type: InsertNode
first_upsert:
config:
record_size: 1000
num_partitions_insert: 2
num_partitions_insert: 50
num_records_insert: 300
repeat_count: 1
num_records_upsert: 100
num_partitions_upsert: 1
num_partitions_upsert: 50
type: UpsertNode
deps: third_insert
first_delete:
@@ -23,7 +23,7 @@ dag_content:
first_insert:
config:
record_size: 1000
num_partitions_insert: 5
num_partitions_insert: 50
repeat_count: 1
num_records_insert: 1000
type: InsertNode
@@ -39,19 +39,19 @@ dag_content:
third_insert:
config:
record_size: 1000
num_partitions_insert: 2
num_partitions_insert: 50
repeat_count: 1
num_records_insert: 300
deps: second_insert
type: InsertNode
first_upsert:
config:
record_size: 1000
num_partitions_insert: 2
num_partitions_insert: 50
num_records_insert: 300
repeat_count: 1
num_records_upsert: 100
num_partitions_upsert: 1
num_partitions_upsert: 50
type: UpsertNode
deps: third_insert
first_delete:
@@ -23,7 +23,7 @@ dag_content:
first_insert:
config:
record_size: 1000
num_partitions_insert: 5
num_partitions_insert: 50
repeat_count: 1
num_records_insert: 1000
type: InsertNode
@@ -39,19 +39,19 @@ dag_content:
third_insert:
config:
record_size: 1000
num_partitions_insert: 2
num_partitions_insert: 50
repeat_count: 1
num_records_insert: 300
deps: second_insert
type: InsertNode
first_upsert:
config:
record_size: 1000
num_partitions_insert: 2
num_partitions_insert: 50
num_records_insert: 300
repeat_count: 1
num_records_upsert: 100
num_partitions_upsert: 1
num_partitions_upsert: 50
type: UpsertNode
deps: third_insert
first_delete:
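The hunks above apply the same change to four multi-partition DAG definitions: the affected insert and upsert nodes move from 5, 2, or 1 partitions to 50. Since the paste has flattened the YAML nesting, here is a sketch of how one of the updated upsert nodes would read in context; the indentation and the enclosing dag_content key are assumed from the usual test-suite DAG layout, and the values are copied from the hunks above.

dag_content:
  first_upsert:
    config:
      record_size: 1000
      num_partitions_insert: 50   # previously 2
      num_records_insert: 300
      repeat_count: 1
      num_records_upsert: 100
      num_partitions_upsert: 50   # previously 1
    type: UpsertNode
    deps: third_insert
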
@@ -13,42 +13,51 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: cow-spark-simple.yaml
dag_rounds: 1
dag_intermittent_delay_mins: 1
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 6
dag_intermittent_delay_mins: 0
dag_content:
first_insert:
config:
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 100
type: SparkInsertNode
num_records_insert: 1000
type: InsertNode
deps: none
first_validate:
second_insert:
config:
validate_hive: false
type: ValidateDatasetNode
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 10000
deps: first_insert
type: InsertNode
first_upsert:
config:
record_size: 1000
num_partitions_insert: 1
num_records_insert: 50
num_records_insert: 1000
repeat_count: 1
num_records_upsert: 100
num_records_upsert: 8000
num_partitions_upsert: 1
type: SparkUpsertNode
deps: first_validate
type: UpsertNode
deps: second_insert
first_delete:
config:
num_partitions_delete: 1
num_records_delete: 30
type: SparkDeleteNode
num_records_delete: 1000
type: DeleteNode
deps: first_upsert
second_validate:
config:
validate_once_every_itr : 3
validate_hive: false
delete_input_data: false
delete_input_data: true
type: ValidateDatasetNode
deps: first_delete
deps: first_delete
last_validate:
config:
execute_itr_count: 6
type: ValidateAsyncOperations
deps: second_validate
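
Taken together, the added lines replace the single-round cow-spark-simple flow with a six-round DeltaStreamer flow. A condensed sketch of the new node chain follows; it is not the literal file (indentation is assumed and each node's config is summarized in a comment), and the values come from the added lines above.

dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 6
dag_intermittent_delay_mins: 0
dag_content:
  first_insert:      # InsertNode, 1000 records, deps: none
  second_insert:     # InsertNode, 10000 records, deps: first_insert
  first_upsert:      # UpsertNode, 8000 upserts, deps: second_insert
  first_delete:      # DeleteNode, 1000 deletes, deps: first_upsert
  second_validate:   # ValidateDatasetNode, runs every 3rd iteration, deletes input data, deps: first_delete
  last_validate:     # ValidateAsyncOperations after 6 iterations, deps: second_validate
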
docker/demo/config/test-suite/insert-overwrite-table.yaml (2 changes: 1 addition & 1 deletion)
@@ -56,7 +56,7 @@ dag_content:
first_insert_overwrite_table:
config:
record_size: 1000
repeat_count: 10
repeat_count: 1
num_records_insert: 10
type: SparkInsertOverwriteTableNode
deps: second_upsert
@@ -14,42 +14,42 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: cow-spark-deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 30
dag_rounds: 6
dag_intermittent_delay_mins: 0
dag_content:
first_insert:
config:
record_size: 200
num_partitions_insert: 50
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 10000
type: SparkInsertNode
deps: none
first_upsert:
config:
record_size: 200
num_partitions_insert: 50
num_partitions_insert: 1
num_records_insert: 300
repeat_count: 1
num_records_upsert: 3000
num_partitions_upsert: 50
num_partitions_upsert: 1
type: SparkUpsertNode
deps: first_insert
first_delete:
config:
num_partitions_delete: 50
num_records_delete: 4000
num_partitions_delete: 1
num_records_delete: 1000
type: SparkDeleteNode
deps: first_upsert
second_validate:
config:
validate_once_every_itr : 5
validate_once_every_itr : 3
validate_hive: false
delete_input_data: true
type: ValidateDatasetNode
deps: first_delete
last_validate:
config:
execute_itr_count: 30
execute_itr_count: 6
type: ValidateAsyncOperations
deps: second_validate
@@ -0,0 +1,56 @@

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

hoodie.insert.shuffle.parallelism=25
hoodie.upsert.shuffle.parallelism=25
hoodie.bulkinsert.shuffle.parallelism=25
hoodie.delete.shuffle.parallelism=25

hoodie.cleaner.commits.retained=8
hoodie.keep.min.commits=12
hoodie.keep.max.commits=14

hoodie.compact.inline=true

hoodie.deltastreamer.source.test.num_partitions=100
hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false
hoodie.deltastreamer.source.test.max_unique_records=100000000
hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector

hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector
hoodie.datasource.hive_sync.skip_ro_suffix=true

hoodie.datasource.write.recordkey.field=_row_key
hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator
hoodie.datasource.write.partitionpath.field=timestamp

hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input
hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc
hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc
hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP
hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd

hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/
hoodie.datasource.hive_sync.database=testdb
hoodie.datasource.hive_sync.table=table1
hoodie.datasource.hive_sync.assume_date_partitioning=false
hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path
hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
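
One detail worth noting in this new properties file: the key generator settings partition records on the timestamp field, treating it as UNIX_TIMESTAMP seconds and formatting the partition path as yyyy/MM/dd. As an illustrative example of my own (not from the PR), and assuming the key generator's default UTC handling, a record with timestamp 1577836800 (2020-01-01 00:00:00 UTC) would be written under partition path 2020/01/01, which SlashEncodedDayPartitionValueExtractor then maps back to a Hive partition during sync.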

@@ -18,9 +18,10 @@
# under the License.
#

hoodie.insert.shuffle.parallelism=100
hoodie.upsert.shuffle.parallelism=100
hoodie.bulkinsert.shuffle.parallelism=100
hoodie.insert.shuffle.parallelism=25
hoodie.upsert.shuffle.parallelism=25
hoodie.bulkinsert.shuffle.parallelism=25
hoodie.delete.shuffle.parallelism=25

hoodie.cleaner.commits.retained=8
hoodie.keep.min.commits=12
@@ -29,7 +30,6 @@ hoodie.keep.max.commits=14
hoodie.deltastreamer.source.test.num_partitions=100
hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false
hoodie.deltastreamer.source.test.max_unique_records=100000000
hoodie.embed.timeline.server=false
hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector

hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector
@@ -0,0 +1,63 @@

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

hoodie.insert.shuffle.parallelism=25
hoodie.upsert.shuffle.parallelism=25
hoodie.bulkinsert.shuffle.parallelism=25
hoodie.delete.shuffle.parallelism=25

hoodie.compact.inline=true

hoodie.cleaner.commits.retained=8
hoodie.keep.min.commits=12
hoodie.keep.max.commits=14

hoodie.deltastreamer.source.test.num_partitions=100
hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false
hoodie.deltastreamer.source.test.max_unique_records=100000000
hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector

hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector
hoodie.datasource.hive_sync.skip_ro_suffix=true

hoodie.datasource.write.recordkey.field=_row_key
hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator
hoodie.datasource.write.partitionpath.field=timestamp

hoodie.clustering.inline=true
hoodie.clustering.inline.max.commits=4
hoodie.clustering.plan.strategy.sort.columns=_hoodie_partition_path,_row_key
hoodie.clustering.plan.strategy.target.file.max.bytes=1073741824
hoodie.clustering.plan.strategy.small.file.limit=629145600
hoodie.clustering.execution.strategy.class=org.apache.hudi.client.clustering.run.strategy.SparkSortAndSizeExecutionStrategy

hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input
hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc
hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc
hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP
hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd

hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/
hoodie.datasource.hive_sync.database=testdb
hoodie.datasource.hive_sync.table=table1
hoodie.datasource.hive_sync.assume_date_partitioning=false
hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path
hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor
