Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 22 additions & 8 deletions docker/demo/config/test-suite/complex-dag-cow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,41 +13,48 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: cow-long-running-example.yaml
dag_rounds: 2
dag_name: complex-dag-cow.yaml
dag_rounds: 1
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
config:
record_size: 100
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 1000
type: InsertNode
deps: none
second_insert:
config:
record_size: 100
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 10000
deps: first_insert
type: InsertNode
third_insert:
config:
record_size: 100
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 300
deps: second_insert
type: InsertNode
first_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: third_insert
first_validate:
config:
validate_hive: true
type: ValidateDatasetNode
deps: third_insert
deps: first_hive_sync
first_upsert:
config:
record_size: 100
record_size: 1000
num_partitions_insert: 1
num_records_insert: 300
repeat_count: 1
Expand All @@ -61,8 +68,15 @@ dag_content:
num_records_delete: 2000
type: DeleteNode
deps: first_upsert
second_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_delete
second_validate:
config:
validate_hive: true
delete_input_data: true
type: ValidateDatasetNode
deps: first_delete
deps: second_hive_sync
91 changes: 28 additions & 63 deletions docker/demo/config/test-suite/complex-dag-mor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,105 +15,70 @@
# limitations under the License.
dag_name: complex-dag-mor.yaml
dag_rounds: 1
dag_intermittent_delay_mins: 10
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
config:
record_size: 70000
record_size: 1000
num_partitions_insert: 1
repeat_count: 5
repeat_count: 1
num_records_insert: 100
type: InsertNode
deps: none
second_insert:
config:
record_size: 70000
record_size: 1000
num_partitions_insert: 1
repeat_count: 5
num_records_insert: 100
repeat_count: 1
num_records_insert: 1000
deps: first_insert
type: InsertNode
third_insert:
config:
record_size: 70000
record_size: 1000
num_partitions_insert: 1
repeat_count: 2
repeat_count: 1
num_records_insert: 300
deps: second_insert
type: InsertNode
first_rollback:
config:
deps: third_insert
type: RollbackNode
first_upsert:
config:
record_size: 70000
num_partitions_insert: 1
num_records_insert: 300
repeat_count: 1
num_records_upsert: 100
num_partitions_upsert: 10
type: UpsertNode
deps: first_rollback
first_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_upsert
first_hive_query:
deps: third_insert
first_validate:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveQueryNode
type: ValidateDatasetNode
deps: first_hive_sync
second_upsert:
first_upsert:
config:
record_size: 70000
record_size: 1000
num_partitions_insert: 1
num_records_insert: 300
repeat_count: 1
num_records_upsert: 100
num_partitions_upsert: 10
num_partitions_upsert: 1
type: UpsertNode
deps: first_hive_query
second_hive_query:
config:
queue_name: "adhoc"
engine: "mr"
hive_queries:
query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
result1: 0
query2: "select count(*) from testdb.table1"
result2: 1100
type: HiveQueryNode
deps: second_upsert
deps: first_validate
first_schedule_compact:
config:
type: ScheduleCompactNode
deps: second_hive_query
third_upsert:
config:
record_size: 70000
num_partitions_insert: 1
num_records_insert: 300
repeat_count: 1
num_records_upsert: 100
num_partitions_upsert: 10
type: UpsertNode
deps: first_schedule_compact
first_compact:
deps: first_upsert
first_delete:
config:
type: CompactNode
num_partitions_delete: 1
num_records_delete: 500
type: DeleteNode
deps: first_schedule_compact
third_hive_query:
second_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
hive_queries:
query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
result1: 0
query2: "select count(*) from testdb.table1"
result2: 1400
type: HiveQueryNode
deps: first_compact
type: HiveSyncNode
deps: first_delete
second_validate:
config:
delete_input_data: true
type: ValidateDatasetNode
deps: second_hive_sync
76 changes: 76 additions & 0 deletions docker/demo/config/test-suite/cow-clustering-example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: cow-clustering-example.yaml
dag_rounds: 3
dag_intermittent_delay_mins: 0
dag_content:
first_insert:
config:
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 1000
type: InsertNode
deps: none
second_insert:
config:
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 10000
deps: first_insert
type: InsertNode
third_insert:
config:
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 300
deps: second_insert
type: InsertNode
first_delete:
config:
num_partitions_delete: 1
num_records_delete: 9000
type: DeleteNode
deps: third_insert
first_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_delete
first_validate:
config:
validate_hive: true
type: ValidateDatasetNode
deps: first_hive_sync
first_cluster:
config:
execute_itr_count: 2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nsivabalan Is `execute_itr_count: 2` supposed to mean "run this node twice"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. It means this node is executed only during iteration 2.

type: ClusteringNode
deps: first_validate
second_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_cluster
second_validate:
config:
validate_hive: true
type: ValidateDatasetNode
deps: second_hive_sync
39 changes: 30 additions & 9 deletions docker/demo/config/test-suite/cow-long-running-example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,47 @@
# See the License for the specific language governing permissions and
# limitations under the License.
dag_name: cow-long-running-example.yaml
dag_rounds: 20
dag_intermittent_delay_mins: 10
dag_rounds: 50
dag_intermittent_delay_mins: 1
dag_content:
first_insert:
config:
record_size: 100
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 1000
type: InsertNode
deps: none
second_insert:
config:
record_size: 100
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 10000
deps: first_insert
type: InsertNode
third_insert:
config:
record_size: 100
record_size: 1000
num_partitions_insert: 1
repeat_count: 1
num_records_insert: 300
deps: second_insert
type: InsertNode
first_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: third_insert
first_validate:
config:
validate_hive: true
type: ValidateDatasetNode
deps: third_insert
deps: first_hive_sync
first_upsert:
config:
record_size: 100
record_size: 1000
num_partitions_insert: 1
num_records_insert: 300
repeat_count: 1
Expand All @@ -58,11 +65,25 @@ dag_content:
first_delete:
config:
num_partitions_delete: 1
num_records_delete: 2000
num_records_delete: 8000
type: DeleteNode
deps: first_upsert
second_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_delete
second_validate:
config:
validate_hive: true
delete_input_data: true
type: ValidateDatasetNode
deps: first_delete
deps: second_hive_sync
last_validate:
config:
execute_itr_count: 50
validate_clean: true
validate_archival: true
type: ValidateAsyncOperations
deps: second_validate
Loading