Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
e75dc30
[HUDI-1075] Implement simple clustering strategies to create Clusteri…
Nov 8, 2020
5d349a2
[HUDI-1471] Make QuickStartUtils generate deletes according to specif…
wangxianghu Dec 22, 2020
38fb03d
[HUDI-1485] Fix Deletes issued without any prior commits exception (#…
wangxianghu Dec 22, 2020
a39951f
[HUDI-1488] Fix Test Case Failure in TestHBaseIndex (#2365)
Dec 23, 2020
93e6b54
[HUDI-1489] Fix null pointer exception when reading updated written b…
zhedoubushishi Dec 23, 2020
20b9d64
[HUDI-1451] Support bulk insert v2 with Spark 3.0.0 (#2328)
zhedoubushishi Dec 25, 2020
cd8f145
[HUDI-1487] fix unit test testCopyOnWriteStorage random failed (#2364)
lw309637554 Dec 25, 2020
a3772a0
[HUDI-1490] Incremental Query should work even when there are partit…
bvaradar Dec 26, 2020
3424eaf
[HUDI-1331] Adding support for validating entire dataset and long run…
nsivabalan Dec 26, 2020
4ef4e8d
[HUDI-1481] add structured streaming and delta streamer clustering …
lw309637554 Dec 28, 2020
17e66f8
[HUDI-1354] Block updates and replace on file groups in clustering (#…
lw309637554 Dec 28, 2020
b40e090
[HUDI-1350] Support Partition level delete API in HUDI (#2254)
lw309637554 Dec 28, 2020
79292a5
[HUDI-1495] Upgrade Flink version to 1.12.0 (#2384)
danny0405 Dec 29, 2020
dc23e58
[MINOR] Remove the duplicate code in AbstractHoodieWriteClient.startC…
danny0405 Dec 29, 2020
191eb9f
[HUDI-1398] Align insert file size for reducing IO (#2256)
yui2010 Dec 29, 2020
4555468
[HUDI-1484] Escape the partition value in HiveSyncTool (#2363)
Dec 29, 2020
6bacf12
[HUDI-1474] Add additional unit tests to TestHBaseIndex (#2349)
nbalajee Dec 29, 2020
9abc305
[HUDI-1441] - HoodieAvroUtils - rewrite() is not handling evolution o…
nbalajee Dec 8, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
172 changes: 53 additions & 119 deletions docker/demo/config/test-suite/complex-dag-cow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,122 +13,56 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Pre-change layout of complex-dag-cow.yaml: every node is a top-level key
# and the nodes form a single chain through their `deps` entries.
# NOTE(review): this side spells the partition keys `num_insert_partitions` /
# `num_upsert_partitions`; the replacement DAG uses `num_partitions_insert` /
# `num_partitions_upsert` -- confirm which spelling the consumer reads.

first_insert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    repeat_count: 1
    num_records_insert: 1000
  type: InsertNode
  deps: none

second_insert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    repeat_count: 1
    num_records_insert: 10000
  type: InsertNode
  deps: first_insert

third_insert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    repeat_count: 1
    num_records_insert: 300
  type: InsertNode
  deps: second_insert

first_rollback:
  config:  # intentionally empty
  type: RollbackNode
  deps: third_insert

first_upsert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    num_records_insert: 300
    repeat_count: 1
    num_records_upsert: 100
    num_upsert_partitions: 10
  type: UpsertNode
  deps: first_rollback

first_hive_sync:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveSyncNode
  deps: first_upsert

first_hive_query:
  config:
    queue_name: "adhoc"
    engine: "mr"
    # Each queryN is paired with the resultN value listed right after it.
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 11300
  type: HiveQueryNode
  deps: first_hive_sync

second_upsert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    num_records_insert: 300
    repeat_count: 1
    num_records_upsert: 100
    num_upsert_partitions: 10
  type: UpsertNode
  deps: first_hive_query

second_hive_query:
  config:
    queue_name: "adhoc"
    engine: "mr"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 11600
  type: HiveQueryNode
  deps: second_upsert

fourth_insert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    repeat_count: 1
    num_records_insert: 1000
  type: InsertNode
  deps: second_hive_query

third_hive_query:
  config:
    queue_name: "adhoc"
    engine: "mr"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 12600
  type: HiveQueryNode
  deps: fourth_insert

first_delete:
  config:
    record_size: 70000
    num_partitions_delete: 1
    num_records_delete: 200
  type: DeleteNode
  deps: third_hive_query

fourth_hive_sync:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveSyncNode
  deps: first_delete

fourth_hive_query:
  config:
    queue_name: "adhoc"
    engine: "mr"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 12400
  type: HiveQueryNode
  deps: fourth_hive_sync
# Test-suite DAG for complex-dag-cow.yaml.
# Node chain (via `deps`): three inserts -> validate -> upsert -> delete ->
# validate.
# dag_name is set to this file's own name, the same way complex-dag-mor.yaml
# names itself; it previously carried the name of a different DAG file
# (cow-long-running-example.yaml).
dag_name: complex-dag-cow.yaml
dag_rounds: 2
dag_intermittent_delay_mins: 1
dag_content:
  first_insert:
    config:
      record_size: 100
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: InsertNode
    deps: none

  second_insert:
    config:
      record_size: 100
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 10000
    type: InsertNode
    deps: first_insert

  third_insert:
    config:
      record_size: 100
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 300
    type: InsertNode
    deps: second_insert

  first_validate:
    config:  # intentionally empty
    type: ValidateDatasetNode
    deps: third_insert

  first_upsert:
    config:
      record_size: 100
      num_partitions_insert: 1
      num_records_insert: 300
      repeat_count: 1
      num_records_upsert: 100
      num_partitions_upsert: 1
    type: UpsertNode
    deps: first_validate

  first_delete:
    config:
      num_partitions_delete: 1
      num_records_delete: 2000
    type: DeleteNode
    deps: first_upsert

  # Last node of the chain; the only validation that sets delete_input_data.
  second_validate:
    config:
      delete_input_data: true
    type: ValidateDatasetNode
    deps: first_delete
204 changes: 104 additions & 100 deletions docker/demo/config/test-suite/complex-dag-mor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,103 +13,107 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Pre-change layout of complex-dag-mor.yaml: every node is a top-level key,
# linked to its predecessor through `deps`.
# NOTE(review): this side spells the partition keys `num_insert_partitions` /
# `num_upsert_partitions`; the replacement DAG uses `num_partitions_insert` /
# `num_partitions_upsert` -- confirm which spelling the consumer reads.

first_insert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    repeat_count: 1
    num_records_insert: 100
  type: InsertNode
  deps: none

second_insert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    repeat_count: 1
    num_records_insert: 100
  type: InsertNode
  deps: first_insert

third_insert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    repeat_count: 1
    num_records_insert: 300
  type: InsertNode
  deps: second_insert

first_rollback:
  config:  # intentionally empty
  type: RollbackNode
  deps: third_insert

first_upsert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    num_records_insert: 300
    repeat_count: 1
    num_records_upsert: 100
    num_upsert_partitions: 10
  type: UpsertNode
  deps: first_rollback

first_hive_sync:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveSyncNode
  deps: first_upsert

# This query node lists no hive_queries, unlike the later query nodes.
first_hive_query:
  config:
    queue_name: "adhoc"
    engine: "mr"
  type: HiveQueryNode
  deps: first_hive_sync

second_upsert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    num_records_insert: 300
    repeat_count: 1
    num_records_upsert: 100
    num_upsert_partitions: 10
  type: UpsertNode
  deps: first_hive_query

second_hive_query:
  config:
    queue_name: "adhoc"
    engine: "mr"
    # Each queryN is paired with the resultN value listed right after it.
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 1100
  type: HiveQueryNode
  deps: second_upsert

first_schedule_compact:
  config:  # intentionally empty
  type: ScheduleCompactNode
  deps: second_hive_query

# third_upsert and first_compact both list first_schedule_compact as their
# dependency, so the chain forks here.
third_upsert:
  config:
    record_size: 70000
    num_insert_partitions: 1
    num_records_insert: 300
    repeat_count: 1
    num_records_upsert: 100
    num_upsert_partitions: 10
  type: UpsertNode
  deps: first_schedule_compact

first_compact:
  config:  # intentionally empty
  type: CompactNode
  deps: first_schedule_compact

third_hive_query:
  config:
    queue_name: "adhoc"
    engine: "mr"
    hive_queries:
      query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
      result1: 0
      query2: "select count(*) from testdb.table1"
      result2: 1400
  type: HiveQueryNode
  deps: first_compact
# Test-suite DAG for complex-dag-mor.yaml.
# Node chain (via `deps`): inserts -> rollback -> upsert -> hive sync/query ->
# upsert -> hive query -> schedule compaction -> (upsert | compact) ->
# hive query.
dag_name: complex-dag-mor.yaml
dag_rounds: 1
dag_intermittent_delay_mins: 10
dag_content:
  first_insert:
    config:
      record_size: 70000
      num_partitions_insert: 1
      repeat_count: 5
      num_records_insert: 100
    type: InsertNode
    deps: none

  second_insert:
    config:
      record_size: 70000
      num_partitions_insert: 1
      repeat_count: 5
      num_records_insert: 100
    type: InsertNode
    deps: first_insert

  third_insert:
    config:
      record_size: 70000
      num_partitions_insert: 1
      repeat_count: 2
      num_records_insert: 300
    type: InsertNode
    deps: second_insert

  first_rollback:
    config:  # intentionally empty
    type: RollbackNode
    deps: third_insert

  first_upsert:
    config:
      record_size: 70000
      num_partitions_insert: 1
      num_records_insert: 300
      repeat_count: 1
      num_records_upsert: 100
      num_partitions_upsert: 10
    type: UpsertNode
    deps: first_rollback

  first_hive_sync:
    config:
      queue_name: "adhoc"
      engine: "mr"
    type: HiveSyncNode
    deps: first_upsert

  # This query node lists no hive_queries, unlike the later query nodes.
  first_hive_query:
    config:
      queue_name: "adhoc"
      engine: "mr"
    type: HiveQueryNode
    deps: first_hive_sync

  second_upsert:
    config:
      record_size: 70000
      num_partitions_insert: 1
      num_records_insert: 300
      repeat_count: 1
      num_records_upsert: 100
      num_partitions_upsert: 10
    type: UpsertNode
    deps: first_hive_query

  second_hive_query:
    config:
      queue_name: "adhoc"
      engine: "mr"
      # Each queryN is paired with the resultN value listed right after it.
      hive_queries:
        query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
        result1: 0
        query2: "select count(*) from testdb.table1"
        result2: 1100
    type: HiveQueryNode
    deps: second_upsert

  first_schedule_compact:
    config:  # intentionally empty
    type: ScheduleCompactNode
    deps: second_hive_query

  # third_upsert and first_compact both list first_schedule_compact as their
  # dependency, so the chain forks here.
  third_upsert:
    config:
      record_size: 70000
      num_partitions_insert: 1
      num_records_insert: 300
      repeat_count: 1
      num_records_upsert: 100
      num_partitions_upsert: 10
    type: UpsertNode
    deps: first_schedule_compact

  first_compact:
    config:  # intentionally empty
    type: CompactNode
    deps: first_schedule_compact

  third_hive_query:
    config:
      queue_name: "adhoc"
      engine: "mr"
      hive_queries:
        query1: "select count(*) from testdb.table1 group by `_row_key` having count(*) > 1"
        result1: 0
        query2: "select count(*) from testdb.table1"
        result2: 1400
    type: HiveQueryNode
    deps: first_compact
Loading