Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
1ba8220
[HUDI-3613] Adding/fixing yamls for metadata (#5029)
nsivabalan Mar 14, 2022
465d553
[HUDI-3600] Tweak the default cleaning strategy to be more streaming …
danny0405 Mar 14, 2022
4b75cb6
fix NPE when run schdule using spark-sql if the commits time < hoodie…
peanut-chenzhong Mar 14, 2022
003c6ee
[MINODR] Remove repeated kafka-clients dependencies (#5034)
wangxianghu Mar 14, 2022
22c3ce7
[HUDI-3621] Fixing NullPointerException in DeltaStreamer (#5039)
nsivabalan Mar 14, 2022
30cf393
[HUDI-3623] Removing hive sync node from non hive yamls (#5040)
nsivabalan Mar 14, 2022
d40adfa
[HUDI-3620] Adding spark3.2.0 profile (#5038)
nsivabalan Mar 14, 2022
3b59b76
[HUDI-3547] Introduce MaxwellSourcePostProcessor to extract data from…
wangxianghu Mar 15, 2022
6ed7106
[HUDI-3606] Add `org.objenesis:objenesis` to hudi-timeline-server-bun…
cdmikechen Mar 15, 2022
9bdda2a
[HUDI-3619] Fix HoodieOperation fromValue using wrong constant value …
Mar 15, 2022
5e8ff8d
[HUDI-3514] Rebase Data Skipping flow to rely on MT Column Stats inde…
Mar 15, 2022
d514570
[HUDI-3633] Allow non-string values to be set in TypedProperties (#5045)
codope Mar 15, 2022
55dca96
[HUDI-3589] flink sync hive metadata supports table properties and se…
todd5167 Mar 15, 2022
296a0e6
[HUDI-3588] Remove hudi-common and hudi-hadoop-mr jars in Presto Dock…
yihua Mar 16, 2022
91849c3
[HUDI-3607] Support backend switch in HoodieFlinkStreamer (#5032)
liufangqi Mar 16, 2022
8ca9a54
[Hudi-3376] Add an option to skip under deletion files for HoodieMeta…
zhangyue19921010 Mar 17, 2022
95e6e53
[HUDI-3404] Automatically adjust write configs based on metadata tabl…
yihua Mar 17, 2022
5ba2d9a
[HUDI-3494] Consider triggering condition of MOR compaction during ar…
yihua Mar 17, 2022
bf191f8
[HUDI-3645] Fix NPE caused by multiple threads accessing non-thread-s…
fengjian428 Mar 17, 2022
7446ff9
[HUDI-2439] Replace RDD with HoodieData in HoodieSparkTable and commi…
xushiyan Mar 17, 2022
9ece775
[MINOR] HoodieFileScanRDD could print null path (#5056)
Mar 17, 2022
6fe4d6e
[HUDI-3598] Row Data to Hoodie Record Operator parallelism needs to a…
JerryYue-M Mar 18, 2022
2551c26
[HUDI-3656] Adding medium sized dataset for clustering and minor fixe…
nsivabalan Mar 18, 2022
316e38c
[HUDI-3659] Reducing the validation frequency with integ tests (#5067)
nsivabalan Mar 18, 2022
099c2c0
[HUDI-3457] Refactored Spark DataSource Relations to avoid code dupli…
Mar 19, 2022
1b6e201
[HUDI-3663] Fixing Column Stats index to properly handle first Data T…
Mar 20, 2022
15d1c18
[MINOR] Remove flaky assert in TestInLineFileSystem (#5069)
yihua Mar 20, 2022
799c78e
[HUDI-3665] Support flink multiple versions (#5072)
danny0405 Mar 21, 2022
a118d56
[MINOR] Fixing sparkUpdateNode for record generation (#5079)
nsivabalan Mar 21, 2022
26e5d2e
[HUDI-3559] Flink bucket index with COW table throws NoSuchElementExc…
wxplovecc Mar 11, 2022
ca0931d
[HUDI-1436]: Provide an option to trigger clean every nth commit (#4385)
pratyakshsharma Mar 22, 2022
9b6e138
[HUDI-3640] Set SimpleKeyGenerator as default in 2to3 table upgrade f…
yihua Mar 22, 2022
5f570ea
[HUDI-2883] Refactor hive sync tool / config to use reflection and st…
rmahindra123 Mar 22, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
8 changes: 4 additions & 4 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
options: -Punit-tests -pl hudi-common,hudi-flink,hudi-client/hudi-spark-client
options: -Punit-tests -pl hudi-common,hudi-flink-datasource/hudi-flink,hudi-client/hudi-spark-client
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
Expand All @@ -66,7 +66,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
options: -Pfunctional-tests -pl hudi-common,hudi-flink
options: -Pfunctional-tests -pl hudi-common,hudi-flink-datasource/hudi-flink
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
Expand Down Expand Up @@ -165,7 +165,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
options: -Punit-tests -pl !hudi-common,!hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
options: -Punit-tests -pl !hudi-common,!hudi-flink-datasource/hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
Expand All @@ -174,7 +174,7 @@ stages:
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
options: -Pfunctional-tests -pl !hudi-common,!hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
options: -Pfunctional-tests -pl !hudi-common,!hudi-flink-datasource/hudi-flink,!hudi-client/hudi-spark-client,!hudi-client/hudi-client-common,!hudi-client/hudi-flink-client,!hudi-client/hudi-java-client,!hudi-cli,!hudi-utilities,!hudi-sync/hudi-hive-sync
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx2g $(MAVEN_OPTS)'
Expand Down
40 changes: 20 additions & 20 deletions docker/compose/docker-compose_hadoop284_hive233_spark244.yml
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ services:
presto-coordinator-1:
container_name: presto-coordinator-1
hostname: presto-coordinator-1
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.268:latest
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.271:latest
ports:
- '8090:8090'
environment:
Expand All @@ -201,25 +201,25 @@ services:
command: coordinator

presto-worker-1:
container_name: presto-worker-1
hostname: presto-worker-1
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.268:latest
depends_on: ["presto-coordinator-1"]
environment:
- PRESTO_JVM_MAX_HEAP=512M
- PRESTO_QUERY_MAX_MEMORY=1GB
- PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB
- PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB
- PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB
- TERM=xterm
links:
- "hivemetastore"
- "hiveserver"
- "hive-metastore-postgresql"
- "namenode"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
command: worker
container_name: presto-worker-1
hostname: presto-worker-1
image: apachehudi/hudi-hadoop_2.8.4-prestobase_0.271:latest
depends_on: [ "presto-coordinator-1" ]
environment:
- PRESTO_JVM_MAX_HEAP=512M
- PRESTO_QUERY_MAX_MEMORY=1GB
- PRESTO_QUERY_MAX_MEMORY_PER_NODE=256MB
- PRESTO_QUERY_MAX_TOTAL_MEMORY_PER_NODE=384MB
- PRESTO_MEMORY_HEAP_HEADROOM_PER_NODE=100MB
- TERM=xterm
links:
- "hivemetastore"
- "hiveserver"
- "hive-metastore-postgresql"
- "namenode"
volumes:
- ${HUDI_WS}:/var/hoodie/ws
command: worker

trino-coordinator-1:
container_name: trino-coordinator-1
Expand Down
24 changes: 3 additions & 21 deletions docker/demo/config/test-suite/cow-spark-long-running.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,6 @@ dag_content:
num_records_insert: 10000
type: SparkInsertNode
deps: none
first_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_insert
first_validate:
config:
validate_hive: false
type: ValidateDatasetNode
deps: first_hive_sync
first_upsert:
config:
record_size: 200
Expand All @@ -45,29 +34,22 @@ dag_content:
num_records_upsert: 3000
num_partitions_upsert: 50
type: SparkUpsertNode
deps: first_validate
deps: first_insert
first_delete:
config:
num_partitions_delete: 50
num_records_delete: 8000
type: SparkDeleteNode
deps: first_upsert
second_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_delete
second_validate:
config:
validate_once_every_itr : 5
validate_hive: false
delete_input_data: true
type: ValidateDatasetNode
deps: second_hive_sync
deps: first_delete
last_validate:
config:
execute_itr_count: 30
validate_clean: true
validate_archival: true
type: ValidateAsyncOperations
deps: second_validate
16 changes: 2 additions & 14 deletions docker/demo/config/test-suite/cow-spark-simple.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,11 @@ dag_content:
num_records_insert: 100
type: SparkInsertNode
deps: none
first_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_insert
first_validate:
config:
validate_hive: false
type: ValidateDatasetNode
deps: first_hive_sync
deps: first_insert
first_upsert:
config:
record_size: 1000
Expand All @@ -52,15 +46,9 @@ dag_content:
num_records_delete: 30
type: SparkDeleteNode
deps: first_upsert
second_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_delete
second_validate:
config:
validate_hive: false
delete_input_data: false
type: ValidateDatasetNode
deps: second_hive_sync
deps: first_delete
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,6 @@ dag_content:
engine: "mr"
type: HiveSyncNode
deps: third_insert
first_validate:
config:
validate_hive: false
type: ValidateDatasetNode
deps: first_hive_sync
first_upsert:
config:
record_size: 1000
Expand All @@ -61,7 +56,7 @@ dag_content:
num_records_upsert: 100
num_partitions_upsert: 1
type: UpsertNode
deps: first_validate
deps: first_hive_sync
first_delete:
config:
num_partitions_delete: 50
Expand All @@ -76,14 +71,13 @@ dag_content:
deps: first_delete
second_validate:
config:
validate_once_every_itr : 5
validate_hive: true
delete_input_data: true
type: ValidateDatasetNode
deps: second_hive_sync
last_validate:
config:
execute_itr_count: 50
validate_clean: true
validate_archival: true
type: ValidateAsyncOperations
deps: second_validate
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Long-running DeltaStreamer DAG that writes across multiple partitions.
# Nodes under dag_content are chained through `deps`:
#   first_insert -> second_insert -> third_insert -> first_upsert
#     -> first_delete -> second_validate -> last_validate
dag_name: deltastreamer-long-running-multi-partitions.yaml
dag_rounds: 30
dag_intermittent_delay_mins: 1
dag_content:
  # Root node of the DAG (deps: none).
  first_insert:
    config:
      record_size: 1000
      num_partitions_insert: 5
      repeat_count: 1
      num_records_insert: 1000
    type: InsertNode
    deps: none
  # Largest insert in the chain: 10000 records over 50 partitions.
  second_insert:
    config:
      record_size: 1000
      num_partitions_insert: 50
      repeat_count: 1
      num_records_insert: 10000
    type: InsertNode
    deps: first_insert
  third_insert:
    config:
      record_size: 1000
      num_partitions_insert: 2
      repeat_count: 1
      num_records_insert: 300
    type: InsertNode
    deps: second_insert
  # Carries both *_insert and *_upsert counts.
  # NOTE(review): presumably the node writes new records alongside the
  # updates - confirm against UpsertNode.
  first_upsert:
    config:
      record_size: 1000
      num_partitions_insert: 2
      num_records_insert: 300
      repeat_count: 1
      num_records_upsert: 100
      num_partitions_upsert: 1
    type: UpsertNode
    deps: third_insert
  first_delete:
    config:
      num_partitions_delete: 50
      num_records_delete: 8000
    type: DeleteNode
    deps: first_upsert
  # This DAG contains no HiveSyncNode, and Hive validation is switched off.
  second_validate:
    config:
      # NOTE(review): presumably validates only on every 5th round - confirm
      # against ValidateDatasetNode.
      validate_once_every_itr: 5
      validate_hive: false
      delete_input_data: true
    type: ValidateDatasetNode
    deps: first_delete
  last_validate:
    config:
      # Same value as dag_rounds above.
      # NOTE(review): presumably makes this check run on the final round
      # only - keep the two values in sync if either changes.
      execute_itr_count: 30
    type: ValidateAsyncOperations
    deps: second_validate
Original file line number Diff line number Diff line change
Expand Up @@ -57,22 +57,15 @@ dag_content:
num_records_delete: 8000
type: DeleteNode
deps: first_upsert
second_hive_sync:
config:
queue_name: "adhoc"
engine: "mr"
type: HiveSyncNode
deps: first_delete
second_validate:
config:
validate_once_every_itr : 5
validate_hive: false
delete_input_data: true
type: ValidateDatasetNode
deps: second_hive_sync
deps: first_delete
last_validate:
config:
execute_itr_count: 50
validate_clean: true
validate_archival: true
type: ValidateAsyncOperations
deps: second_validate
73 changes: 73 additions & 0 deletions docker/demo/config/test-suite/deltastreamer-medium-clustering.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# to be used with test-aggressive-clean-archival.properties

# Medium-sized DeltaStreamer DAG (named for clustering runs).
# Nodes under dag_content are chained through `deps`:
#   first_insert -> second_insert -> third_insert -> first_upsert
#     -> first_delete -> second_validate -> last_validate
dag_name: deltastreamer-medium-clustering.yaml
dag_rounds: 20
dag_intermittent_delay_mins: 1
dag_content:
  # Root node of the DAG (deps: none).
  first_insert:
    config:
      record_size: 1000
      num_partitions_insert: 5
      repeat_count: 1
      num_records_insert: 1000
    type: InsertNode
    deps: none
  # Largest insert in the chain: 10000 records over 50 partitions.
  second_insert:
    config:
      record_size: 1000
      num_partitions_insert: 50
      repeat_count: 1
      num_records_insert: 10000
    type: InsertNode
    deps: first_insert
  third_insert:
    config:
      record_size: 1000
      num_partitions_insert: 2
      repeat_count: 1
      num_records_insert: 300
    type: InsertNode
    deps: second_insert
  # Carries both *_insert and *_upsert counts.
  # NOTE(review): presumably the node writes new records alongside the
  # updates - confirm against UpsertNode.
  first_upsert:
    config:
      record_size: 1000
      num_partitions_insert: 2
      num_records_insert: 300
      repeat_count: 1
      num_records_upsert: 100
      num_partitions_upsert: 1
    type: UpsertNode
    deps: third_insert
  first_delete:
    config:
      num_partitions_delete: 50
      num_records_delete: 8000
    type: DeleteNode
    deps: first_upsert
  # This DAG contains no HiveSyncNode, and Hive validation is switched off.
  # No validate_once_every_itr is set for this node.
  second_validate:
    config:
      validate_hive: false
      delete_input_data: true
    type: ValidateDatasetNode
    deps: first_delete
  last_validate:
    config:
      # Same value as dag_rounds above.
      # NOTE(review): presumably makes this check run on the final round
      # only - keep the two values in sync if either changes.
      execute_itr_count: 20
    type: ValidateAsyncOperations
    deps: second_validate
Loading