Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions docker/demo/config/log4j.properties
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Set the default spark-shell log level to WARN. When running the spark-shell, the
# log level for this class is used to overwrite the root logger's log level, so that
# the user can have different defaults for the shell and regular Spark apps.
log4j.logger.org.apache.spark.repl.Main=WARN

# Set logging of the integration test suite to INFO level
log4j.logger.org.apache.hudi.integ.testsuite=INFO
# Settings to quiet third party logs that are too verbose
log4j.logger.org.spark_project.jetty=WARN
log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR
Expand All @@ -35,7 +35,6 @@ log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
log4j.logger.org.apache.parquet=ERROR
log4j.logger.parquet=ERROR
log4j.logger.org.apache.spark=WARN

# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support
log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL
log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test-suite DAG with three nodes chained through `deps`:
#   create_table -> insert_records -> validate
# NOTE(review): create_table sets `is_external: true`, so the name below is
# assumed to be the external-table variant; the previous value duplicated the
# managed CTAS DAG's name. Confirm against this file's actual name.
dag_name: spark-sql-nonpartitioned-external-cow-ctas.yaml
dag_rounds: 1
dag_intermittent_delay_mins: 1
dag_content:
  # First node: `deps: none` names no upstream node.
  create_table:
    config:
      table_type: cow
      is_external: true
      primary_key: _row_key
      pre_combine_field: test_suite_source_ordering_field
      use_ctas: true
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlCreateTableNode
    deps: none
  # Runs after create_table, with the same record/partition sizing.
  insert_records:
    config:
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlInsertNode
    deps: create_table
  # Final node; `delete_input_data: true` is set only here.
  validate:
    config:
      delete_input_data: true
    type: spark.sql.SparkSqlValidateDatasetNode
    deps: insert_records
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test-suite DAG with three nodes chained through `deps`:
#   create_table -> insert_records -> validate
# The table is declared with `table_type: mor` and `is_external: true`.
dag_name: spark-sql-nonpartitioned-external-mor.yaml
dag_rounds: 1
dag_intermittent_delay_mins: 1
dag_content:
  # First node: `deps: none` names no upstream node.
  create_table:
    config:
      table_type: mor
      is_external: true
      primary_key: _row_key
      pre_combine_field: test_suite_source_ordering_field
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlCreateTableNode
    deps: none
  # Runs after create_table, with the same record/partition sizing.
  insert_records:
    config:
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlInsertNode
    deps: create_table
  # Final node; `delete_input_data: true` is set only here.
  validate:
    config:
      delete_input_data: true
    type: spark.sql.SparkSqlValidateDatasetNode
    deps: insert_records
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test-suite DAG with three nodes chained through `deps`:
#   create_table -> insert_records -> validate
# create_table sets `use_ctas: true` and does not set `is_external`.
dag_name: spark-sql-nonpartitioned-managed-cow-ctas.yaml
dag_rounds: 1
dag_intermittent_delay_mins: 1
dag_content:
  # First node: `deps: none` names no upstream node.
  create_table:
    config:
      table_type: cow
      primary_key: _row_key
      pre_combine_field: test_suite_source_ordering_field
      use_ctas: true
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlCreateTableNode
    deps: none
  # Runs after create_table, with the same record/partition sizing.
  insert_records:
    config:
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlInsertNode
    deps: create_table
  # Final node; `delete_input_data: true` is set only here.
  validate:
    config:
      delete_input_data: true
    type: spark.sql.SparkSqlValidateDatasetNode
    deps: insert_records
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test-suite DAG with four active nodes chained through `deps`:
#   create_table -> insert_records -> delete_records -> validate
# A merge_records node is present but commented out; delete_records depends
# directly on insert_records.
dag_name: spark-sql-nonpartitioned-managed-cow.yaml
dag_rounds: 1
dag_intermittent_delay_mins: 1
dag_content:
  # First node: `deps: none` names no upstream node.
  create_table:
    config:
      table_type: cow
      primary_key: _row_key
      pre_combine_field: test_suite_source_ordering_field
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlCreateTableNode
    deps: none
  # Runs after create_table, with the same record/partition sizing.
  insert_records:
    config:
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlInsertNode
    deps: create_table
  # Disabled node, kept for reference. If re-enabled, delete_records would
  # presumably need `deps: merge_records` - confirm before enabling.
  # merge_records:
  #   config:
  #     merge_condition: target._row_key = source._row_key
  #     matched_action: update set *
  #     not_matched_action: insert *
  #     record_size: 1000
  #     num_partitions_insert: 10
  #     repeat_count: 1
  #     num_records_upsert: 100
  #     num_records_insert: 1000
  #   type: spark.sql.SparkSqlMergeNode
  #   deps: insert_records
  # Delete step keyed on `condition_column`, with `ratio_records_change: 0.2`.
  delete_records:
    config:
      condition_column: begin_lat
      record_size: 1000
      repeat_count: 1
      ratio_records_change: 0.2
    type: spark.sql.SparkSqlDeleteNode
    deps: insert_records
  # Final node; `delete_input_data: true` is set only here.
  validate:
    config:
      delete_input_data: true
    type: spark.sql.SparkSqlValidateDatasetNode
    deps: delete_records
61 changes: 61 additions & 0 deletions docker/demo/config/test-suite/spark-sql-partition-cow-updates.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test-suite DAG with six nodes chained through `deps`:
#   create_table -> insert_records -> first_validate -> update_records
#     -> delete_records -> second_validate
# dag_name is aligned with this file's name
# (spark-sql-partition-cow-updates.yaml); it previously carried the name of a
# different DAG (spark-sql-partitioned-managed-cow.yaml).
dag_name: spark-sql-partition-cow-updates.yaml
dag_rounds: 1
dag_intermittent_delay_mins: 1
dag_content:
  # First node: `deps: none` names no upstream node. Partitioned on `rider`.
  create_table:
    config:
      table_type: cow
      primary_key: _row_key
      pre_combine_field: test_suite_source_ordering_field
      partition_field: rider
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlCreateTableNode
    deps: none
  # Runs after create_table, with the same record/partition sizing.
  insert_records:
    config:
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlInsertNode
    deps: create_table
  # Intermediate validation; `delete_input_data: false` here, unlike the
  # final validation node.
  first_validate:
    config:
      delete_input_data: false
    type: spark.sql.SparkSqlValidateDatasetNode
    deps: insert_records
  # NOTE(review): `config:` has no entries, so it parses as null rather than
  # an empty mapping. Left unchanged; confirm the DAG loader accepts this.
  update_records:
    config:
    type: spark.sql.SparkSqlUpdateNode
    deps: first_validate
  # Delete step keyed on `condition_column`, with `ratio_records_change: 0.2`.
  delete_records:
    config:
      condition_column: begin_lat
      record_size: 1000
      repeat_count: 1
      ratio_records_change: 0.2
    type: spark.sql.SparkSqlDeleteNode
    deps: update_records
  # Final node; `delete_input_data: true` is set only here.
  second_validate:
    config:
      delete_input_data: true
    type: spark.sql.SparkSqlValidateDatasetNode
    deps: delete_records
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Test-suite DAG with three nodes chained through `deps`:
#   create_table -> insert_records -> validate
# create_table sets `partition_field: rider` and `use_ctas: true`.
dag_name: spark-sql-partitioned-managed-cow-ctas.yaml
dag_rounds: 1
dag_intermittent_delay_mins: 1
dag_content:
  # First node: `deps: none` names no upstream node.
  create_table:
    config:
      table_type: cow
      primary_key: _row_key
      pre_combine_field: test_suite_source_ordering_field
      partition_field: rider
      use_ctas: true
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlCreateTableNode
    deps: none
  # Runs after create_table, with the same record/partition sizing.
  insert_records:
    config:
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: spark.sql.SparkSqlInsertNode
    deps: create_table
  # Final node; `delete_input_data: true` is set only here.
  validate:
    config:
      delete_input_data: true
    type: spark.sql.SparkSqlValidateDatasetNode
    deps: insert_records
Loading