Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
cebdfc4
[HUDI-3088] Use Spark 3.2 as default Spark version
xushiyan Nov 29, 2022
25060fc
fix bot.yml
xushiyan Dec 4, 2022
d87aa9b
fix hudi-client poms
xushiyan Dec 4, 2022
b8d02db
fix hudi-sync poms
xushiyan Dec 4, 2022
47824bf
clean up hudi-spark poms
xushiyan Dec 4, 2022
6e46488
fix failed tests
xushiyan Dec 4, 2022
c6e0d30
fix bot.yml
xushiyan Dec 4, 2022
ae23f1e
temp disable
xushiyan Dec 4, 2022
97b5546
fix bot.yml
xushiyan Dec 5, 2022
0873829
disable some validations
xushiyan Dec 5, 2022
ebc1c53
fix bot.yml
xushiyan Dec 5, 2022
0a9864d
fix integ test pom
xushiyan Dec 5, 2022
45f09a5
exclude integ test module deps
xushiyan Dec 5, 2022
9c9ac30
fix cli pom
xushiyan Dec 5, 2022
875f6d1
fix utilities ut
xushiyan Dec 5, 2022
c2092ce
fix kafka version for spark 3
xushiyan Dec 5, 2022
920f323
remove ineffective exclusion
xushiyan Dec 5, 2022
f3cfad5
fix zookeeper
xushiyan Dec 5, 2022
5b5a52f
fix tests
xushiyan Dec 5, 2022
c56335d
temp disable bundle validation
xushiyan Dec 5, 2022
17c6cfc
update integ test pom
xushiyan Dec 5, 2022
1443c5e
move IT to GH actions
xushiyan Dec 7, 2022
02842b5
fix jackson for java time module'
xushiyan Dec 7, 2022
34fcd0b
Revert "fix jackson for java time module'"
xushiyan Dec 8, 2022
871dff6
fix jackson mapper config
xushiyan Dec 8, 2022
512761c
fix bot.yml
xushiyan Dec 8, 2022
6af0058
update flink profile
xushiyan Dec 8, 2022
4cc5535
add java time module
xushiyan Dec 9, 2022
b4f4a66
fix maven profile name
xushiyan Dec 9, 2022
5c5a904
revert jsr310 related changes
xushiyan Dec 12, 2022
fb5b130
revert changes in TestParquetColumnProjection
xushiyan Dec 12, 2022
a6c9d24
fix bot.yml
xushiyan Dec 12, 2022
fd78acc
disable unstable test
xushiyan Dec 12, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 41 additions & 9 deletions .github/workflows/bot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,45 @@ jobs:
include:
- scalaProfile: "scala-2.11"
sparkProfile: "spark2.4"
flinkProfile: "flink1.13"
flinkProfile: "flink1.15"
sparkArchive: "spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz"
skipBundleValidation: "true" # TODO: remove this var to validate spark2.4 bundle combinations
buildOnly: "false"

- scalaProfile: "scala-2.11"
sparkProfile: "spark2.4"
flinkProfile: "flink1.14"
sparkArchive: ""
skipBundleValidation: "true"
buildOnly: "true"

- scalaProfile: "scala-2.12"
sparkProfile: "spark2.4"
flinkProfile: "flink1.13"
sparkArchive: ""
skipBundleValidation: "true"
buildOnly: "true"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.1"
flinkProfile: "flink1.14"
sparkArchive: ""
skipBundleValidation: "false"
buildOnly: "false"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.2"
flinkProfile: "flink1.14"
sparkArchive: ""
skipBundleValidation: "false"
buildOnly: "false"

- scalaProfile: "scala-2.12"
sparkProfile: "spark3.3"
flinkProfile: "flink1.14"
flinkProfile: "flink1.15"
sparkArchive: ""
skipBundleValidation: "false"
buildOnly: "false"

steps:
- uses: actions/checkout@v2
Expand Down Expand Up @@ -69,23 +87,37 @@ jobs:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
FLINK_PROFILE: ${{ matrix.flinkProfile }}
if: ${{ !endsWith(env.SPARK_PROFILE, '2.4') }} # skip test spark 2.4 as it's covered by Azure CI
if: ${{ matrix.skipBundleValidation != 'true' }}
run: |
HUDI_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
./packaging/bundle-validation/ci_run.sh $HUDI_VERSION
- name: Common Test
- name: 'UT: common & spark'
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
FLINK_PROFILE: ${{ matrix.flinkProfile }}
if: ${{ matrix.buildOnly != 'true' }}
run:
mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -pl hudi-common,hudi-spark-datasource/hudi-spark $MVN_ARGS
- name: 'UT integ-test'
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
FLINK_PROFILE: ${{ matrix.flinkProfile }}
if: ${{ !endsWith(env.SPARK_PROFILE, '2.4') }} # skip test spark 2.4 as it's covered by Azure CI
if: ${{ matrix.buildOnly != 'true' && matrix.sparkArchive != '' }}
run:
mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" '-Dtest=Test*' -pl hudi-common $MVN_ARGS
- name: Spark SQL Test
mvn test -Pintegration-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" -DskipUTs=false -DskipITs=true -pl hudi-integ-test $MVN_ARGS
- name: 'IT'
env:
SCALA_PROFILE: ${{ matrix.scalaProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
FLINK_PROFILE: ${{ matrix.flinkProfile }}
if: ${{ !endsWith(env.SPARK_PROFILE, '2.4') }} # skip test spark 2.4 as it's covered by Azure CI
SPARK_ARCHIVE: ${{ matrix.sparkArchive }}
if: ${{ matrix.buildOnly != 'true' && matrix.sparkArchive != '' }}
run:
mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" '-Dtest=Test*' -pl hudi-spark-datasource/hudi-spark $MVN_ARGS
echo "Downloading $SPARK_ARCHIVE"
curl https://archive.apache.org/dist/spark/$SPARK_ARCHIVE --create-dirs -o $GITHUB_WORKSPACE/$SPARK_ARCHIVE
tar -xvf $GITHUB_WORKSPACE/$SPARK_ARCHIVE -C $GITHUB_WORKSPACE/
mkdir /tmp/spark-events/
export SPARK_HOME=$GITHUB_WORKSPACE/$(basename $SPARK_ARCHIVE .tgz)
mvn verify -Pintegration-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -D"$FLINK_PROFILE" $MVN_ARGS
51 changes: 7 additions & 44 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ parameters:
default:
- 'hudi-spark-datasource'
- 'hudi-spark-datasource/hudi-spark'
- 'hudi-spark-datasource/hudi-spark2'
- 'hudi-spark-datasource/hudi-spark2-common'
- 'hudi-spark-datasource/hudi-spark3.2.x'
- 'hudi-spark-datasource/hudi-spark3.2plus-common'
- 'hudi-spark-datasource/hudi-spark3-common'
- 'hudi-spark-datasource/hudi-spark-common'
- name: job4UTModules
type: object
Expand All @@ -60,8 +61,9 @@ parameters:
- '!hudi-flink-datasource/hudi-flink1.15.x'
- '!hudi-spark-datasource'
- '!hudi-spark-datasource/hudi-spark'
- '!hudi-spark-datasource/hudi-spark2'
- '!hudi-spark-datasource/hudi-spark2-common'
- '!hudi-spark-datasource/hudi-spark3.2.x'
- '!hudi-spark-datasource/hudi-spark3.2plus-common'
- '!hudi-spark-datasource/hudi-spark3-common'
- '!hudi-spark-datasource/hudi-spark-common'
- name: job4FTModules
type: object
Expand All @@ -80,13 +82,10 @@ parameters:
- '!hudi-flink-datasource/hudi-flink1.15.x'

variables:
BUILD_PROFILES: '-Dscala-2.11 -Dspark2.4 -Dflink1.14'
BUILD_PROFILES: '-Dscala-2.12 -Dspark3.2 -Dflink1.15'
PLUGIN_OPTS: '-Dcheckstyle.skip=true -Drat.skip=true -Djacoco.skip=true -ntp -B -V -Pwarn-log -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn'
MVN_OPTS_INSTALL: '-DskipTests $(BUILD_PROFILES) $(PLUGIN_OPTS)'
MVN_OPTS_TEST: '-fae -Pwarn-log $(BUILD_PROFILES) $(PLUGIN_OPTS)'
SPARK_VERSION: '2.4.4'
HADOOP_VERSION: '2.7'
SPARK_ARCHIVE: spark-$(SPARK_VERSION)-bin-hadoop$(HADOOP_VERSION)
JOB1_MODULES: ${{ join(',',parameters.job1Modules) }}
JOB2_MODULES: ${{ join(',',parameters.job2Modules) }}
JOB3_MODULES: ${{ join(',',parameters.job3UTModules) }}
Expand Down Expand Up @@ -210,39 +209,3 @@ stages:
- script: |
grep "testcase" */target/surefire-reports/*.xml */*/target/surefire-reports/*.xml | awk -F'"' ' { print $6,$4,$2 } ' | sort -nr | head -n 100
displayName: Top 100 long-running testcases
- job: IT
displayName: IT modules
timeoutInMinutes: '150'
steps:
- task: Maven@4
displayName: maven install
inputs:
mavenPomFile: 'pom.xml'
goals: 'clean install'
options: $(MVN_OPTS_INSTALL) -Pintegration-tests
publishJUnitResults: false
jdkVersionOption: '1.8'
- task: Maven@4
displayName: UT integ-test
inputs:
mavenPomFile: 'pom.xml'
goals: 'test'
options: $(MVN_OPTS_TEST) -Pintegration-tests -DskipUTs=false -DskipITs=true -pl hudi-integ-test
publishJUnitResults: false
jdkVersionOption: '1.8'
mavenOptions: '-Xmx4g'
- task: AzureCLI@2
displayName: Prepare for IT
inputs:
azureSubscription: apachehudici-service-connection
scriptType: bash
scriptLocation: inlineScript
inlineScript: |
echo 'Downloading $(SPARK_ARCHIVE)'
az storage blob download -c ci-caches -n $(SPARK_ARCHIVE).tgz -f $(Pipeline.Workspace)/$(SPARK_ARCHIVE).tgz --account-name apachehudici
tar -xvf $(Pipeline.Workspace)/$(SPARK_ARCHIVE).tgz -C $(Pipeline.Workspace)/
mkdir /tmp/spark-events/
- script: |
export SPARK_HOME=$(Pipeline.Workspace)/$(SPARK_ARCHIVE)
mvn $(MVN_OPTS_TEST) -Pintegration-tests verify
displayName: IT
94 changes: 10 additions & 84 deletions hudi-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,16 @@
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-api</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client-runtime</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
Expand Down Expand Up @@ -302,96 +312,12 @@
<version>2.6.2</version>
</dependency>

<!-- HDFS test dependencies -->

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>org.mortbay.jetty</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet.jsp</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
<exclusions>
<exclusion>
<groupId>javax.servlet</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<artifactId>netty</artifactId>
<groupId>io.netty</groupId>
</exclusion>
<exclusion>
<artifactId>netty-all</artifactId>
<groupId>io.netty</groupId>
</exclusion>
</exclusions>
</dependency>

<!-- Test -->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-tests-common</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.vintage</groupId>
<artifactId>junit-vintage-engine</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.platform</groupId>
<artifactId>junit-platform-runner</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.platform</groupId>
<artifactId>junit-platform-suite-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.platform</groupId>
<artifactId>junit-platform-commons</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Loading