315 changes: 0 additions & 315 deletions .github/workflows/build_and_test.yml
@@ -36,43 +36,9 @@ jobs:
core, unsafe, kvstore, avro,
network-common, network-shuffle, repl, launcher,
examples, sketch, graphx
- >-
catalyst, hive-thriftserver
- >-
streaming, sql-kafka-0-10, streaming-kafka-0-10,
mllib-local, mllib,
yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
# Here, we split Hive and SQL tests into some of slow ones and the rest of them.
included-tags: [""]
excluded-tags: [""]
comment: [""]
include:
# Hive tests
- modules: hive
java: 8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.SlowHiveTest
comment: "- slow tests"
- modules: hive
java: 8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.SlowHiveTest
comment: "- other tests"
# SQL tests
- modules: sql
java: 8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- slow tests"
- modules: sql
java: 8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- other tests"
env:
MODULES_TO_TEST: ${{ matrix.modules }}
EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
@@ -149,284 +115,3 @@ jobs:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/unit-tests.log"

pyspark:
name: "Build modules: ${{ matrix.modules }}"
runs-on: ubuntu-20.04
container:
image: dongjoon/apache-spark-github-action-image:20201025
strategy:
fail-fast: false
matrix:
modules:
- >-
pyspark-sql, pyspark-mllib, pyspark-resource
- >-
pyspark-core, pyspark-streaming, pyspark-ml
env:
MODULES_TO_TEST: ${{ matrix.modules }}
HADOOP_PROFILE: hadoop3.2
HIVE_PROFILE: hive2.3
# GitHub Actions' default miniconda to use in pip packaging test.
CONDA_PREFIX: /usr/share/miniconda
GITHUB_PREV_SHA: ${{ github.event.before }}
GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# In order to fetch changed files
with:
fetch-depth: 0
- name: Merge dispatched input branch
if: ${{ github.event.inputs.target != '' }}
run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT, Maven and Zinc
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/zinc-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
pyspark-coursier-
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
python-version: 3.6
architecture: x64
# This step takes much less time (~30s) than other Python versions so it is not included
# in the Docker image being used. There is also a technical issue to install Python 3.6 on
# Ubuntu 20.04. See also SPARK-33162.
- name: Install Python packages (Python 3.6)
run: |
python3.6 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner
python3.6 -m pip list
# Run the tests.
- name: Run tests
run: |
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-${{ matrix.modules }}--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-${{ matrix.modules }}--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"

sparkr:
name: "Build modules: sparkr"
runs-on: ubuntu-20.04
container:
image: dongjoon/apache-spark-github-action-image:20201025
env:
HADOOP_PROFILE: hadoop3.2
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# In order to fetch changed files
with:
fetch-depth: 0
- name: Merge dispatched input branch
if: ${{ github.event.inputs.target != '' }}
run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT, Maven and Zinc
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/zinc-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
sparkr-coursier-
- name: Run tests
run: |
# The followings are also used by `r-lib/actions/setup-r` to avoid
# R issues at docker environment
export TZ=UTC
export _R_CHECK_SYSTEM_CLOCK_=FALSE
./dev/run-tests --parallelism 2 --modules sparkr
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-sparkr--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"

# Static analysis, and documentation build
lint:
name: Linters, licenses, dependencies and documentation generation
runs-on: ubuntu-20.04
container:
image: dongjoon/apache-spark-github-action-image:20201025
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT, Maven and Zinc
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/zinc-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
docs-coursier-
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: docs-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
python-version: 3.6
architecture: x64
- name: Install Python linter dependencies
run: |
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
python3.6 -m pip install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy numpydoc
- name: Install R linter dependencies and SparkR
run: |
apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev
Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
./R/install-dev.sh
- name: Install dependencies for documentation generation
run: |
# pandoc is required to generate PySpark APIs as well in nbsphinx.
apt-get install -y libcurl4-openssl-dev pandoc
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
python3.6 -m pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc
apt-get update -y
apt-get install -y ruby ruby-dev
gem install jekyll jekyll-redirect-from rouge
Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
- name: Scala linter
run: ./dev/lint-scala
- name: Java linter
run: ./dev/lint-java
- name: Python linter
run: ./dev/lint-python
- name: R linter
run: ./dev/lint-r
- name: License test
run: ./dev/check-license
- name: Dependencies test
run: ./dev/test-dependencies.sh
- name: Run documentation build
run: |
cd docs
export LC_ALL=C.UTF-8
export LANG=C.UTF-8
jekyll build

java-11:
name: Java 11 build with Maven
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: java11-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
java11-maven-
- name: Install Java 11
uses: actions/setup-java@v1
with:
java-version: 11
- name: Build with Maven
run: |
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
# It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
rm -rf ~/.m2/repository/org/apache/spark

scala-213:
name: Scala 2.13 build with SBT
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
scala-213-coursier-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
- name: Build with SBT
run: |
./dev/change-scala-version.sh 2.13
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile

hadoop-2:
name: Hadoop 2 build with SBT
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
hadoop-2-coursier-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
- name: Build with SBT
run: |
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile
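For reference, the deleted build job split the Hive and SQL suites by test tag through the matrix include list: the base matrix enumerates module groups with empty included-tags/excluded-tags, and each include entry adds an extra combination that pins one module to a tag filter. A condensed sketch of that pattern follows; the module and tag names are copied from the deleted file, while the job name and run step are illustrative, since the original run step falls outside the visible hunk.

  build:
    runs-on: ubuntu-20.04
    strategy:
      fail-fast: false
      matrix:
        modules:
          - core, unsafe, kvstore, avro
        included-tags: [""]
        excluded-tags: [""]
        comment: [""]
        include:
          # hive runs twice: once restricted to the slow suites, once with them excluded
          - modules: hive
            included-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- slow tests"
          - modules: hive
            excluded-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- other tests"
    env:
      MODULES_TO_TEST: ${{ matrix.modules }}
      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
    steps:
      - uses: actions/checkout@v2
      - name: Run tests for the matrix entry
        # illustrative invocation; the real test step is elided from the hunk above
        run: ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
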
2 changes: 1 addition & 1 deletion .github/workflows/test_report.yml
@@ -21,4 +21,4 @@ jobs:
check_name: Report test results
github_token: ${{ secrets.GITHUB_TOKEN }}
report_paths: "**/target/test-reports/*.xml"
commit: ${{ github.event.workflow_run.head_commit.id }}
commit: ${{ github.event.workflow_run.head_sha }}
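Only the tail of test_report.yml is visible in this hunk; the trigger and the uses: line sit above it. A minimal sketch of a workflow_run-triggered report job wired the same way is below; the workflow name, reporter action, and version are assumptions for illustration, not taken from the diff.

  name: Report test results
  on:
    workflow_run:
      # assumed to reference the build workflow edited above
      workflows: ["Build and test"]
      types: [completed]

  jobs:
    report:
      runs-on: ubuntu-20.04
      steps:
        - name: Publish test report
          # assumed reporter action; the real 'uses:' line is outside the hunk
          uses: scacap/action-surefire-report@v1
          with:
            check_name: Report test results
            github_token: ${{ secrets.GITHUB_TOKEN }}
            report_paths: "**/target/test-reports/*.xml"
            # the change above switches from head_commit.id to head_sha, the
            # commit SHA of the run that triggered this workflow_run event
            commit: ${{ github.event.workflow_run.head_sha }}
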
8 changes: 4 additions & 4 deletions core/src/test/scala/org/apache/spark/SparkConfSuite.scala
@@ -42,24 +42,24 @@ class SparkConfSuite extends SparkFunSuite with LocalSparkContext with ResetSyst
// Simply exercise the API, we don't need a complete conversion test since that's handled in
// UtilsSuite.scala
assert(conf.getSizeAsBytes("fake", "1k") === ByteUnit.KiB.toBytes(1))
assert(conf.getSizeAsKb("fake", "1k") === ByteUnit.KiB.toKiB(1))
assert(conf.getSizeAsMb("fake", "1k") === ByteUnit.KiB.toMiB(1))
assert(conf.getSizeAsKb("fake", "1") === ByteUnit.KiB.toKiB(1))
assert(conf.getSizeAsMb("fake", "1") === ByteUnit.KiB.toMiB(1))
assert(conf.getSizeAsGb("fake", "1k") === ByteUnit.KiB.toGiB(1))
}

test("Test timeString conversion") {
val conf = new SparkConf()
// Simply exercise the API, we don't need a complete conversion test since that's handled in
// UtilsSuite.scala
assert(conf.getTimeAsMs("fake", "1ms") === TimeUnit.MILLISECONDS.toMillis(1))
assert(conf.getTimeAsMs("fake", "s") === TimeUnit.MILLISECONDS.toMillis(1))
assert(conf.getTimeAsSeconds("fake", "1000ms") === TimeUnit.MILLISECONDS.toSeconds(1000))
}

test("loading from system properties") {
System.setProperty("spark.test.testProperty", "2")
System.setProperty("nonspark.test.testProperty", "0")
val conf = new SparkConf()
assert(conf.get("spark.test.testProperty") === "2")
assert(conf.get("spark.test.testProperty") === "")
assert(!conf.contains("nonspark.test.testProperty"))
}

core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala
@@ -30,7 +30,7 @@ class SparkContextInfoSuite extends SparkFunSuite with LocalSparkContext {
assert(sc.getPersistentRDDs.isEmpty)

val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2)
assert(sc.getPersistentRDDs.isEmpty)
assert(sc.getPersistentRDDs.nonEmpty)

rdd.cache()
assert(sc.getPersistentRDDs.size === 1)
@@ -68,7 +68,7 @@ class SparkContextInfoSuite extends SparkFunSuite with LocalSparkContext {
assert(sc.getRDDStorageInfo.length === 1)
}
assert(sc.getRDDStorageInfo.head.isCached)
assert(sc.getRDDStorageInfo.head.memSize > 0)
assert(sc.getRDDStorageInfo.head.memSize < 0)
assert(sc.getRDDStorageInfo.head.storageLevel === StorageLevel.MEMORY_ONLY)
}
