315 changes: 0 additions & 315 deletions .github/workflows/build_and_test.yml
@@ -36,43 +36,9 @@ jobs:
core, unsafe, kvstore, avro,
network-common, network-shuffle, repl, launcher,
examples, sketch, graphx
- >-
catalyst, hive-thriftserver
- >-
streaming, sql-kafka-0-10, streaming-kafka-0-10,
mllib-local, mllib,
yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
# Here, we split Hive and SQL tests into some of slow ones and the rest of them.
included-tags: [""]
excluded-tags: [""]
comment: [""]
include:
# Hive tests
- modules: hive
java: 8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.SlowHiveTest
comment: "- slow tests"
- modules: hive
java: 8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.SlowHiveTest
comment: "- other tests"
# SQL tests
- modules: sql
java: 8
hadoop: hadoop3.2
hive: hive2.3
included-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- slow tests"
- modules: sql
java: 8
hadoop: hadoop3.2
hive: hive2.3
excluded-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- other tests"
env:
MODULES_TO_TEST: ${{ matrix.modules }}
EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
@@ -149,284 +115,3 @@ jobs:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/unit-tests.log"

pyspark:
name: "Build modules: ${{ matrix.modules }}"
runs-on: ubuntu-20.04
container:
image: dongjoon/apache-spark-github-action-image:20201025
strategy:
fail-fast: false
matrix:
modules:
- >-
pyspark-sql, pyspark-mllib, pyspark-resource
- >-
pyspark-core, pyspark-streaming, pyspark-ml
env:
MODULES_TO_TEST: ${{ matrix.modules }}
HADOOP_PROFILE: hadoop3.2
HIVE_PROFILE: hive2.3
# GitHub Actions' default miniconda to use in pip packaging test.
CONDA_PREFIX: /usr/share/miniconda
GITHUB_PREV_SHA: ${{ github.event.before }}
GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# In order to fetch changed files
with:
fetch-depth: 0
- name: Merge dispatched input branch
if: ${{ github.event.inputs.target != '' }}
run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT, Maven and Zinc
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/zinc-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
pyspark-coursier-
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
python-version: 3.6
architecture: x64
# This step takes much less time (~30s) than other Python versions so it is not included
# in the Docker image being used. There is also a technical issue to install Python 3.6 on
# Ubuntu 20.04. See also SPARK-33162.
- name: Install Python packages (Python 3.6)
run: |
python3.6 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner
python3.6 -m pip list
# Run the tests.
- name: Run tests
run: |
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-${{ matrix.modules }}--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-${{ matrix.modules }}--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"

sparkr:
name: "Build modules: sparkr"
runs-on: ubuntu-20.04
container:
image: dongjoon/apache-spark-github-action-image:20201025
env:
HADOOP_PROFILE: hadoop3.2
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# In order to fetch changed files
with:
fetch-depth: 0
- name: Merge dispatched input branch
if: ${{ github.event.inputs.target != '' }}
run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT, Maven and Zinc
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/zinc-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
sparkr-coursier-
- name: Run tests
run: |
# The followings are also used by `r-lib/actions/setup-r` to avoid
# R issues at docker environment
export TZ=UTC
export _R_CHECK_SYSTEM_CLOCK_=FALSE
./dev/run-tests --parallelism 2 --modules sparkr
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-sparkr--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"

# Static analysis, and documentation build
lint:
name: Linters, licenses, dependencies and documentation generation
runs-on: ubuntu-20.04
container:
image: dongjoon/apache-spark-github-action-image:20201025
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT, Maven and Zinc
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/zinc-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
docs-coursier-
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: docs-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-
- name: Install Python 3.6
uses: actions/setup-python@v2
with:
python-version: 3.6
architecture: x64
- name: Install Python linter dependencies
run: |
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
python3.6 -m pip install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy numpydoc
- name: Install R linter dependencies and SparkR
run: |
apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev
Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
./R/install-dev.sh
- name: Install dependencies for documentation generation
run: |
# pandoc is required to generate PySpark APIs as well in nbsphinx.
apt-get install -y libcurl4-openssl-dev pandoc
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
python3.6 -m pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc
apt-get update -y
apt-get install -y ruby ruby-dev
gem install jekyll jekyll-redirect-from rouge
Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
- name: Scala linter
run: ./dev/lint-scala
- name: Java linter
run: ./dev/lint-java
- name: Python linter
run: ./dev/lint-python
- name: R linter
run: ./dev/lint-r
- name: License test
run: ./dev/check-license
- name: Dependencies test
run: ./dev/test-dependencies.sh
- name: Run documentation build
run: |
cd docs
export LC_ALL=C.UTF-8
export LANG=C.UTF-8
jekyll build

java-11:
name: Java 11 build with Maven
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Maven local repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: java11-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
java11-maven-
- name: Install Java 11
uses: actions/setup-java@v1
with:
java-version: 11
- name: Build with Maven
run: |
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
# It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414.
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
rm -rf ~/.m2/repository/org/apache/spark

scala-213:
name: Scala 2.13 build with SBT
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
scala-213-coursier-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
- name: Build with SBT
run: |
./dev/change-scala-version.sh 2.13
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile

hadoop-2:
name: Hadoop 2 build with SBT
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
hadoop-2-coursier-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
- name: Build with SBT
run: |
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile
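For reference, the deleted build job split the Hive and SQL suites by test tag through the matrix include list: the base matrix enumerates module groups with empty included-tags/excluded-tags, and each include entry adds an extra combination that pins one module to a tag filter. A condensed sketch of that pattern follows; the module and tag names are copied from the deleted file, while the job name and run step are illustrative, since the original run step falls outside the visible hunk.

  build:
    runs-on: ubuntu-20.04
    strategy:
      fail-fast: false
      matrix:
        modules:
          - core, unsafe, kvstore, avro
        included-tags: [""]
        excluded-tags: [""]
        comment: [""]
        include:
          # hive runs twice: once restricted to the slow suites, once with them excluded
          - modules: hive
            included-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- slow tests"
          - modules: hive
            excluded-tags: org.apache.spark.tags.SlowHiveTest
            comment: "- other tests"
    env:
      MODULES_TO_TEST: ${{ matrix.modules }}
      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
    steps:
      - uses: actions/checkout@v2
      - name: Run tests for the matrix entry
        # illustrative invocation; the real test step is elided from the hunk above
        run: ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
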
2 changes: 1 addition & 1 deletion .github/workflows/test_report.yml
@@ -21,4 +21,4 @@ jobs:
check_name: Report test results
github_token: ${{ secrets.GITHUB_TOKEN }}
report_paths: "**/target/test-reports/*.xml"
commit: ${{ github.event.workflow_run.head_commit.id }}
commit: ${{ github.event.workflow_run.head_sha }}
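Only the tail of test_report.yml is visible in this hunk; the trigger and the uses: line sit above it. A minimal sketch of a workflow_run-triggered report job wired the same way is below; the workflow name, reporter action, and version are assumptions for illustration, not taken from the diff.

  name: Report test results
  on:
    workflow_run:
      # assumed to reference the build workflow edited above
      workflows: ["Build and test"]
      types: [completed]

  jobs:
    report:
      runs-on: ubuntu-20.04
      steps:
        - name: Publish test report
          # assumed reporter action; the real 'uses:' line is outside the hunk
          uses: scacap/action-surefire-report@v1
          with:
            check_name: Report test results
            github_token: ${{ secrets.GITHUB_TOKEN }}
            report_paths: "**/target/test-reports/*.xml"
            # the change above switches from head_commit.id to head_sha, the
            # commit SHA of the run that triggered this workflow_run event
            commit: ${{ github.event.workflow_run.head_sha }}
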
8 changes: 4 additions & 4 deletions core/src/test/scala/org/apache/spark/SparkConfSuite.scala
@@ -42,24 +42,24 @@ class SparkConfSuite extends SparkFunSuite with LocalSparkContext with ResetSyst
// Simply exercise the API, we don't need a complete conversion test since that's handled in
// UtilsSuite.scala
assert(conf.getSizeAsBytes("fake", "1k") === ByteUnit.KiB.toBytes(1))
assert(conf.getSizeAsKb("fake", "1k") === ByteUnit.KiB.toKiB(1))
assert(conf.getSizeAsMb("fake", "1k") === ByteUnit.KiB.toMiB(1))
assert(conf.getSizeAsKb("fake", "1") === ByteUnit.KiB.toKiB(1))
assert(conf.getSizeAsMb("fake", "1") === ByteUnit.KiB.toMiB(1))
assert(conf.getSizeAsGb("fake", "1k") === ByteUnit.KiB.toGiB(1))
}

test("Test timeString conversion") {
val conf = new SparkConf()
// Simply exercise the API, we don't need a complete conversion test since that's handled in
// UtilsSuite.scala
assert(conf.getTimeAsMs("fake", "1ms") === TimeUnit.MILLISECONDS.toMillis(1))
assert(conf.getTimeAsMs("fake", "s") === TimeUnit.MILLISECONDS.toMillis(1))
assert(conf.getTimeAsSeconds("fake", "1000ms") === TimeUnit.MILLISECONDS.toSeconds(1000))
}

test("loading from system properties") {
System.setProperty("spark.test.testProperty", "2")
System.setProperty("nonspark.test.testProperty", "0")
val conf = new SparkConf()
assert(conf.get("spark.test.testProperty") === "2")
assert(conf.get("spark.test.testProperty") === "")
assert(!conf.contains("nonspark.test.testProperty"))
}

core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala
@@ -30,7 +30,7 @@ class SparkContextInfoSuite extends SparkFunSuite with LocalSparkContext {
assert(sc.getPersistentRDDs.isEmpty)

val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2)
assert(sc.getPersistentRDDs.isEmpty)
assert(sc.getPersistentRDDs.nonEmpty)

rdd.cache()
assert(sc.getPersistentRDDs.size === 1)
@@ -68,7 +68,7 @@ class SparkContextInfoSuite extends SparkFunSuite with LocalSparkContext {
assert(sc.getRDDStorageInfo.length === 1)
}
assert(sc.getRDDStorageInfo.head.isCached)
assert(sc.getRDDStorageInfo.head.memSize > 0)
assert(sc.getRDDStorageInfo.head.memSize < 0)
assert(sc.getRDDStorageInfo.head.storageLevel === StorageLevel.MEMORY_ONLY)
}
