54 changes: 41 additions & 13 deletions .circleci/config.yml
@@ -327,7 +327,7 @@ jobs:
run-scala-tests:
<<: *test-defaults
# project/CirclePlugin.scala does its own test splitting in SBT based on CIRCLE_NODE_INDEX, CIRCLE_NODE_TOTAL
parallelism: 12
parallelism: 9
# Spark runs a lot of tests in parallel, we need 16 GB of RAM for this
resource_class: xlarge
steps:
@@ -416,28 +416,55 @@ jobs:
- store_artifacts:
path: /tmp/yarn-tests

deploy:
build-maven-versioned:
<<: *defaults
docker:
- image: palantirtechnologies/circle-spark-r
steps:
- *checkout-code
- attach_workspace:
at: .
- restore_cache:
key: maven-dependency-cache-{{ checksum "pom.xml" }}
- restore_cache:
key: build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
- run:
command: dev/set_version_and_package.sh
# This is potentially costly but we can't use the workspace as it would conflict with
# compilation results from build-sbt
- save_cache:
key: v1-maven-build-with-version-{{ .Branch }}-{{ .Revision }}
paths:
- .

deploy:
<<: *defaults
# Some part of the maven setup fails if there's no R, so we need to use the R image here
docker:
- image: palantirtechnologies/circle-spark-r
steps:
# This cache contains the whole project after version was set and mvn package was called
# Restoring first (and instead of checkout) as mvn versions:set mutates real source code...
- restore_cache:
key: v1-maven-build-with-version-{{ .Branch }}-{{ .Revision }}
- restore_cache:
key: maven-dependency-cache-{{ checksum "pom.xml" }}
- restore_cache:
key: build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}

- run: echo "user=$BINTRAY_USERNAME" > .credentials
- run: echo "password=$BINTRAY_PASSWORD" >> .credentials
- run: echo "realm=Bintray API Realm" >> .credentials
- run: echo "host=api.bintray.com" >> .credentials
- deploy: dev/publish.sh
- deploy:
command: dev/publish.sh
- store_artifacts:
path: /tmp/make-distribution.log
destination: make-distribution.log
path: /tmp/make-dist.log
destination: make-dist.log
- store_artifacts:
path: /tmp/publish_artifacts.log
destination: publish_artifacts.log
- deploy: curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -X POST https://api.bintray.com/content/palantir/releases/spark/$(git describe --tags)/publish
- deploy:
command: |
curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -X POST https://api.bintray.com/content/palantir/releases/spark/$(git describe --tags)/publish

workflows:
version: 2
@@ -473,6 +500,10 @@ workflows:
requires:
- build-sbt
<<: *all-branches-and-tags
- build-maven-versioned:
requires:
- build-maven
<<: *deployable-branches-and-tags
- deploy:
requires:
- build-maven
@@ -483,8 +514,5 @@
- run-scala-tests
- run-python-tests
- run-r-tests
filters:
tags:
only: /[0-9]+(?:\.[0-9]+){2,}-palantir\.[0-9]+(?:\.[0-9]+)*/
branches:
only: master
- build-maven-versioned
<<: *deployable-branches-and-tags
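
The reduced parallelism above (12 down to 9 containers) still produces balanced runs because, as the comment near the top of this file's diff notes, project/CirclePlugin.scala splits the Scala test classes itself from CIRCLE_NODE_INDEX and CIRCLE_NODE_TOTAL. A minimal sketch of that idea, with hypothetical names rather than the plugin's actual code, and a naive round-robin split where the real plugin builds timing-balanced buckets:

    // Sketch only: pick this container's share of test classes from CircleCI env vars.
    object CircleSplitSketch {
      // CIRCLE_NODE_INDEX identifies this container; CIRCLE_NODE_TOTAL is the job's parallelism.
      private val index: Int = sys.env.getOrElse("CIRCLE_NODE_INDEX", "0").toInt
      private val total: Int = sys.env.getOrElse("CIRCLE_NODE_TOTAL", "1").toInt

      // Naive round-robin assignment; CirclePlugin instead buckets classes by recorded run time.
      def testsForThisNode(allTestClasses: Seq[String]): Seq[String] =
        allTestClasses.zipWithIndex.collect { case (cls, i) if i % total == index => cls }
    }
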
2 changes: 2 additions & 0 deletions dev/.rat-excludes
@@ -107,6 +107,8 @@ circle.yml
.credentials
publish.sh
publish-local.sh
publish_functions.sh
set_version_and_package.sh
structured-streaming/*
kafka-source-initial-offset-version-2.1.0.bin
kafka-source-initial-offset-future-version.bin
26 changes: 3 additions & 23 deletions dev/publish.sh
@@ -1,30 +1,10 @@
#!/usr/bin/env bash

set -euo pipefail
version=$(git describe --tags)

PALANTIR_FLAGS=(-Phadoop-cloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Phive -Pyarn -Psparkr)
FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"

publish_artifacts() {
tmp_settings="tmp-settings.xml"
echo "<settings><servers><server>" > $tmp_settings
echo "<id>bintray-palantir-release</id><username>$BINTRAY_USERNAME</username>" >> $tmp_settings
echo "<password>$BINTRAY_PASSWORD</password>" >> $tmp_settings
echo "</server></servers></settings>" >> $tmp_settings

./build/mvn versions:set -DnewVersion=$version
./build/mvn --settings $tmp_settings -DskipTests "${PALANTIR_FLAGS[@]}" deploy
}

make_dist() {
build_flags="$1"
shift 1
hadoop_name="hadoop-palantir"
artifact_name="spark-dist_2.11-${hadoop_name}"
file_name="spark-dist-${version}-${hadoop_name}.tgz"
./dev/make-distribution.sh --name "hadoop-palantir" --tgz "$@" $build_flags | tee -a "/tmp/make-distribution.log"
curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -T $file_name "https://api.bintray.com/content/palantir/releases/spark/${version}/org/apache/spark/${artifact_name}/${version}/${artifact_name}-${version}.tgz"
}
source "$FWDIR/publish_functions.sh"

publish_artifacts | tee -a "/tmp/publish_artifacts.log"
make_dist "${PALANTIR_FLAGS[*]}" --clean
make_dist "${PALANTIR_FLAGS[*]}" | tee -a "/tmp/make-dist.log"
37 changes: 37 additions & 0 deletions dev/publish_functions.sh
@@ -0,0 +1,37 @@
#!/usr/bin/env bash

set -euo pipefail

PALANTIR_FLAGS=(-Phadoop-cloud -Phadoop-palantir -Pkinesis-asl -Pkubernetes -Phive -Pyarn -Psparkr)

get_version() {
git describe --tags --first-parent
}

set_version_and_package() {
version=$(get_version)
./build/mvn versions:set -DnewVersion="$version"
./build/mvn -DskipTests "${PALANTIR_FLAGS[@]}" package
}

publish_artifacts() {
tmp_settings="tmp-settings.xml"
echo "<settings><servers><server>" > $tmp_settings
echo "<id>bintray-palantir-release</id><username>$BINTRAY_USERNAME</username>" >> $tmp_settings
echo "<password>$BINTRAY_PASSWORD</password>" >> $tmp_settings
echo "</server></servers></settings>" >> $tmp_settings

./build/mvn --settings $tmp_settings -DskipTests "${PALANTIR_FLAGS[@]}" deploy
}

make_dist() {
build_flags="$1"
shift 1
version=$(get_version)
hadoop_name="hadoop-palantir"
artifact_name="spark-dist_2.11-${hadoop_name}"
file_name="spark-dist-${version}-${hadoop_name}.tgz"
./dev/make-distribution.sh --name "hadoop-palantir" --tgz "$@" $build_flags
curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -T "$file_name" "https://api.bintray.com/content/palantir/releases/spark/${version}/org/apache/spark/${artifact_name}/${version}/${artifact_name}-${version}.tgz"
curl -u $BINTRAY_USERNAME:$BINTRAY_PASSWORD -X POST "https://api.bintray.com/content/palantir/releases/spark/${version}/publish"
}
9 changes: 9 additions & 0 deletions dev/set_version_and_package.sh
@@ -0,0 +1,9 @@
#!/usr/bin/env bash

set -euo pipefail

FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)"

source "$FWDIR/publish_functions.sh"

set_version_and_package
42 changes: 28 additions & 14 deletions project/CirclePlugin.scala
@@ -26,6 +26,8 @@ import sbt.Keys._
import sbt._
import sbt.plugins.JvmPlugin
import scalaz.Dequeue
import scalaz.Heap
import scalaz.Order

//noinspection ScalaStyle
object CirclePlugin extends AutoPlugin {
@@ -56,9 +58,9 @@ object CirclePlugin extends AutoPlugin {

case class TestKey(source: String, classname: String)

// Through this magical command we established that the average class run_time is 7.737
// Through this magical command we established that the average class run_time is 8.841
// jq <~/results.json '.tests | map(select(.result != "skipped")) | group_by(.classname) | map(map(.run_time) | add) | (add / length)'
private[this] val AVERAGE_TEST_CLASS_RUN_TIME = 7.737d
private[this] val AVERAGE_TEST_CLASS_RUN_TIME = 8.841d

override def globalSettings: Seq[Def.Setting[_]] = List(
circleTestsByProject := {
@@ -107,9 +109,9 @@
.map(_.asScala)
.getOrElse(Iterator())
.toStream
.filter(_.result != "skipped") // don't count timings of tests that didn't run
.groupBy(result => TestKey(result.source, result.classname))
.mapValues(_.foldLeft(0.0d)(_ + _.run_time))
// don't count timings on skipped tests, but remember we've seen the classname
.mapValues(_.filter(_.result != "skipped").foldLeft(0.0d)(_ + _.run_time))
} catch {
case e: Exception =>
log.warn(f"Couldn't read test results file: $testResultsFile")
@@ -149,12 +151,19 @@
val tests = Dequeue[(TestKey, Double)](
allTestsTimings.toIndexedSeq.sortBy { case (key, runTime) => (runTime, key) } : _*)

case class Group(tests: List[TestKey], runTime: Double)

implicit val groupOrder: Order[Group] = {
import scalaz.std.anyVal._
Order.orderBy(_.runTime)
}

@tailrec
def process(tests: Dequeue[(TestKey, Double)],
soFar: Double = 0d,
takeLeft: Boolean = true,
acc: List[TestKey] = Nil,
groups: List[List[TestKey]] = Nil): List[List[TestKey]] = {
groups: List[Group] = Nil): List[Group] = {

if (groups.size == totalNodes || tests.isEmpty) {
// Short circuit the logic if we've just completed the last group
@@ -163,12 +172,17 @@
return if (tests.isEmpty) {
groups
} else {
// Fit all remaining tests in the last issued bucket.
val lastGroup +: restGroups = groups
val toFit = tests.toStream.map(_._1).force
log.info(s"Fitting remaining tests into first bucket (which already has " +
s"${lastGroup.size} tests): $toFit")
(toFit ++: lastGroup) :: restGroups
log.info(s"Fitting remaining tests into smallest buckets: $toFit")
// Fit all remaining tests into the least used buckets.
// import needed for creating Heap from List (needs Foldable[List[_]])
import scalaz.std.list._
tests.foldLeft(Heap.fromData(groups)) { case(heap, (test, runTime)) =>
heap.uncons match {
case Some((group, rest)) =>
rest.insert(group.copy(test :: group.tests, runTime + group.runTime))
}
}.toList
}
}

@@ -192,7 +206,7 @@
case x@TestCandidate((_, runTime), _, _) if soFar + runTime <= timePerNode => x
} match {
case None =>
process(tests, 0d, takeLeft = true, Nil, acc :: groups)
process(tests, 0d, takeLeft = true, Nil, Group(acc, soFar) :: groups)
case Some(TestCandidate((key, runTime), rest, fromLeft)) =>
process(rest, soFar + runTime, fromLeft, key :: acc, groups)
}
@@ -202,11 +216,11 @@
val rootTarget = (target in LocalRootProject).value
val bucketsFile = rootTarget / "tests-by-bucket.json"
log.info(s"Saving test distribution into $totalNodes buckets to: $bucketsFile")
mapper.writeValue(bucketsFile, buckets)
val timingsPerBucket = buckets.map(_.iterator.map(allTestsTimings.apply).sum)
mapper.writeValue(bucketsFile, buckets.map(_.tests))
val timingsPerBucket = buckets.map(_.tests.iterator.map(allTestsTimings.apply).sum)
log.info(s"Estimated test timings per bucket: $timingsPerBucket")

val bucket = buckets.lift.apply(index).getOrElse(Nil)
val bucket = buckets.map(_.tests).lift.apply(index).getOrElse(Nil)

val groupedByProject = bucket.flatMap(testsByKey.apply)
.groupBy(_.project)
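
The leftover-test handling added above replaces "dump everything into the last issued bucket" with a min-heap keyed on each group's accumulated run time, so every remaining test lands in whichever bucket is currently cheapest. A self-contained sketch of that step, using assumed names and only the scalaz calls the diff itself imports (Heap.fromData, uncons, insert, Order.orderBy):

    import scalaz.{Heap, Order}
    import scalaz.std.anyVal._ // Order[Double], used to order groups by run time
    import scalaz.std.list._   // Foldable[List], required by Heap.fromData

    // Sketch only: spill leftover (test, runTime) pairs into the least-loaded bucket.
    case class Group(tests: List[String], runTime: Double)

    object SpillSketch {
      implicit val groupOrder: Order[Group] = Order.orderBy(_.runTime)

      def spill(groups: List[Group], leftovers: List[(String, Double)]): List[Group] =
        leftovers.foldLeft(Heap.fromData(groups)) { case (heap, (test, runTime)) =>
          heap.uncons match {
            case Some((cheapest, rest)) =>
              // Add the test to the cheapest bucket, then re-insert it with its new total run time.
              rest.insert(cheapest.copy(test :: cheapest.tests, cheapest.runTime + runTime))
            case None => heap // no buckets to fill; cannot happen once groups.size == totalNodes
          }
        }.toList
    }

Because the heap is rebuilt after each insertion, buckets that receive a leftover test sink back down only once their accumulated run time exceeds the others, which keeps the estimated per-bucket timings close together.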