106 changes: 106 additions & 0 deletions jenkins/Jenkinsfile
@@ -0,0 +1,106 @@
#!/usr/bin/env groovy
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
 * Jenkinsfile to run the pre-merge integration and unit tests in parallel on Blossom Kubernetes GPU agents.
 */
@Library(['shared-libs', 'blossom-lib']) _

def IMAGE = "${ArtifactoryConstants.ARTIFACTORY_NAME}/sw-spark-docker/plugin:dev-ubuntu18-cuda11.0-blossom-dev"

pipeline {
agent {
kubernetes {
label "test-parallel-${BUILD_NUMBER}"
cloud 'sc-ipp-blossom-prod'
}
}

options {
ansiColor('xterm')
timeout(time: 2, unit: 'HOURS')
buildDiscarder(logRotator(numToKeepStr: '20'))
}

parameters {
string(name: 'GPU_POOL', defaultValue: 'RESERVED_POOL', description: 'GPU pool name')
}

environment {
MVN_URM_MIRROR = '-s jenkins/settings.xml -P mirror-apache-to-urm'
LIBCUDF_KERNEL_CACHE_PATH = '/tmp/.cudf'
URM_URL = "https://${ArtifactoryConstants.ARTIFACTORY_NAME}/artifactory/sw-spark-maven"
CUDA_CLASSIFIER = 'cuda11'
}

stages {
stage('Test Parallel for Pre-merge') {
parallel {
stage('Integration Test') {
options {
lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
}
agent {
kubernetes {
label "test-parallel-it-${BUILD_TAG}"
cloud 'sc-ipp-blossom-prod'
yaml pod.getGPUYAML("${IMAGE}", "${env.GPU_RESOURCE}", '8', '32Gi')
}
}

steps {
container('gpu') {
script {
sh "cat /proc/cpuinfo; cat /proc/meminfo"
sh "jenkins/spark-premerge-build.sh"
step([$class : 'JacocoPublisher',
execPattern : '**/target/jacoco.exec',
classPattern : 'target/jacoco_classes/',
sourcePattern : 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,sql-plugin/src/main/java/,sql-plugin/src/main/scala/,shims/spark311/src/main/scala/,shims/spark301db/src/main/scala/,shims/spark301/src/main/scala/,shims/spark302/src/main/scala/,shims/spark303/src/main/scala/,shims/spark304/src/main/scala/,shims/spark312/src/main/scala/,shims/spark313/src/main/scala/',
sourceInclusionPattern: '**/*.java,**/*.scala'
])
}
}
}
}

stage('Unit Test') {
options {
lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
}
agent {
kubernetes {
label "test-parallel-ut-${BUILD_TAG}"
cloud 'sc-ipp-blossom-prod'
yaml pod.getGPUYAML("${IMAGE}", "${env.GPU_RESOURCE}", '8', '32Gi')
}
}

steps {
container('gpu') {
script {
sh "cat /proc/cpuinfo; cat /proc/meminfo"
sh "jenkins/spark-premerge-build-ut.sh"
}
}
}
}
}
}
}
}
108 changes: 81 additions & 27 deletions jenkins/Jenkinsfile-blossom.premerge
@@ -22,7 +22,8 @@
*/

@Library(['shared-libs', 'blossom-lib']) _
@Library('blossom-github-lib@master')
// @Library('blossom-github-lib@master')
@Library('blossom-github-lib@nvbug-3339178')
import ipp.blossom.*

def githubHelper // blossom github helper
@@ -151,33 +152,81 @@ pipeline {
!skipped
}
}
options {
// We have to use params to pass the resource label in the options block;
// this is a limitation of declarative pipelines, and we need to lock the resource before the agent starts
lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
}
agent {
kubernetes {
label "premerge-test-${BUILD_TAG}"
cloud 'sc-ipp-blossom-prod'
yaml pod.getGPUYAML("${IMAGE_PREMERGE}", "${env.GPU_RESOURCE}", '8', '32Gi') // cpu: 8, memory: 32Gi
workspaceVolume persistentVolumeClaimWorkspaceVolume(claimName: "${PVC}", readOnly: false)
customWorkspace "${CUSTOM_WORKSPACE}"

failFast true
parallel {
stage('Integration Test') {
options {
// We have to use params to pass the resource label in the options block;
// this is a limitation of declarative pipelines, and we need to lock the resource before the agent starts
lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
}
agent {
kubernetes {
label "premerge-test-it-${BUILD_TAG}"
cloud 'sc-ipp-blossom-prod'
yaml pod.getGPUYAML("${IMAGE_PREMERGE}", "${env.GPU_RESOURCE}", '8', '32Gi') // cpu: 8, memory: 32Gi
workspaceVolume persistentVolumeClaimWorkspaceVolume(claimName: "${PVC}", readOnly: false)
customWorkspace "${CUSTOM_WORKSPACE}"
}
}

steps {
script {
container('gpu') {
// TODO: improve resource management
timeout(time: 4, unit: 'HOURS') { // step only timeout for test run
sh "$PREMERGE_SCRIPT"
step([$class : 'JacocoPublisher',
execPattern : '**/target/jacoco.exec',
classPattern : 'target/jacoco_classes/',
sourcePattern : 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,sql-plugin/src/main/java/,sql-plugin/src/main/scala/,shims/spark311/src/main/scala/,shims/spark301db/src/main/scala/,shims/spark301/src/main/scala/,shims/spark302/src/main/scala/,shims/spark303/src/main/scala/,shims/spark304/src/main/scala/,shims/spark312/src/main/scala/,shims/spark313/src/main/scala/',
sourceInclusionPattern: '**/*.java,**/*.scala'
])
}
}
}
}
}
}

steps {
script {
container('gpu') {
// TODO: improve resource management
timeout(time: 4, unit: 'HOURS') { // step only timeout for test run
sh "$PREMERGE_SCRIPT"
step([$class : 'JacocoPublisher',
execPattern : '**/target/jacoco.exec',
classPattern : 'target/jacoco_classes/',
sourcePattern : 'shuffle-plugin/src/main/scala/,udf-compiler/src/main/scala/,sql-plugin/src/main/java/,sql-plugin/src/main/scala/,shims/spark311/src/main/scala/,shims/spark301db/src/main/scala/,shims/spark301/src/main/scala/,shims/spark302/src/main/scala/,shims/spark303/src/main/scala/,shims/spark304/src/main/scala/,shims/spark312/src/main/scala/,shims/spark313/src/main/scala/',
sourceInclusionPattern: '**/*.java,**/*.scala'
])
stage('Unit Test') {
options {
// We have to use params to pass the resource label in the options block;
// this is a limitation of declarative pipelines, and we need to lock the resource before the agent starts
lock(label: "${params.GPU_POOL}", quantity: 1, variable: 'GPU_RESOURCE')
}
agent {
kubernetes {
label "premerge-test-ut-${BUILD_TAG}"
cloud 'sc-ipp-blossom-prod'
yaml pod.getGPUYAML("${IMAGE_PREMERGE}", "${env.GPU_RESOURCE}", '8', '32Gi') // cpu: 8, memory: 32Gi
workspaceVolume persistentVolumeClaimWorkspaceVolume(claimName: "${PVC}", readOnly: false)
customWorkspace "${CUSTOM_WORKSPACE}-ut"
}
}

steps {
script {
checkout(
changelog: false,
poll: true,
scm: [
$class: 'GitSCM', branches: [[name: githubHelper.getMergedSHA()]],
doGenerateSubmoduleConfigurations: false,
submoduleCfg: [],
userRemoteConfigs: [[
credentialsId: 'github-token',
url: githubHelper.getCloneUrl(),
refspec: '+refs/pull/*/merge:refs/remotes/origin/pr/*']]
]
)

container('gpu') {
// TODO: improve resource management
timeout(time: 2, unit: 'HOURS') { // step only timeout for test run
sh "$JENKINS_ROOT/spark-premerge-build-ut.sh"
}
}
}
}
}
@@ -197,9 +246,14 @@
} else {
// upload log only in case of build failure
def guardWords = ["gitlab.*?\\.com", "urm.*?\\.com"]

// hide GPU info
guardWords.add("nvidia-smi(?s)(.*?)(?=jenkins/version-def.sh)")

def logPattern = "### BEGIN OF TEST LOG ###.*### END OF TEST LOG ###"

githubHelper.uploadPartialLogs(this, env.JOB_NAME, env.BUILD_NUMBER, null, guardWords, logPattern)
// githubHelper.uploadPartialLogs(this, env.JOB_NAME, env.BUILD_NUMBER, null, guardWords, logPattern)
githubHelper.uploadParallelLogs(this, env.JOB_NAME, env.BUILD_NUMBER, null, guardWords)

githubHelper.updateCommitStatus("$BUILD_URL", "Fail", GitHubCommitState.FAILURE)
}
72 changes: 72 additions & 0 deletions jenkins/spark-premerge-build-ut.sh
@@ -0,0 +1,72 @@
#!/bin/bash
#
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -ex

nvidia-smi

function on_exit {
echo '### END OF TEST LOG ###'
}
trap on_exit EXIT

echo '### BEGIN OF TEST LOG ###'

. jenkins/version-def.sh

# get merge BASE from merged pull request. Log message e.g. "Merge HEAD into BASE"
# BASE_REF=$(git --no-pager log --oneline -1 | awk '{ print $NF }')
# file size check for pull request. The size of a committed file should be less than 1.5MiB
# pre-commit run check-added-large-files --from-ref $BASE_REF --to-ref HEAD

ARTF_ROOT="$WORKSPACE/.download"
MVN_GET_CMD="mvn org.apache.maven.plugins:maven-dependency-plugin:2.8:get -B \
$MVN_URM_MIRROR -DremoteRepositories=$URM_URL \
-Ddest=$ARTF_ROOT"

rm -rf $ARTF_ROOT && mkdir -p $ARTF_ROOT

# Download a full version of spark
$MVN_GET_CMD \
-DgroupId=org.apache -DartifactId=spark -Dversion=$SPARK_VER -Dclassifier=bin-hadoop3.2 -Dpackaging=tgz

# export SPARK_HOME="$ARTF_ROOT/spark-$SPARK_VER-bin-hadoop3.2"
# export PATH="$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH"
# tar zxf $SPARK_HOME.tgz -C $ARTF_ROOT && \
# rm -f $SPARK_HOME.tgz

# mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims,pre-merge' clean verify -Dpytest.TEST_TAGS='' \
# -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=4 -Dcuda.version=$CUDA_CLASSIFIER
# Run the unit tests for other Spark versions but don't run the full Python integration tests
# NOT ALL TESTS NEEDED FOR PREMERGE
# Test the latest stable and snapshot shims for each Spark minor version. All other shims tests should be covered in nightly pipelines
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark303tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark304tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark312tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark313tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
# Disabled until Spark 3.2 source incompatibility fixed, see https://github.com/NVIDIA/spark-rapids/issues/2052
#env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark320tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER

# The jacoco coverage should have been collected, but because of how the shade plugin
# and jacoco interact we need to clean some things up so jacoco will only report on the
# classes we care about
# mkdir -p target/jacoco_classes/
# FILE=$(ls dist/target/rapids-4-spark_2.12-*.jar | grep -v test | xargs readlink -f)
# pushd target/jacoco_classes/
# jar xf $FILE
# rm -rf com/nvidia/shaded/ org/openucx/
# popd
2 changes: 1 addition & 1 deletion jenkins/spark-premerge-build.sh
@@ -55,7 +55,7 @@ mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims,pre-merge' clean verify -Dpytest.TE
# NOT ALL TESTS NEEDED FOR PREMERGE
# Just test one 3.0.X version (base version covers this) and one 3.1.X version.
# All other shims tests should be covered in nightly pipelines
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark313tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
# env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark313tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
# Disabled until Spark 3.2 source incompatibility fixed, see https://github.com/NVIDIA/spark-rapids/issues/2052
#env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark320tests,snapshot-shims test -Dpytest.TEST_TAGS='' -Dcuda.version=$CUDA_CLASSIFIER
