# Merge pull request #2094 from cBioPortal/add_study #328
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Zip studies on push to master branch

# Fire on pushes to the master branch only, and only when the push touches
# a file under one of the two study trees listed in `paths`.
on:
  push:
    branches:
      - master
    paths:
      - "crdc/gdc/**"
      - "public/**"
jobs:
  # Publishes a JSON index of every study directory to S3 and uploads one
  # .tar.gz archive per study directory touched by the triggering push.
  zip_studies:
    # Run only when the workflow executes in the cbioportal/datahub repository.
    if: github.repository == 'cbioportal/datahub'
    runs-on: ubuntu-latest
    steps:
      # Provides the changed files of this push as the step output `all`.
      # The next step splits that value on single spaces, so it is presumably
      # a space-separated list of repository-relative paths; verify against
      # the action's documentation.
      - id: files
        uses: jitterbit/get-changed-files@v1

      # Reduce the changed-file list to the distinct study directories
      # ("public/<study>" or "crdc/gdc/<study>") and expose them, separated by
      # spaces, as the step output `unique_study_paths_list`.
      - id: unique_studies
        env:
          # Passed through the environment rather than expanded inside the
          # script, so file names are never parsed as shell code.
          CHANGED_FILES: ${{ steps.files.outputs.all }}
        run: |
          # printf (not echo) so no trailing newline ends up in the last
          # record; a newline would corrupt the name=value output line below.
          unique_study_paths_list=$(printf '%s' "${CHANGED_FILES}" | awk 'BEGIN { RS=" "; ORS=" " }
            {
              # public/<study>/...   -> key "public/<study>"
              if (index($0, "public/") == 1) {
                split($0, parts, "/");
                set["public/" parts[2]]++;
              }
              # crdc/gdc/<study>/... -> key "crdc/gdc/<study>"
              if (index($0, "crdc/gdc/") == 1) {
                split($0, parts, "/");
                set["crdc/gdc/" parts[3]]++;
              }
            }
            END { for (studypath in set) { print studypath } }')
          # The "set-output" workflow command is deprecated; step outputs are
          # written to the file named by GITHUB_OUTPUT instead.
          echo "unique_study_paths_list=${unique_study_paths_list}" >> "${GITHUB_OUTPUT}"

      - uses: actions/checkout@v2

      # Part 1: build and upload the JSON index of all studies.
      # Part 2: pull, archive, and upload each changed study.
      - env:
          STUDY_PATH_FILENAME: study_paths.txt
          STUDY_LIST_FILENAME: study_list.json
          SAVED_GIT_REPO_FILENAME: save_git.tar
          # Changed study directories computed by the `unique_studies` step;
          # handed over via the environment instead of inline expansion.
          CHANGED_STUDY_PATHS: ${{ steps.unique_studies.outputs.unique_study_paths_list }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
          AWS_EC2_METADATA_DISABLED: "true"  # fixes aws cli error (code 255); needed since ubuntu-latest v2.262.1
        run: |
          # Create a JSON array naming every study directory found directly
          # under 'crdc/gdc' and 'public' (not just the changed ones).
          echo "creating ${STUDY_LIST_FILENAME}"
          echo "[" > "${STUDY_LIST_FILENAME}"
          first_study=1
          # -mindepth/-maxdepth go before -type: GNU find warns otherwise.
          find crdc/gdc -mindepth 1 -maxdepth 1 -type d | sort > "${STUDY_PATH_FILENAME}"
          find public -mindepth 1 -maxdepth 1 -type d | sort >> "${STUDY_PATH_FILENAME}"
          while IFS= read -r studypath ; do
            # Separator goes before every entry except the first.
            if [ "${first_study}" -ne 1 ] ; then
              echo "," >> "${STUDY_LIST_FILENAME}"
            fi
            studyname=$(basename "${studypath}")
            printf '"%s"' "${studyname}" >> "${STUDY_LIST_FILENAME}"
            first_study=0
          done < "${STUDY_PATH_FILENAME}"
          rm -f "${STUDY_PATH_FILENAME}"
          echo "" >> "${STUDY_LIST_FILENAME}"
          echo "]" >> "${STUDY_LIST_FILENAME}"

          # Copy the study list file to S3.
          echo "copying ${STUDY_LIST_FILENAME} to S3"
          aws s3 cp --acl public-read "${STUDY_LIST_FILENAME}" "s3://${AWS_S3_BUCKET}/"
          rm -f "${STUDY_LIST_FILENAME}"

          # Snapshot .git before any LFS pull so it can be restored after each
          # study; the loop deletes .git and unpacks this snapshot again,
          # discarding whatever `git lfs pull` added to it.
          tar -cf "${SAVED_GIT_REPO_FILENAME}" .git
          mkdir -p studies_to_upload

          # Intentionally unquoted: the list is split on whitespace into one
          # study path per iteration.
          for study_path in ${CHANGED_STUDY_PATHS}; do
            # Skip entries that are not directories in the checkout (for
            # example a study removed by this push, or a plain file that sits
            # directly under public/ or crdc/gdc/).
            [ -d "${study_path}" ] || continue

            changed_study=$(basename "${study_path}")
            study_dir=$(dirname "${study_path}")
            archive="studies_to_upload/${changed_study}.tar.gz"

            echo "Pulling study: ${study_path}."
            git lfs pull --include="${study_path}"

            echo "Compressing this study: ${study_path}."
            # -C makes the archive contain "<study>/..." without parent dirs.
            tar -czvf "${archive}" -C "${study_dir}" "${changed_study}"

            echo "Copy file to S3: ${archive}"
            aws s3 cp --acl public-read "${archive}" "s3://${AWS_S3_BUCKET}/"

            echo "Remove this directory: ${study_path}."
            rm -rf "${study_path}"
            echo "Remove this file: ${archive}."
            rm "${archive}"

            echo "Remove .git"
            rm -rf .git
            # Add the original .git back.
            tar -xf "${SAVED_GIT_REPO_FILENAME}"
          done