Merge pull request #2094 from cBioPortal/add_study #328

Workflow file for this run

name: Zip studies on push to master branch
on:
  # Trigger the workflow on push,
  # but only for the master branch
  push:
    branches:
      - master
    paths:
      - 'crdc/gdc/**'
      - 'public/**'
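      # Example (hypothetical file names): a push that modifies
      # 'public/acc_tcga/meta_study.txt' matches 'public/**' and triggers this
      # workflow; a push touching files outside these two trees does not.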
jobs:
  zip_studies:
    if: github.repository == 'cbioportal/datahub'
    runs-on: ubuntu-latest
    steps:
      - id: files
        uses: jitterbit/get-changed-files@v1
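      # (steps.files.outputs.all is a space-delimited list of every file added,
      # modified, or removed by the push, which is why the awk script below
      # sets RS=" ")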
      # Reduce the list of changed files to the set of unique top-level study
      # directories ('public/<study>' or 'crdc/gdc/<study>') they belong to.
      - id: unique_studies
        run: |
          echo "##[set-output name=unique_study_paths_list;]$(echo "${{ steps.files.outputs.all }}" | awk 'BEGIN { RS=" "; ORS=" " }
          {
            if (index($0, "public/") == 1) {
              split($0, parts, "/");
              set["public/" parts[2]]++;
            }
            if (index($0, "crdc/gdc/") == 1) {
              split($0, parts, "/");
              set["crdc/gdc/" parts[3]]++;
            }
          }
          END { for (studypath in set) { print studypath } }')"
          exit 0
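      # Example (hypothetical file names): if the changed files were
      # "public/acc_tcga/data_clinical.txt crdc/gdc/brca_example/meta_study.txt",
      # the step output would be "public/acc_tcga crdc/gdc/brca_example "
      # (order not guaranteed).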
      - uses: actions/checkout@v2
      - run: |
          # detect modified studies and archive them;
          # first create a JSON list of all studies under 'crdc/gdc' and 'public'
          echo creating ${STUDY_LIST_FILENAME}
          echo "[" > ${STUDY_LIST_FILENAME}
          first_study=1
          find crdc/gdc -mindepth 1 -maxdepth 1 -type d | sort > ${STUDY_PATH_FILENAME}
          find public -mindepth 1 -maxdepth 1 -type d | sort >> ${STUDY_PATH_FILENAME}
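          # (hypothetical example: study_paths.txt now holds one study directory
          # per line, e.g. "crdc/gdc/brca_example" followed by "public/acc_tcga")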
          studypaths=()
          while read studypath ; do
            if [ ${first_study} -ne 1 ] ; then
              echo "," >> ${STUDY_LIST_FILENAME}
            fi
            studypaths+=("$studypath")
            studyname=$(basename "$studypath")
            echo -n "\"${studyname}\"" >> ${STUDY_LIST_FILENAME}
            first_study=0
          done < ${STUDY_PATH_FILENAME}
          rm -f ${STUDY_PATH_FILENAME}
          echo "" >> ${STUDY_LIST_FILENAME}
          echo "]" >> ${STUDY_LIST_FILENAME}
          # copy the study list file to S3
          echo copying ${STUDY_LIST_FILENAME} to S3
          aws s3 cp --acl public-read ${STUDY_LIST_FILENAME} s3://${AWS_S3_BUCKET}/
          rm -f ${STUDY_LIST_FILENAME}
          # save .git so it can be restored after each study is processed
          tar -cvf ${SAVED_GIT_REPO_FILENAME} .git > /dev/null
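          # (.git is deleted and restored inside the loop below, presumably to
          # reclaim the disk space taken by the git-lfs objects pulled per study)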
          mkdir studies_to_upload
          for study_path in ${{ steps.unique_studies.outputs.unique_study_paths_list }}; do
            if [ ! -d "${study_path}" ]; then
              continue
            fi
            changed_study=$(basename "$study_path")
            study_dir=$(dirname "$study_path")
            echo "Pulling study: ${study_path}."
            git lfs pull --include="${study_path}"
            echo "Compressing this study: ${study_path}."
            tar -czvf "studies_to_upload/${changed_study}.tar.gz" -C "${study_dir}" "${changed_study}"
            echo "Copy file to S3: studies_to_upload/${changed_study}.tar.gz"
            aws s3 cp --acl public-read "studies_to_upload/${changed_study}.tar.gz" s3://${AWS_S3_BUCKET}/
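            # (with --acl public-read, the uploaded tarball is publicly
            # downloadable, e.g. at https://<bucket>.s3.amazonaws.com/<study>.tar.gz)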
echo "Remove this directory: ${study_path}."
rm -rf "${study_path}"
echo "Remove this file: studies_to_upload/${changed_study}.tar.gz."
rm studies_to_upload/${changed_study}.tar.gz
echo "Remove .git"
rm -rf .git
# add original .git back
tar -xf ${SAVED_GIT_REPO_FILENAME} > /dev/null
done
        env:
          STUDY_PATH_FILENAME: study_paths.txt
          STUDY_LIST_FILENAME: study_list.json
          SAVED_GIT_REPO_FILENAME: save_git.tar
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_S3_BUCKET: ${{ secrets.AWS_S3_BUCKET }}
          AWS_EC2_METADATA_DISABLED: "true" # fixes an aws cli error (exit code 255); needed since ubuntu-latest runner v2.262.1