-
Notifications
You must be signed in to change notification settings - Fork 15
/
packages.cloudbuild.yaml
82 lines (74 loc) · 2.42 KB
/
packages.cloudbuild.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# This build fetches R packages used on Kaggle and updates
# the `packages_users` file on GitHub.
# In order for this build to have write access to the repo, a deploy key
# has been created and added to the repo.
# - doc: https://cloud.google.com/cloud-build/docs/access-private-github-repos
# - deploy key: https://github.com/Kaggle/docker-rcran/settings/keys
steps:
# Get the rsa key file from Secret Manager.
- name: gcr.io/cloud-builders/gcloud
entrypoint: 'bash'
args: [ '-c', 'gcloud secrets versions access latest --secret=rcran-id-rsa > /root/.ssh/id_github' ]
volumes:
- name: 'ssh'
path: /root/.ssh
# Set up git with key and domain.
- name: 'gcr.io/cloud-builders/git'
entrypoint: 'bash'
args:
- '-c'
- |
chmod 600 /root/.ssh/id_github
cat <<EOF >/root/.ssh/config
Hostname github.com
IdentityFile /root/.ssh/id_github
EOF
ssh-keyscan -t rsa github.com > /root/.ssh/known_hosts
git config --global user.email "[email protected]"
volumes:
- name: 'ssh'
path: /root/.ssh
# Pull the repository from GitHub.
- name: 'gcr.io/cloud-builders/git'
args:
- clone
- [email protected]:Kaggle/docker-rcran.git
volumes:
- name: 'ssh'
path: /root/.ssh
# Fetch the packages from BigQuery.
# The `bq` image has been built previously in the project:
# $ git clone https://github.com/GoogleCloudPlatform/cloud-builders-community.git
# $ cd cloud-builders-community/bq
# $ gcloud builds submit
- name: 'gcr.io/$PROJECT_ID/bq'
entrypoint: 'bash'
dir: docker-rcran
args:
- '-c'
- |
set -eo pipefail
bq query \
--use_legacy_sql=false \
--format=sparse \
--max_rows=100000 \
'SELECT PackageName FROM `kaggle-infra-analytics.kaggle_derived_no_pii.KernelBlobsRPackagesLast60days` ORDER BY PackageName' \
> bq_response || (cat bq_response && exit 1)
# Remove leading whitespaces
sed "s/^[ \t]*//" -i bq_response
# Ignore tensorflow which is installed in docker-rstats image.
# Also ignore the headers of the search results.
cat bq_response | grep -v 'tensorflow' | awk 'NR>2' > packages_users
# Commit the file to GitHub.
- name: 'gcr.io/cloud-builders/git'
entrypoint: 'bash'
dir: docker-rcran
args:
- '-c'
- |
git commit -am "Update the list of R packages used on Kaggle ($(wc -l packages_users | awk '{ print $1 }') packages)"
git push -u origin main
volumes:
- name: 'ssh'
path: /root/.ssh
tags: ['rcran', 'packages', 'notification']