cron.yml
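This GitHub Actions workflow (kept under .github/workflows/ in the repository) runs the City Scrapers spiders once a day on a cron schedule, or on demand via workflow_dispatch, and then combines their output feeds.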
name: Cron

on:
  schedule:
    # Set any time that you'd like scrapers to run (in UTC)
    - cron: "1 6 * * *"
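    # Cron fields are minute, hour, day-of-month, month, and day-of-week,
    # so "1 6 * * *" fires once a day at 06:01 UTC.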
  workflow_dispatch:

env:
  CI: true
  PIPENV_VENV_IN_PROJECT: true
  SCRAPY_SETTINGS_MODULE: city_scrapers.settings.prod
  WAYBACK_ENABLED: true
  AUTOTHROTTLE_MAX_DELAY: 30.0
  AUTOTHROTTLE_START_DELAY: 1.5
  AUTOTHROTTLE_TARGET_CONCURRENCY: 3.0
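  # The AUTOTHROTTLE_* values tune Scrapy's AutoThrottle extension (maximum delay,
  # initial delay, and target concurrency per remote site); they are assumed to be
  # read from the environment by the prod settings module above.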
  # Add secrets for the platform you're using and uncomment here
  # AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
  # AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
  # S3_BUCKET: ${{ secrets.S3_BUCKET }}
  # AZURE_ACCOUNT_KEY: ${{ secrets.AZURE_ACCOUNT_KEY }}
  # AZURE_ACCOUNT_NAME: ${{ secrets.AZURE_ACCOUNT_NAME }}
  # AZURE_CONTAINER: ${{ secrets.AZURE_CONTAINER }}
  # GOOGLE_APPLICATION_CREDENTIALS: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
  # GCS_BUCKET: ${{ secrets.GCS_BUCKET }}
  # To set up Sentry, add the DSN to secrets and uncomment here
  # SENTRY_DSN: ${{ secrets.SENTRY_DSN }}

jobs:
  crawl:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python 3.8
        uses: actions/setup-python@v1
        with:
          python-version: 3.8
      - name: Install Pipenv
        uses: dschep/install-pipenv-action@v1
      - name: Cache Python dependencies
        uses: actions/cache@v1
        with:
          path: .venv
          key: pip-3.8-${{ hashFiles('**/Pipfile.lock') }}
          restore-keys: |
            pip-3.8-
            pip-
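      # The cache key above changes whenever Pipfile.lock changes; on a miss, the
      # restore-keys prefixes fall back to the most recent older cache so the
      # following pipenv sync has less to install.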
      - name: Install dependencies
        run: pipenv sync
        env:
          PIPENV_DEFAULT_PYTHON_VERSION: 3.8
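      # The next step calls the repo's .deploy.sh script to run the scrapers;
      # the per-spider crawl commands are assumed to live in that script.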
      - name: Run scrapers
        run: |
          export PYTHONPATH=$(pwd):$PYTHONPATH
          ./.deploy.sh
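      # combinefeeds is a custom Scrapy command (assumed to come from
      # city-scrapers-core) that merges the per-spider output feeds into
      # combined feeds on the configured storage backend.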
      - name: Combine output feeds
        run: |
          export PYTHONPATH=$(pwd):$PYTHONPATH
          pipenv run scrapy combinefeeds -s LOG_ENABLED=False
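Because the workflow also declares workflow_dispatch, a crawl can be started by hand from the repository's Actions tab, or, as a sketch assuming the GitHub CLI is installed and authenticated against this repository, from a terminal:

gh workflow run cron.yml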