Skip to content

Commit 1f3ca1b

Browse files
pmacjgmize
authored and committed
Upload and distribute database updates via S3 (#5334)
A.K.A. The Sqlitening! This removes bedrock's dependence on a database server and moves to downloading pre-built SQLite database files from S3 on a schedule. There is also a clock process that updates and uploads such a database on a schedule. This should mean more stability, speed, and reliability for bedrock, as well as quicker development thanks to easy-to-download, pre-populated databases.
1 parent 9c8f7ea commit 1f3ca1b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+657
-197
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ Desktop.ini
2828
venv
2929
*.db
3030
*.mmdb
31+
bedrock_db_info.json
32+
!root_files/bedrock_db_info.json
3133
james.ini
3234
test-results.xml
3335
tests/unit/coverage

Jenkinsfile

-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ def loadBranch(String branch) {
3434

3535
// load the global config
3636
global_config = readYaml file: 'jenkins/global.yml'
37-
env.DEMO_MODE = config.demo ? 'true' : 'false'
3837
// defined in the Library loaded above
3938
setGitEnvironmentVariables()
4039
setConfigEnvironmentVariables(global_config)

Procfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
web: ./bin/run.sh
2-
clock: ./bin/run-clock.sh
2+
clock: ./bin/run-db-clock.sh
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
<!doctype html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="utf-8">
5+
<title>Time Since Last Cron Task Runs</title>
6+
<style>
7+
h2.success {
8+
color: green;
9+
}
10+
h2.fail {
11+
color: #c00;
12+
}
13+
th, td {
14+
padding: 3px 5px;
15+
margin: 5px;
16+
}
17+
tr:nth-child(even) > td, th {
18+
background-color: #eee;
19+
}
20+
#other-data tr > td:first-child {
21+
font-weight: bold;
22+
}
23+
.error > td {
24+
color: #c00;
25+
font-weight: bold;
26+
}
27+
.num {
28+
text-align: right;
29+
}
30+
</style>
31+
</head>
32+
33+
<body>
34+
<h1>Time Since Last Cron Task Runs</h1>
35+
{% if success %}
36+
<h2 class="success">PASS</h2>
37+
{% else %}
38+
<h2 class="fail">FAIL</h2>
39+
{% endif %}
40+
<table>
41+
<thead>
42+
<tr>
43+
<th>Task Name</th>
44+
<th>Max Seconds</th>
45+
<th>Seconds Since Last Run</th>
46+
</tr>
47+
</thead>
48+
<tbody>
49+
{% for name, maxt, realt, task_pass in results %}
50+
<tr{% if not task_pass %} class="error"{% endif %}>
51+
<td>{{ name }}</td>
52+
<td class="num">{{ maxt }}</td>
53+
<td class="num">{{ realt }}</td>
54+
</tr>
55+
{% endfor %}
56+
</tbody>
57+
</table>
58+
59+
<h1>Other Site Data</h1>
60+
<table id="other-data">
61+
<tbody>
62+
<tr>
63+
<td>Hostname</td>
64+
<td>{{ server_info.name }}</td>
65+
</tr>
66+
{% if server_info.git_sha %}
67+
<tr>
68+
<td>Bedrock Git SHA</td>
69+
<td><a href="https://github.com/mozilla/bedrock/commit/{{ server_info.git_sha }}">{{ server_info.git_sha[:10] }}</a></td>
70+
</tr>
71+
{% endif %}
72+
{% if server_info.db_git_sha %}
73+
<tr>
74+
<td>DB Git SHA</td>
75+
<td><a href="https://github.com/mozilla/bedrock/commit/{{ server_info.db_git_sha }}">{{ server_info.db_git_sha[:10] }}</a></td>
76+
</tr>
77+
{% endif %}
78+
{% if server_info.db_checksum %}
79+
<tr>
80+
<td>DB File Checksum</td>
81+
<td>{{ server_info.db_checksum }}</td>
82+
</tr>
83+
{% endif %}
84+
{% if server_info.db_last_update %}
85+
<tr>
86+
<td>DB Last Updated</td>
87+
<td>{{ server_info.db_last_update }} min ago</td>
88+
</tr>
89+
{% endif %}
90+
{% if server_info.db_file_name %}
91+
<tr>
92+
<td>DB File</td>
93+
<td><a href="{{ server_info.db_file_url }}">{{ server_info.db_file_name }}</a></td>
94+
</tr>
95+
{% endif %}
96+
</tbody>
97+
</table>
98+
</body>
99+
</html>

bedrock/base/views.py

+73
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import json
22
import logging
3+
import os.path
4+
from os import getenv
5+
from time import time
36

47
from django.conf import settings
58
from django.http import HttpResponse, HttpResponseBadRequest
9+
from django.shortcuts import render
610
from django.views.decorators.cache import never_cache
711
from django.views.decorators.csrf import csrf_exempt
812
from django.views.decorators.http import require_POST, require_safe
@@ -56,6 +60,75 @@ def geolocate(request):
5660
})
5761

5862

63+
# Cron tasks monitored by the health check, as
# (marker-file suffix, max seconds allowed since the last run) pairs.
# The health-check view reads the mtime of /tmp/last-run-<suffix>.
HEALTH_FILES = (
    ('download_database', 600),
    ('update_locales', 600),
)
# JSON file holding the database metadata shown on the health-check page.
DB_INFO_FILE = getenv('AWS_DB_JSON_DATA_FILE', 'bedrock_db_info.json')
# Git revision reported on the health-check page (None when unset).
GIT_SHA = getenv('GIT_SHA')
# S3 bucket and region used to build public links to database files.
BUCKET_NAME = getenv('AWS_DB_S3_BUCKET', 'bedrock-db-dev')
# Use the directly imported ``getenv`` like the other constants above.
REGION_NAME = getenv('AWS_DB_REGION', 'us-west-2')
S3_BASE_URL = 'https://s3-{}.amazonaws.com/{}'.format(
    REGION_NAME,
    BUCKET_NAME,
)
76+
77+
78+
def get_db_file_url(filename):
    """Return the URL of ``filename`` under ``S3_BASE_URL``."""
    url_parts = (S3_BASE_URL, filename)
    return '/'.join(url_parts)
80+
81+
82+
def get_extra_server_info():
    """Collect host and database metadata for the cron health-check page.

    Returns a dict that always contains:

    * ``name``: the non-empty ``HOSTNAME``, ``DEIS_APP`` and ``DEIS_DOMAIN``
      settings joined with dots.
    * ``git_sha``: the value of the module-level ``GIT_SHA``.

    When the JSON file named by ``DB_INFO_FILE`` can be read and holds an
    object, each of its keys is added with a ``db_`` prefix, together with
    the derived ``db_last_update`` (whole minutes since its ``updated``
    timestamp) and ``db_file_url`` (link built from its ``file_name``).
    Missing or malformed optional data is skipped rather than raised, so a
    bad metadata file cannot take down the health-check endpoint.
    """
    # Default to None so a setting that is not defined is simply left out
    # of the name instead of raising AttributeError.
    name_parts = [
        getattr(settings, attr, None)
        for attr in ('HOSTNAME', 'DEIS_APP', 'DEIS_DOMAIN')
    ]
    server_info = {
        'name': '.'.join(part for part in name_parts if part),
        'git_sha': GIT_SHA,
    }

    try:
        with open(DB_INFO_FILE, 'r') as fp:
            db_info = json.load(fp)
    except (IOError, ValueError):
        # No readable/valid metadata file: report the basic info only.
        return server_info

    if not isinstance(db_info, dict):
        # Valid JSON, but not the expected object of key/value pairs.
        return server_info

    try:
        db_info['last_update'] = int((time() - db_info['updated']) / 60)
    except (KeyError, TypeError, ValueError):
        # 'updated' is absent or not a number; omit the derived field.
        pass

    file_name = db_info.get('file_name')
    if file_name:
        try:
            db_info['file_url'] = get_db_file_url(file_name)
        except TypeError:
            # 'file_name' is not a string; omit the link.
            pass

    for key, value in db_info.items():
        server_info['db_%s' % key] = value

    return server_info
101+
102+
103+
@require_safe
@never_cache
def cron_health_check(request):
    """Render a report of how recently each monitored cron task ran.

    For every ``(name, max seconds)`` pair in ``HEALTH_FILES`` the
    modification time of ``/tmp/last-run-<name>`` is compared with the
    current time. A task fails when its marker file cannot be read or is
    older than its limit. The page is returned with status 200 when every
    task passes and 500 otherwise.
    """
    results = []
    for task_name, allowed_age in HEALTH_FILES:
        marker_path = '/tmp/last-run-%s' % task_name
        try:
            last_run = os.path.getmtime(marker_path)
        except OSError:
            # Marker file is missing or unreadable: the task counts as failed.
            results.append((task_name, allowed_age, 'None', False))
        else:
            age = int(time() - last_run)
            results.append((task_name, allowed_age, age, age <= allowed_age))

    # The overall check passes only if every individual task passed.
    check_pass = all(row[3] for row in results)

    context = {
        'results': results,
        'server_info': get_extra_server_info(),
        'success': check_pass,
    }
    status_code = 200 if check_pass else 500
    return render(request, 'cron-health-check.html', context,
                  status=status_code)
130+
131+
59132
def server_error_view(request, template_name='500.html'):
60133
"""500 error handler that runs context processors."""
61134
return l10n_utils.render(request, template_name, status=500)

bedrock/externalfiles/models.py

+3
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,6 @@ class ExternalFile(models.Model):
55
name = models.CharField(max_length=50, primary_key=True)
66
content = models.TextField()
77
last_modified = models.DateTimeField(auto_now=True)
8+
9+
class Meta:
10+
app_label = 'externalfiles'

bedrock/settings/base.py

+4-17
Original file line numberDiff line numberDiff line change
@@ -32,23 +32,9 @@ def path(*args):
3232

3333
DEBUG = config('DEBUG', cast=bool, default=False)
3434

35-
# Production uses PostgreSQL, but Sqlite should be sufficient for local development.
36-
db_url = config('DATABASE_URL', default='sqlite:///bedrock.db')
3735
DATABASES = {
38-
# leave 'default' empty so that Django will start even
39-
# if it can't connect to the DB at boot time
40-
'default': {},
41-
'bedrock': dj_database_url.parse(db_url)
36+
'default': dj_database_url.parse('sqlite:///bedrock.db'),
4237
}
43-
if db_url.startswith('sqlite'):
44-
# no server, can use 'default'
45-
DATABASES['default'] = DATABASES['bedrock']
46-
# leave the config in 'bedrock' as well so scripts
47-
# hardcoded for 'bedrock' will continue to work
48-
else:
49-
# settings specific to db server environments
50-
DATABASES['bedrock']['CONN_MAX_AGE'] = None
51-
DATABASE_ROUTERS = ['bedrock.base.database.BedrockRouter']
5238

5339
CACHES = config(
5440
'CACHES',
@@ -271,6 +257,7 @@ def lazy_langs():
271257
'contributor-data',
272258
'healthz',
273259
'readiness',
260+
'healthz-cron',
274261
'2004',
275262
'2005',
276263
'2006',
@@ -300,8 +287,8 @@ def lazy_langs():
300287
r'^newsletter/(confirm|existing|hacks\.mozilla\.org|recovery|updated)/',
301288
r'/system-requirements/$',
302289
r'.*/(firstrun|thanks)/$',
303-
r'^healthz/$',
304290
r'^readiness/$',
291+
r'^healthz(-cron)?/$',
305292
r'^country-code\.json$',
306293
# exclude redirects
307294
r'^foundation/annualreport/$'
@@ -535,8 +522,8 @@ def set_whitenoise_headers(headers, path, url):
535522
SECURE_CONTENT_TYPE_NOSNIFF = config('SECURE_CONTENT_TYPE_NOSNIFF', default=True, cast=bool)
536523
SECURE_SSL_REDIRECT = config('SECURE_SSL_REDIRECT', default=not DISABLE_SSL, cast=bool)
537524
SECURE_REDIRECT_EXEMPT = [
538-
r'^healthz/$',
539525
r'^readiness/$',
526+
r'^healthz(-cron)?/$',
540527
]
541528
if config('USE_SECURE_PROXY_HEADER', default=SECURE_SSL_REDIRECT, cast=bool):
542529
SECURE_PROXY_SSL_HEADER = ('HTTP_X_FORWARDED_PROTO', 'https')

bedrock/urls.py

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535

3636
url(r'^healthz/$', watchman_views.ping, name="watchman.ping"),
3737
url(r'^readiness/$', watchman_views.status, name="watchman.status"),
38+
url(r'^healthz-cron/$', 'bedrock.base.views.cron_health_check'),
3839
url(r'^csp-violation-capture$', 'bedrock.base.views.csp_violation_capture',
3940
name='csp-violation-capture'),
4041
url(r'^country-code\.json$', 'bedrock.base.views.geolocate',

0 commit comments

Comments
 (0)