Skip to content

Commit 3d90070

Browse files
author
Peter Bengtsson
authored
Bug 1397884 invalidate cache on new symbols (mozilla-services#389)
* bug 1397884 - invalidate cache on new symbols * longer cache invalidation * docs updates
1 parent cabfcd8 commit 3d90070

File tree

11 files changed

+306
-30
lines changed

11 files changed

+306
-30
lines changed

.coveragerc

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ omit =
88
tests/*
99
tecken/settings.py
1010
tecken/wsgi.py
11+
tecken/benchmarking/*
1112
*migrations*
1213
*management*
1314
*management/commands*

docs/download.rst

+36
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,39 @@ route your request to the code that talks to S3. It can also come back
165165
as exactly ``Debug-Time: 0.0`` which means the symbol is in a blacklist of
166166
symbols that are immediately ``404 Not Found`` based on filename pattern
167167
matching.
168+
169+
170+
Download Without Caching
171+
========================
172+
173+
Generally we can cache our work around S3 downloads quite aggressively since we
174+
tightly control the (only) input. Whenever a symbol archive file is uploaded,
175+
for every file within that we upload to S3 we also invalidate it from our
176+
cache. That means we can cache information about whether certain symbols
177+
exist in S3 or not quite long.
178+
179+
However, if you are debugging something or if you manually remove a symbol
180+
from S3 that control is "lost". But there is a way to force the cache to
181+
be ignored. However, it only ignores looking in the cache. It will always
182+
update the cache.
183+
184+
To do this append ``?_refresh`` to the URL. For example:
185+
186+
.. code-block:: shell
187+
188+
$ curl https://symbols.mozilla.org/foo.pdb/HEX/foo.sym
189+
...302 Found...
190+
191+
# Now suppose you delete the file manually from S3 in the AWS Console.
192+
# And without any delay do the curl again:
193+
$ curl https://symbols.mozilla.org/foo.pdb/HEX/foo.sym
194+
...302 Found...
195+
# Same old "broken", which is wrong.
196+
197+
# Avoid it by adding ?_refresh
198+
$ curl https://symbols.mozilla.org/foo.pdb/HEX/foo.sym?_refresh
199+
...404 Symbol Not Found...
200+
201+
# Now our cache will be updated.
202+
$ curl https://symbols.mozilla.org/foo.pdb/HEX/foo.sym
203+
...404 Symbol Not Found...

frontend/src/Common.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ export const formatFileSize = (bytes, decimals = 0) => {
6363

6464
export const BooleanIcon = bool =>
6565
<span className="icon" style={{ color: bool ? 'green' : 'red' }}>
66-
<i className={bool ? 'fa fa-thumbs-o-up' : 'fa fa-thumbs-o-down'} />
66+
<i className={bool ? 'fa fa-check' : 'fa fa-close'} />
6767
</span>
6868

6969
export const Pagination = ({

tecken/base/decorators.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,6 @@ def send_email(email):
180180
Also, whatever you do where things get cached, you can undo that.
181181
For example::
182182
183-
184183
@cache_memoize(100)
185184
def callmeonce(arg1):
186185
print(arg1)
@@ -190,6 +189,18 @@ def callmeonce(arg1):
190189
callmeonce.invalidate('peter')
191190
callmeonce('peter') # will print 'peter'
192191
192+
Suppose you know for good reason you want to bypass the cache and
193+
really let the decorator let you through you can set one extra
194+
keyword argument called `_refresh`. For example::
195+
196+
@cache_memoize(100)
197+
def callmeonce(arg1):
198+
print(arg1)
199+
200+
callmeonce('peter') # will print 'peter'
201+
callmeonce('peter') # nothing printed
202+
callmeonce('peter', _refresh=True) # will print 'peter'
203+
193204
"""
194205

195206
if args_rewrite is None:
@@ -212,11 +223,18 @@ def _make_cache_key(*args, **kwargs):
212223

213224
@wraps(func)
214225
def inner(*args, **kwargs):
226+
refresh = kwargs.pop('_refresh', False)
215227
cache_key = _make_cache_key(*args, **kwargs)
216-
result = cache.get(cache_key)
228+
if refresh:
229+
result = None
230+
else:
231+
result = cache.get(cache_key)
217232
if result is None:
218233
result = func(*args, **kwargs)
219234
if not store_result:
235+
# Then the result isn't valuable/important to store but
236+
# we want to store something. Just to remember that
237+
# it has be done.
220238
cache.set(cache_key, True, timeout)
221239
elif result is not None:
222240
cache.set(cache_key, result, timeout)

tecken/base/symboldownloader.py

+13-13
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
# License, v. 2.0. If a copy of the MPL was not distributed with this
33
# file, you can obtain one at http://mozilla.org/MPL/2.0/.
44

5-
import hashlib
65
import time
76
from io import BytesIO
87
from gzip import GzipFile
@@ -14,7 +13,6 @@
1413
from botocore.exceptions import ClientError
1514

1615
from django.conf import settings
17-
from django.utils.encoding import force_bytes
1816

1917
from tecken.base.decorators import cache_memoize
2018
from tecken.s3 import S3Bucket
@@ -78,10 +76,6 @@ def wrapper(self, *args, **kwargs):
7876
return wrapper
7977

8078

81-
def make_symbol_cache_key(bucket_name, key):
82-
return hashlib.md5(force_bytes(f'symbol:{bucket_name}:{key}')).hexdigest()
83-
84-
8579
@metrics.timer_decorator('symboldownloader_exists')
8680
@cache_memoize(
8781
settings.SYMBOLDOWNLOAD_EXISTS_TTL_SECONDS,
@@ -232,7 +226,7 @@ def _make_key(prefix, symbol, debugid, filename):
232226
filename,
233227
)
234228

235-
def _get(self, symbol, debugid, filename):
229+
def _get(self, symbol, debugid, filename, refresh_cache=False):
236230
"""Return a dict if the symbol can be found. The dict will
237231
either be `{'url': ...}` or `{'buckey_name': ..., 'key': ...}`
238232
depending on if the symbol was found a public bucket or a
@@ -248,7 +242,9 @@ def _get(self, symbol, debugid, filename):
248242
# If it's a private bucket we use boto3.
249243

250244
key = self._make_key(prefix, symbol, debugid, filename)
251-
if not exists_in_source(source, key):
245+
if not exists_in_source(
246+
source, key, _refresh=refresh_cache
247+
):
252248
continue
253249

254250
logger.debug(
@@ -276,7 +272,7 @@ def _get(self, symbol, debugid, filename):
276272
logger.debug(
277273
f'Looking for symbol file by URL {file_url!r}'
278274
)
279-
if check_url_head(file_url):
275+
if check_url_head(file_url, _refresh=refresh_cache):
280276
return {'url': file_url, 'source': source}
281277

282278
def _get_stream(self, symbol, debugid, filename):
@@ -404,16 +400,20 @@ def _get_stream(self, symbol, debugid, filename):
404400
raise SymbolNotFound(symbol, debugid, filename)
405401

406402
@set_time_took
407-
def has_symbol(self, symbol, debugid, filename):
403+
def has_symbol(self, symbol, debugid, filename, refresh_cache=False):
408404
"""return True if the symbol can be found, False if not
409405
found in any of the URLs provided."""
410-
return bool(self._get(symbol, debugid, filename))
406+
return bool(self._get(
407+
symbol, debugid, filename, refresh_cache=refresh_cache,
408+
))
411409

412410
@set_time_took
413-
def get_symbol_url(self, symbol, debugid, filename):
411+
def get_symbol_url(self, symbol, debugid, filename, refresh_cache=False):
414412
"""return the redirect URL or None. If we return None
415413
it means we can't find the object in any of the URLs provided."""
416-
found = self._get(symbol, debugid, filename)
414+
found = self._get(
415+
symbol, debugid, filename, refresh_cache=refresh_cache
416+
)
417417
if found:
418418
if 'url' in found:
419419
return found['url']

tecken/download/views.py

+15-5
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,26 @@ def download_symbol(request, symbol, debugid, filename):
9090
response['Debug-Time'] = 0
9191
return response
9292

93+
refresh_cache = '_refresh' in request.GET
94+
9395
if request.method == 'HEAD':
94-
if downloader.has_symbol(symbol, debugid, filename):
96+
if downloader.has_symbol(
97+
symbol,
98+
debugid,
99+
filename,
100+
refresh_cache=refresh_cache,
101+
):
95102
response = http.HttpResponse()
96103
if request._request_debug:
97104
response['Debug-Time'] = downloader.time_took
98105
return response
99106
else:
100-
url = downloader.get_symbol_url(symbol, debugid, filename)
107+
url = downloader.get_symbol_url(
108+
symbol,
109+
debugid,
110+
filename,
111+
refresh_cache=refresh_cache,
112+
)
101113
if url:
102114
# If doing local development, with Docker, you're most likely
103115
# running minio as a fake S3 client. It runs on its own
@@ -110,7 +122,7 @@ def download_symbol(request, symbol, debugid, filename):
110122
'http://minio:9000' in url and
111123
request.get_host() == 'localhost:8000'
112124
): # pragma: no cover
113-
url = url.replace('minio:9000', 'localhost:5000')
125+
url = url.replace('minio:9000', 'localhost:9000')
114126
response = http.HttpResponseRedirect(url)
115127
if request._request_debug:
116128
response['Debug-Time'] = downloader.time_took
@@ -148,8 +160,6 @@ def download_symbol(request, symbol, debugid, filename):
148160
code_id=request.GET.get('code_id'),
149161
)
150162

151-
downloader.invalidate_cache(symbol, debugid, filename)
152-
153163
# The querying of Microsoft's server is potentially slow.
154164
# That's why this call is down in a celery task.
155165
# But there is hope! And the client ought to be informed

tecken/settings.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -402,10 +402,9 @@ def LOGGING(self):
402402

403403
# We can cache quite aggressively here because the SymbolDownloader
404404
# has chance to invalidate certain keys.
405-
# But we don't want to make it too long since when a symbols.zip file
406-
# is uploaded it doesn't have the opportunity to invalidate those
407-
# that are uploaded.
408-
SYMBOLDOWNLOAD_EXISTS_TTL_SECONDS = values.IntegerValue(60 * 60)
405+
# Also, any time a symbol archive file is upload, for each file within
406+
# that we end up uploading to S3 we also cache invalidate.
407+
SYMBOLDOWNLOAD_EXISTS_TTL_SECONDS = values.IntegerValue(60 * 60 * 6)
409408

410409
# Whether to start a background task to search for symbols
411410
# on Microsoft's server is protected by an in-memory cache.

tecken/upload/tasks.py

+23
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from tecken.upload.models import Upload, FileUpload
2323
from tecken.upload.utils import get_archive_members
2424
from tecken.s3 import get_s3_client
25+
from tecken.base.symboldownloader import SymbolDownloader
2526
from tecken.boto_extra import (
2627
reraise_clienterrors,
2728
reraise_endpointconnectionerrors
@@ -30,6 +31,8 @@
3031
logger = logging.getLogger('tecken')
3132
metrics = markus.get_metrics('tecken')
3233

34+
downloader = SymbolDownloader(settings.SYMBOL_URLS)
35+
3336

3437
@shared_task(autoretry_for=(
3538
EndpointConnectionError,
@@ -286,4 +289,24 @@ def upload_file_upload(s3_client, bucket_name, key_name, content, upload=None):
286289
**extras,
287290
)
288291
file_upload.completed_at = timezone.now()
292+
293+
# Take this opportunity to inform possible caches that the file,
294+
# if before wasn't the case, is now stored in S3.
295+
symbol, debugid, filename = _key_name_to_parts(key_name)
296+
try:
297+
downloader.invalidate_cache(
298+
symbol, debugid, filename
299+
)
300+
except Exception as exception: # pragma: no cover
301+
if 1 or settings.DEBUG:
302+
raise
303+
logger.error(
304+
f'Unable to invalidate symbol {symbol}/{debugid}/{filename}',
305+
exc_info=True
306+
)
307+
289308
return file_upload
309+
310+
311+
def _key_name_to_parts(key_name):
312+
return key_name.split(settings.SYMBOL_FILE_PREFIX + '/')[1].split('/')

tests/test_decorators.py

+17
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,20 @@ def returnnothing(a, b, k='bla'):
7676
returnnothing(1, 2)
7777
returnnothing(1, 2)
7878
assert len(calls_made) == 8
79+
80+
81+
def test_cache_memoize_refresh():
82+
83+
calls_made = []
84+
85+
@decorators.cache_memoize(10)
86+
def runmeonce(a):
87+
calls_made.append(a)
88+
return a * 2
89+
90+
runmeonce(10)
91+
assert len(calls_made) == 1
92+
runmeonce(10)
93+
assert len(calls_made) == 1
94+
runmeonce(10, _refresh=True)
95+
assert len(calls_made) == 2

tests/test_download.py

+36-5
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,39 @@ def mock_api_call(self, operation_name, api_params):
174174
assert len(mock_api_calls) == 1
175175

176176

177+
def test_client_with_cache_refreshed(client, botomock, metricsmock):
178+
reload_downloader('https://s3.example.com/private/prefix/')
179+
180+
mock_api_calls = []
181+
182+
def mock_api_call(self, operation_name, api_params):
183+
assert operation_name == 'ListObjectsV2'
184+
mock_api_calls.append(api_params)
185+
return {
186+
'Contents': [{
187+
'Key': api_params['Prefix'],
188+
}]
189+
}
190+
191+
url = reverse('download:download_symbol', args=(
192+
'xul.pdb',
193+
'44E4EC8C2F41492B9369D6B9A059577C2',
194+
'xul.sym',
195+
))
196+
with botomock(mock_api_call):
197+
response = client.get(url)
198+
assert response.status_code == 302
199+
assert len(mock_api_calls) == 1
200+
201+
response = client.get(url)
202+
assert response.status_code == 302
203+
assert len(mock_api_calls) == 1 # still 1
204+
205+
response = client.get(url, {'_refresh': 1})
206+
assert response.status_code == 302
207+
assert len(mock_api_calls) == 2
208+
209+
177210
def test_client_404(client, botomock, clear_redis_store):
178211
reload_downloader('https://s3.example.com/private/prefix/')
179212

@@ -387,11 +420,9 @@ def fake_task(symbol, debugid, **kwargs):
387420
assert response.status_code == 404
388421
assert response.content == b'Symbol Not Found Yet'
389422

390-
# Whenever a download_microsoft_symbol.delay() is is MAYBE
391-
# called, the symboldownloader cache is invalidated on this
392-
# key. So the second call to symboldownloader will actually
393-
# reach out to S3 again.
394-
assert len(mock_calls) == 2
423+
# This basically checks that the SymbolDownloader cache is
424+
# not invalidated between calls.
425+
assert len(mock_calls) == 1
395426
# However, the act of triggering that
396427
# download_microsoft_symbol.delay() call is guarded by an
397428
# in-memory cache. So it shouldn't have called it more than

0 commit comments

Comments
 (0)