Skip to content

Commit 7d138ea

Browse files
author
Peter Bengtsson
authored
Don't break on repeated identical files in .zip files (mozilla-services#1043)
* Don't break on repeated identical files in .zip files Fixes #1042 * correct linting image
1 parent f932a3c commit 7d138ea

File tree

9 files changed

+79
-11
lines changed

9 files changed

+79
-11
lines changed

.circleci/config.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
- run:
4141
name: Run lint check
4242
command: |
43-
docker-compose run test-ci lintcheck
43+
docker-compose run linting-ci lintcheck
4444
4545
- run:
4646
name: Linting check for frontend

Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,9 @@ build-frontend:
9191
docker-compose run -u 0 -e CI base ./bin/build_frontend.sh
9292

9393
lintcheck: .env .docker-build
94-
docker-compose run web lintcheck
94+
docker-compose run linting lintcheck
9595
docker-compose run frontend lint
9696

9797
lintfix: .env .docker-build
98-
docker-compose run web blackfix
98+
docker-compose run linting blackfix
9999
docker-compose run frontend lintfix

docker-compose.yml

+13
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,19 @@ services:
4848
- $PWD:/app
4949
command: web-dev
5050

51+
linting:
52+
extends:
53+
service: base
54+
volumes:
55+
- $PWD:/app
56+
command: lintcheck
57+
58+
# Same as linting but without volume mounts for CI.
59+
linting-ci:
60+
extends:
61+
service: base
62+
command: lintcheck
63+
5164
# Container specifically for running tests.
5265
test:
5366
extends:

docs/dev.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -665,13 +665,13 @@ To check that all code is formatted correctly, run:
665665

666666
.. code-block:: shell
667667
668-
$ docker-compose run web lintcheck
668+
$ docker-compose run linting lintcheck
669669
670670
If you have a bunch of formatting complaints you can automatically fix them all with:
671671

672672
.. code-block:: shell
673673
674-
$ docker-compose run web blackfix
674+
$ docker-compose run linting blackfix
675675
676676
677677
Debugging ``minio`` container

tecken/upload/utils.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ class UnrecognizedArchiveFileExtension(ValueError):
3333
to extract."""
3434

3535

36+
class DuplicateFileDifferentSize(ValueError):
37+
"""When a zip file contains two identically named files whose file size is
38+
different."""
39+
40+
3641
def get_file_md5_hash(fn, blocksize=65536):
3742
hasher = hashlib.md5()
3843
with open(fn, "rb") as f:
@@ -53,12 +58,28 @@ def dump_and_extract(root_dir, file_buffer, name):
5358
if name.lower().endswith(".zip"):
5459
zf = zipfile.ZipFile(file_buffer)
5560
zf.extractall(root_dir)
61+
namelist = zf.namelist()
62+
# If there are repeated names in the namelist, dig deeper!
63+
if len(set(namelist)) != len(namelist):
64+
# It's only a problem any of the files within are of different size
65+
sizes = {}
66+
for info in zf.infolist():
67+
if info.filename in sizes:
68+
if info.file_size != sizes[info.filename]:
69+
raise DuplicateFileDifferentSize(
70+
"The zipfile buffer contains two files both called "
71+
f"{info.filename} and they have difference sizes "
72+
"({} != {})".format(info.file_size, sizes[info.filename])
73+
)
74+
sizes[info.filename] = info.file_size
75+
namelist = set(namelist)
76+
5677
else:
5778
raise UnrecognizedArchiveFileExtension(os.path.splitext(name)[1])
5879

5980
return [
6081
FileMember(os.path.join(root_dir, filename), filename)
61-
for filename in zf.namelist()
82+
for filename in namelist
6283
if os.path.isfile(os.path.join(root_dir, filename))
6384
]
6485

tecken/upload/views.py

+3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from tecken.upload.utils import (
3333
dump_and_extract,
3434
UnrecognizedArchiveFileExtension,
35+
DuplicateFileDifferentSize,
3536
upload_file_upload,
3637
)
3738
from tecken.symbolicate.tasks import invalidate_symbolicate_cache_task
@@ -212,6 +213,8 @@ def upload_archive(request, upload_dir):
212213
return http.JsonResponse(
213214
{"error": f'Unrecognized archive file extension "{exception}"'}, status=400
214215
)
216+
except DuplicateFileDifferentSize as exception:
217+
return http.JsonResponse({"error": str(exception)}, status=400)
215218
error = check_symbols_archive_file_listing(file_listing)
216219
if error:
217220
return http.JsonResponse({"error": error.strip()}, status=400)

tests/duplicated-different-size.zip

492 Bytes
Binary file not shown.

tests/duplicated-same-size.zip

490 Bytes
Binary file not shown.

tests/test_upload.py

+36-5
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,16 @@
3030
from tecken.upload.tasks import update_uploads_created_task
3131

3232

33-
_here = os.path.dirname(__file__)
34-
ZIP_FILE = os.path.join(_here, "sample.zip")
35-
INVALID_ZIP_FILE = os.path.join(_here, "invalid.zip")
36-
INVALID_CHARACTERS_ZIP_FILE = os.path.join(_here, "invalid-characters.zip")
37-
ACTUALLY_NOT_ZIP_FILE = os.path.join(_here, "notazipdespiteitsname.zip")
33+
def _join(x):
34+
return os.path.join(os.path.dirname(__file__), x)
35+
36+
37+
ZIP_FILE = _join("sample.zip")
38+
INVALID_ZIP_FILE = _join("invalid.zip")
39+
INVALID_CHARACTERS_ZIP_FILE = _join("invalid-characters.zip")
40+
ACTUALLY_NOT_ZIP_FILE = _join("notazipdespiteitsname.zip")
41+
DUPLICATED_SAME_SIZE_ZIP_FILE = _join("duplicated-same-size.zip")
42+
DUPLICATED_DIFFERENT_SIZE_ZIP_FILE = _join("duplicated-different-size.zip")
3843

3944

4045
class FakeUser:
@@ -67,6 +72,13 @@ def test_dump_and_extract(tmpdir):
6772
assert os.path.isfile(os.path.join(tmpdir, "build-symbols.txt"))
6873

6974

75+
def test_dump_and_extract_duplicate_name_same_size(tmpdir):
76+
with open(DUPLICATED_SAME_SIZE_ZIP_FILE, "rb") as f:
77+
file_listings = dump_and_extract(tmpdir, f, DUPLICATED_SAME_SIZE_ZIP_FILE)
78+
# Even though the file contains 2 files.
79+
assert len(file_listings) == 1
80+
81+
7082
def test_should_compressed_key(settings):
7183
settings.COMPRESS_EXTENSIONS = ["bar"]
7284
assert should_compressed_key("foo.bar")
@@ -1186,6 +1198,25 @@ def test_upload_client_bad_request(fakeuser, client, settings):
11861198
assert response.json()["error"] == error_msg
11871199

11881200

1201+
@pytest.mark.django_db
1202+
def test_upload_duplicate_files_in_zip_different_name(fakeuser, client, settings):
1203+
url = reverse("upload:upload_archive")
1204+
token = Token.objects.create(user=fakeuser)
1205+
permission, = Permission.objects.filter(codename="upload_symbols")
1206+
token.permissions.add(permission)
1207+
1208+
# Upload a file whose members have some repeated name AND different size
1209+
with open(DUPLICATED_DIFFERENT_SIZE_ZIP_FILE, "rb") as f:
1210+
response = client.post(url, {"file.zip": f}, HTTP_AUTH_TOKEN=token.key)
1211+
assert response.status_code == 400
1212+
error_msg = (
1213+
"The zipfile buffer contains two files both called "
1214+
"foo.pdb/50FEB8DBDC024C66B42193D881CE80E12/ntdll.sym and they have "
1215+
"difference sizes (24 != 39)"
1216+
)
1217+
assert response.json()["error"] == error_msg
1218+
1219+
11891220
@pytest.mark.django_db
11901221
def test_upload_client_unrecognized_bucket(botomock, fakeuser, client):
11911222
"""The upload view raises an error if you try to upload into a bucket

0 commit comments

Comments
 (0)