Skip to content

Commit b4764f4

Browse files
alexmvtimabbott
authored andcommitted
upload: Download files with their original names.
Fixes: zulip#29491.
1 parent 933e3cb commit b4764f4

File tree

7 files changed

+127
-28
lines changed

7 files changed

+127
-28
lines changed

zerver/lib/upload/__init__.py

+6
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ def sanitize_name(value: str) -> str:
122122
value = unicodedata.normalize("NFKC", value)
123123
value = re.sub(r"[^\w\s.-]", "", value).strip()
124124
value = re.sub(r"[-\s]+", "-", value)
125+
126+
# Django's MultiPartParser never returns files named this, but we
127+
# could get them after removing spaces; change the name to a safer
128+
# value.
125129
if value in {"", ".", ".."}:
126130
return "uploaded-file"
127131
return value
@@ -139,9 +143,11 @@ def upload_message_attachment(
139143
path_id = upload_backend.generate_message_upload_path(
140144
str(target_realm.id), sanitize_name(uploaded_file_name)
141145
)
146+
142147
with transaction.atomic():
143148
upload_backend.upload_message_attachment(
144149
path_id,
150+
uploaded_file_name,
145151
content_type,
146152
file_data,
147153
user_profile,

zerver/lib/upload/base.py

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def generate_message_upload_path(self, realm_id: str, uploaded_file_name: str) -
3838
def upload_message_attachment(
3939
self,
4040
path_id: str,
41+
filename: str,
4142
content_type: str,
4243
file_data: bytes,
4344
user_profile: UserProfile | None,

zerver/lib/upload/local.py

+1
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ def generate_message_upload_path(self, realm_id: str, sanitized_file_name: str)
8989
def upload_message_attachment(
9090
self,
9191
path_id: str,
92+
filename: str,
9293
content_type: str,
9394
file_data: bytes,
9495
user_profile: UserProfile | None,

zerver/lib/upload/s3.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import botocore
1111
from botocore.client import Config
1212
from django.conf import settings
13+
from django.utils.http import content_disposition_header
1314
from mypy_boto3_s3.service_resource import Bucket
1415
from typing_extensions import override
1516

@@ -62,7 +63,7 @@ def get_bucket(bucket_name: str, authed: bool = True) -> Bucket:
6263

6364
def upload_content_to_s3(
6465
bucket: Bucket,
65-
file_name: str,
66+
path: str,
6667
content_type: str | None,
6768
user_profile: UserProfile | None,
6869
contents: bytes,
@@ -77,8 +78,9 @@ def upload_content_to_s3(
7778
] = "STANDARD",
7879
cache_control: str | None = None,
7980
extra_metadata: dict[str, str] | None = None,
81+
filename: str | None = None,
8082
) -> None:
81-
key = bucket.Object(file_name)
83+
key = bucket.Object(path)
8284
metadata: dict[str, str] = {}
8385
if user_profile:
8486
metadata["user_profile_id"] = str(user_profile.id)
@@ -89,7 +91,10 @@ def upload_content_to_s3(
8991
extras = {}
9092
if content_type is None: # nocoverage
9193
content_type = ""
92-
if content_type not in INLINE_MIME_TYPES:
94+
is_attachment = content_type not in INLINE_MIME_TYPES
95+
if filename is not None:
96+
extras["ContentDisposition"] = content_disposition_header(is_attachment, filename)
97+
elif is_attachment:
9398
extras["ContentDisposition"] = "attachment"
9499
if cache_control is not None:
95100
extras["CacheControl"] = cache_control
@@ -211,6 +216,7 @@ def generate_message_upload_path(self, realm_id: str, sanitized_file_name: str)
211216
def upload_message_attachment(
212217
self,
213218
path_id: str,
219+
filename: str,
214220
content_type: str,
215221
file_data: bytes,
216222
user_profile: UserProfile | None,
@@ -222,6 +228,7 @@ def upload_message_attachment(
222228
user_profile,
223229
file_data,
224230
storage_class=settings.S3_UPLOADS_STORAGE_CLASS,
231+
filename=filename,
225232
)
226233

227234
@override

zerver/tests/test_upload.py

+88-17
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import os
22
import re
3-
import time
3+
from datetime import timedelta
44
from io import StringIO
55
from unittest import mock
66
from unittest.mock import patch
77
from urllib.parse import quote
88

99
import orjson
1010
import pyvips
11+
import time_machine
1112
from django.conf import settings
1213
from django.utils.timezone import now as timezone_now
1314
from pyvips import at_least_libvips
@@ -304,21 +305,43 @@ def test_serve_local_file_unauthed_token_expires(self) -> None:
304305
self.login("hamlet")
305306
fp = StringIO("zulip!")
306307
fp.name = "zulip.txt"
307-
result = self.client_post("/json/user_uploads", {"file": fp})
308-
response_dict = self.assert_json_success(result)
309-
url = "/json" + response_dict["url"]
308+
now = timezone_now()
309+
with time_machine.travel(now, tick=False):
310+
result = self.client_post("/json/user_uploads", {"file": fp})
311+
response_dict = self.assert_json_success(result)
312+
url = "/json" + response_dict["url"]
310313

311-
start_time = time.time()
312-
with mock.patch("django.core.signing.time.time", return_value=start_time):
313314
result = self.client_get(url)
314315
data = self.assert_json_success(result)
315316
url_only_url = data["url"]
316-
317317
self.logout()
318318
self.assertEqual(self.client_get(url_only_url).getvalue(), b"zulip!")
319319

320+
with time_machine.travel(now + timedelta(seconds=30), tick=False):
321+
self.assertEqual(self.client_get(url_only_url).getvalue(), b"zulip!")
322+
320323
# After over 60 seconds, the token should become invalid:
321-
with mock.patch("django.core.signing.time.time", return_value=start_time + 61):
324+
with time_machine.travel(now + timedelta(seconds=61), tick=False):
325+
result = self.client_get(url_only_url)
326+
self.assert_json_error(result, "Invalid token")
327+
328+
def test_serve_local_file_unauthed_token_deleted(self) -> None:
329+
self.login("hamlet")
330+
fp = StringIO("zulip!")
331+
fp.name = "zulip.txt"
332+
now = timezone_now()
333+
with time_machine.travel(now, tick=False):
334+
result = self.client_post("/json/user_uploads", {"file": fp})
335+
response_dict = self.assert_json_success(result)
336+
url = "/json" + response_dict["url"]
337+
338+
result = self.client_get(url)
339+
data = self.assert_json_success(result)
340+
url_only_url = data["url"]
341+
342+
path_id = response_dict["url"].removeprefix("/user_uploads/")
343+
Attachment.objects.get(path_id=path_id).delete()
344+
322345
result = self.client_get(url_only_url)
323346
self.assert_json_error(result, "Invalid token")
324347

@@ -909,6 +932,7 @@ def check_xsend_links(
909932
name_str_for_test: str,
910933
content_disposition: str = "",
911934
download: bool = False,
935+
returned_attachment: bool = False,
912936
) -> None:
913937
self.login("hamlet")
914938
fp = StringIO("zulip!")
@@ -927,27 +951,74 @@ def check_xsend_links(
927951
response["X-Accel-Redirect"],
928952
"/internal/local/uploads/" + fp_path + "/" + name_str_for_test,
929953
)
930-
if content_disposition != "":
954+
if returned_attachment:
931955
self.assertIn("attachment;", response["Content-disposition"])
932-
self.assertIn(content_disposition, response["Content-disposition"])
933956
else:
934957
self.assertIn("inline;", response["Content-disposition"])
958+
if content_disposition != "":
959+
self.assertIn(content_disposition, response["Content-disposition"])
935960
self.assertEqual(set(response["Cache-Control"].split(", ")), {"private", "immutable"})
936961

937-
check_xsend_links("zulip.txt", "zulip.txt", 'filename="zulip.txt"')
962+
check_xsend_links(
963+
"zulip.txt", "zulip.txt", 'filename="zulip.txt"', returned_attachment=True
964+
)
938965
check_xsend_links(
939966
"áéБД.txt",
940967
"%C3%A1%C3%A9%D0%91%D0%94.txt",
941968
"filename*=utf-8''%C3%A1%C3%A9%D0%91%D0%94.txt",
969+
returned_attachment=True,
970+
)
971+
check_xsend_links(
972+
"zulip.html",
973+
"zulip.html",
974+
'filename="zulip.html"',
975+
returned_attachment=True,
976+
)
977+
check_xsend_links(
978+
"zulip.sh",
979+
"zulip.sh",
980+
'filename="zulip.sh"',
981+
returned_attachment=True,
982+
)
983+
check_xsend_links(
984+
"zulip.jpeg",
985+
"zulip.jpeg",
986+
'filename="zulip.jpeg"',
987+
returned_attachment=False,
988+
)
989+
check_xsend_links(
990+
"zulip.jpeg",
991+
"zulip.jpeg",
992+
download=True,
993+
content_disposition='filename="zulip.jpeg"',
994+
returned_attachment=True,
995+
)
996+
check_xsend_links(
997+
"áéБД.pdf",
998+
"%C3%A1%C3%A9%D0%91%D0%94.pdf",
999+
"filename*=utf-8''%C3%A1%C3%A9%D0%91%D0%94.pdf",
1000+
returned_attachment=False,
1001+
)
1002+
check_xsend_links(
1003+
"some file (with spaces).png",
1004+
"some-file-with-spaces.png",
1005+
'filename="some file (with spaces).png"',
1006+
returned_attachment=False,
1007+
)
1008+
check_xsend_links(
1009+
"some file (with spaces).png",
1010+
"some-file-with-spaces.png",
1011+
'filename="some file (with spaces).png"',
1012+
download=True,
1013+
returned_attachment=True,
9421014
)
943-
check_xsend_links("zulip.html", "zulip.html", 'filename="zulip.html"')
944-
check_xsend_links("zulip.sh", "zulip.sh", 'filename="zulip.sh"')
945-
check_xsend_links("zulip.jpeg", "zulip.jpeg")
9461015
check_xsend_links(
947-
"zulip.jpeg", "zulip.jpeg", download=True, content_disposition='filename="zulip.jpeg"'
1016+
".().",
1017+
"uploaded-file",
1018+
'filename=".()."',
1019+
returned_attachment=True,
9481020
)
949-
check_xsend_links("áéБД.pdf", "%C3%A1%C3%A9%D0%91%D0%94.pdf")
950-
check_xsend_links("zulip", "zulip", 'filename="zulip"')
1021+
check_xsend_links("zulip", "zulip", 'filename="zulip"', returned_attachment=True)
9511022

9521023

9531024
class AvatarTest(UploadSerializeMixin, ZulipTestCase):

zerver/views/upload.py

+20-8
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,11 @@
4545
from zerver.lib.upload.base import INLINE_MIME_TYPES
4646
from zerver.lib.upload.local import assert_is_local_storage_path
4747
from zerver.lib.upload.s3 import get_signed_upload_url
48-
from zerver.models import ImageAttachment, UserProfile
48+
from zerver.models import Attachment, ImageAttachment, UserProfile
4949
from zerver.worker.thumbnail import ensure_thumbnails
5050

5151

52-
def patch_disposition_header(response: HttpResponse, url: str, is_attachment: bool) -> None:
53-
filename = os.path.basename(urlsplit(url).path)
52+
def patch_disposition_header(response: HttpResponse, filename: str, is_attachment: bool) -> None:
5453
content_disposition = content_disposition_header(is_attachment, filename)
5554

5655
if content_disposition is not None:
@@ -112,15 +111,18 @@ def serve_s3(request: HttpRequest, path_id: str, force_download: bool = False) -
112111

113112

114113
def serve_local(
115-
request: HttpRequest, path_id: str, force_download: bool = False
114+
request: HttpRequest,
115+
path_id: str,
116+
filename: str,
117+
force_download: bool = False,
116118
) -> HttpResponseBase:
117119
assert settings.LOCAL_FILES_DIR is not None
118120
local_path = os.path.join(settings.LOCAL_FILES_DIR, path_id)
119121
assert_is_local_storage_path("files", local_path)
120122
if not os.path.isfile(local_path):
121123
return HttpResponseNotFound("<p>File not found</p>")
122124

123-
mimetype, encoding = guess_type(path_id)
125+
mimetype, encoding = guess_type(filename)
124126
download = force_download or mimetype not in INLINE_MIME_TYPES
125127

126128
if settings.DEVELOPMENT:
@@ -130,6 +132,7 @@ def serve_local(
130132
response: HttpResponseBase = FileResponse(
131133
open(local_path, "rb"), # noqa: SIM115
132134
as_attachment=download,
135+
filename=filename,
133136
)
134137
patch_cache_control(response, private=True, immutable=True)
135138
return response
@@ -143,7 +146,7 @@ def serve_local(
143146
response = internal_nginx_redirect(
144147
quote(f"/internal/local/uploads/{path_id}"), content_type=mimetype
145148
)
146-
patch_disposition_header(response, local_path, download)
149+
patch_disposition_header(response, filename, download)
147150
patch_cache_control(response, private=True, immutable=True)
148151
return response
149152

@@ -335,9 +338,14 @@ def serve_image_error(status: int, image_path: str) -> HttpResponseBase:
335338

336339
# Update the path that we are fetching to be the thumbnail
337340
path_id = get_image_thumbnail_path(image_attachment, requested_format)
341+
served_filename = str(requested_format)
342+
else:
343+
served_filename = attachment.file_name
338344

339345
if settings.LOCAL_UPLOADS_DIR is not None:
340-
return serve_local(request, path_id, force_download=force_download)
346+
return serve_local(
347+
request, path_id, filename=served_filename, force_download=force_download
348+
)
341349
else:
342350
return serve_s3(request, path_id, force_download=force_download)
343351

@@ -374,9 +382,13 @@ def serve_file_unauthed_from_token(
374382
raise JsonableError(_("Invalid token"))
375383
if path_id.split("/")[-1] != filename:
376384
raise JsonableError(_("Invalid filename"))
385+
try:
386+
attachment = Attachment.objects.get(path_id=path_id)
387+
except Attachment.DoesNotExist:
388+
raise JsonableError(_("Invalid token"))
377389

378390
if settings.LOCAL_UPLOADS_DIR is not None:
379-
return serve_local(request, path_id)
391+
return serve_local(request, path_id, filename=attachment.file_name)
380392
else:
381393
return serve_s3(request, path_id)
382394

zerver/worker/thumbnail.py

+1
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def ensure_thumbnails(image_attachment: ImageAttachment) -> int:
100100
logger.info("Uploading %d bytes to %s", len(thumbnailed_bytes), thumbnail_path)
101101
upload_backend.upload_message_attachment(
102102
thumbnail_path,
103+
str(thumbnail_format),
103104
content_type,
104105
thumbnailed_bytes,
105106
None,

0 commit comments

Comments
 (0)