Skip to content

Commit f978106

Browse files
Guess file content type when not specified (#655)
Co-authored-by: Martin Durant <[email protected]>
1 parent e3ebfb9 commit f978106

File tree

2 files changed

+41
-4
lines changed

2 files changed

+41
-4
lines changed

gcsfs/core.py

+11 -3
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,12 @@
11
"""
22
Google Cloud Storage pythonic interface
33
"""
4+
45
import asyncio
56
import io
67
import json
78
import logging
9+
import mimetypes
810
import os
911
import posixpath
1012
import re
@@ -1391,7 +1393,7 @@ async def _put_file(
13911393
rpath,
13921394
metadata=None,
13931395
consistency=None,
1394-
content_type="application/octet-stream",
1396+
content_type=None,
13951397
chunksize=50 * 2**20,
13961398
callback=None,
13971399
fixed_key_metadata=None,
@@ -1401,6 +1403,10 @@ async def _put_file(
14011403
# enforce blocksize should be a multiple of 2**18
14021404
if os.path.isdir(lpath):
14031405
return
1406+
if content_type is None:
1407+
content_type, _ = mimetypes.guess_type(lpath)
1408+
if content_type is None:
1409+
content_type = "application/octet-stream"
14041410
callback = callback or NoOpCallback()
14051411
consistency = consistency or self.consistency
14061412
checker = get_consistency_checker(consistency)
@@ -1755,7 +1761,8 @@ def __init__(
17551761
the number we wrote; 'md5' does a full checksum. Any value other
17561762
than 'size' or 'md5' or 'crc32c' is assumed to mean no checking.
17571763
content_type: str
1758-
default is `application/octet-stream`. See the list of available
1764+
default when unspecified is provided by mimetypes.guess_type or
1765+
otherwise `application/octet-stream`. See the list of available
17591766
content types at https://www.iana.org/assignments/media-types/media-types.txt
17601767
metadata: dict
17611768
Custom metadata, in key/value pairs, added at file creation
@@ -1798,7 +1805,8 @@ def __init__(
17981805
else:
17991806
det = {}
18001807
self.content_type = content_type or det.get(
1801-
"contentType", "application/octet-stream"
1808+
"contentType",
1809+
mimetypes.guess_type(self.path)[0] or "application/octet-stream",
18021810
)
18031811
self.metadata = metadata or det.get("metadata", {})
18041812
self.fixed_key_metadata = _convert_fixed_key_metadata(det, from_google=True)

gcsfs/tests/test_core.py

+30 -1
Original file line number | Diff line number | Diff line change
@@ -890,6 +890,36 @@ def test_array(gcs):
890890
assert out == b"A" * 1000
891891

892892

893+
def test_content_type_set(gcs):
894+
fn = TEST_BUCKET + "/content_type"
895+
with gcs.open(fn, "wb", content_type="text/html") as f:
896+
f.write(b"<html>")
897+
assert gcs.info(fn)["contentType"] == "text/html"
898+
899+
900+
def test_content_type_guess(gcs):
901+
fn = TEST_BUCKET + "/content_type.txt"
902+
with gcs.open(fn, "wb") as f:
903+
f.write(b"zz")
904+
assert gcs.info(fn)["contentType"] == "text/plain"
905+
906+
907+
def test_content_type_default(gcs):
908+
fn = TEST_BUCKET + "/content_type.abcdef"
909+
with gcs.open(fn, "wb") as f:
910+
f.write(b"zz")
911+
assert gcs.info(fn)["contentType"] == "application/octet-stream"
912+
913+
914+
def test_content_type_put_guess(gcs):
915+
dst = TEST_BUCKET + "/content_type_put_guess"
916+
with tmpfile(extension="txt") as fn:
917+
with open(fn, "w") as f:
918+
f.write("zz")
919+
gcs.put(fn, f"gs://{dst}", b"")
920+
assert gcs.info(dst)["contentType"] == "text/plain"
921+
922+
893923
def test_attrs(gcs):
894924
if not gcs.on_google:
895925
# https://github.com/fsspec/gcsfs/pull/479
@@ -1194,7 +1224,6 @@ def test_dir_marker(gcs):
11941224

11951225

11961226
def test_mkdir_with_path(gcs):
1197-
11981227
with pytest.raises(FileNotFoundError):
11991228
gcs.mkdir(f"{TEST_BUCKET + 'new'}/path", create_parents=False)
12001229
assert not gcs.exists(f"{TEST_BUCKET + 'new'}")

0 commit comments

Comments
 (0)