Skip to content

Commit 0cc6560

Browse files
authored
Ignore invalid supplied MIME types (#18)
1 parent 062c229 commit 0cc6560

File tree

2 files changed

+34
-0
lines changed

2 files changed

+34
-0
lines changed

tests/test_main.py

+10
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,16 @@ class TestMain:
127127
),
128128
)
129129
),
130+
# Malformed MIME type
131+
*(
132+
(b"...", (mime_type,), True, False, None, None, b"text/plain")
133+
for mime_type in (
134+
b"javascript charset=UTF-8",
135+
b"a/b/c",
136+
b"a/[",
137+
b"[/a",
138+
)
139+
),
130140
],
131141
)
132142
def test_extract_mime(

xtractmime/__init__.py

+24
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
__version__ = "0.2.0"
2+
import re
23
from typing import Optional, Set, Tuple
34
from xtractmime._patterns import _APACHE_TYPES, BINARY_BYTES, WHITESPACE_BYTES
45
from xtractmime._utils import (
@@ -187,6 +188,27 @@ def _sniff_mislabled_feed(input_bytes: bytes, supplied_type: bytes) -> Optional[
187188
return supplied_type
188189

189190

191+
_TOKEN = rb"^\s*[-!#$%&'*+.0-9A-Z^_`a-z{|}~]+\s*$"
192+
193+
194+
def _is_valid_mime_type(mime_type):
195+
"""Return True if the specified MIME type is valid as per RFC 2045, or
196+
False otherwise.
197+
198+
Only the type and subtype are validated, parameters are ignored.
199+
"""
200+
parts = mime_type.split(b"/", maxsplit=1)
201+
if len(parts) < 2:
202+
return False
203+
_type, subtype_and_params = parts
204+
if not re.match(_TOKEN, _type):
205+
return False
206+
subtype = subtype_and_params.split(b";", maxsplit=1)[0]
207+
if not re.match(_TOKEN, subtype):
208+
return False
209+
return True
210+
211+
190212
def extract_mime(
191213
body: bytes,
192214
*,
@@ -199,6 +221,8 @@ def extract_mime(
199221
extra_types = extra_types or tuple()
200222
supplied_type = content_types[-1] if content_types else b""
201223
check_for_apache = http_origin and supplied_type in _APACHE_TYPES
224+
if not _is_valid_mime_type(supplied_type):
225+
supplied_type = b""
202226
supplied_type = supplied_type.split(b";")[0].strip().lower()
203227
resource_header = memoryview(body)[:RESOURCE_HEADER_BUFFER_LENGTH]
204228

0 commit comments

Comments
 (0)