Skip to content

Commit

Permalink
Ignore invalid supplied MIME types
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Dec 29, 2023
1 parent 062c229 commit 39c2aec
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 0 deletions.
2 changes: 2 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ class TestMain:
),
)
),
# Malformed MIME type (no "/" separating the type from the subtype)
(b"...", (b"javascript charset=UTF-8",), True, False, None, None, b"text/plain"),
],
)
def test_extract_mime(
Expand Down
24 changes: 24 additions & 0 deletions xtractmime/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
__version__ = "0.2.0"
import re
from typing import Optional, Set, Tuple
from xtractmime._patterns import _APACHE_TYPES, BINARY_BYTES, WHITESPACE_BYTES
from xtractmime._utils import (
Expand Down Expand Up @@ -187,6 +188,27 @@ def _sniff_mislabled_feed(input_bytes: bytes, supplied_type: bytes) -> Optional[
return supplied_type


_TOKEN = br"^\s*[-!#$%&'*+.0-9A-Z^_`a-z{|}~]+\s*$"


def _is_valid_mime_type(mime_type):
"""Return True if the specified MIME type is valid as per RFC 2045, or
False otherwise.
Only the type and subtype are validated, parameters are ignored.
"""
parts = mime_type.split(b"/", maxsplit=1)
if len(parts) < 2:
return False
_type, subtype_and_params = parts
if not re.match(_TOKEN, _type):
return False

Check warning on line 205 in xtractmime/__init__.py

View check run for this annotation

Codecov / codecov/patch

xtractmime/__init__.py#L205

Added line #L205 was not covered by tests
subtype = subtype_and_params.split(b";", maxsplit=1)[0]
if not re.match(_TOKEN, subtype):
return False

Check warning on line 208 in xtractmime/__init__.py

View check run for this annotation

Codecov / codecov/patch

xtractmime/__init__.py#L208

Added line #L208 was not covered by tests
return True


def extract_mime(
body: bytes,
*,
Expand All @@ -199,6 +221,8 @@ def extract_mime(
extra_types = extra_types or tuple()
supplied_type = content_types[-1] if content_types else b""
check_for_apache = http_origin and supplied_type in _APACHE_TYPES
if not _is_valid_mime_type(supplied_type):
supplied_type = b""
supplied_type = supplied_type.split(b";")[0].strip().lower()
resource_header = memoryview(body)[:RESOURCE_HEADER_BUFFER_LENGTH]

Expand Down

0 comments on commit 39c2aec

Please sign in to comment.