Skip to content

Commit fcedc66

Browse files
kxepal, asvetlov
authored and committed
Support reading multipart data with \n (LF) lines (#3492)
* Support reading multipart data with `\n` (`LF`) lines. While the RFC clearly requires `CRLF` newlines, there are quite a lot of implementations which use just `LF`. Even Python's stdlib produces multiparts with `\n` newlines by default for compatibility reasons. We won't change how we produce multipart content - here we follow the RFC. However, we can detect `\n` lines quite easily, which makes supporting them quite cheap. * Add a test for mixed newlines. Just in case. That's a strange case, but it seems we pass it. * Make the newline argument keyword-only and explicitly private. This argument is not designed to be set by users. It is chosen automatically depending on the newline format of the multipart data being parsed, and due to the recursive nature of the multipart format it has to be passed around to nested readers.
1 parent 8fbe7a1 commit fcedc66

File tree

3 files changed

+429
-208
lines changed

3 files changed

+429
-208
lines changed

CHANGES/2302.feature

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Support reading multipart data with `\n` (`LF`) lines

aiohttp/multipart.py

+40-13
Original file line number | Diff line number | Diff line change
@@ -237,11 +237,17 @@ class BodyPartReader:
237237

238238
chunk_size = 8192
239239

240-
def __init__(self, boundary: bytes,
241-
headers: Mapping[str, Optional[str]],
242-
content: StreamReader) -> None:
240+
def __init__(
241+
self,
242+
boundary: bytes,
243+
headers: Mapping[str, Optional[str]],
244+
content: StreamReader,
245+
*,
246+
_newline: bytes = b'\r\n'
247+
) -> None:
243248
self.headers = headers
244249
self._boundary = boundary
250+
self._newline = _newline
245251
self._content = content
246252
self._at_eof = False
247253
length = self.headers.get(CONTENT_LENGTH, None)
@@ -300,8 +306,8 @@ async def read_chunk(self, size: int=chunk_size) -> bytes:
300306
if self._read_bytes == self._length:
301307
self._at_eof = True
302308
if self._at_eof:
303-
clrf = await self._content.readline()
304-
assert b'\r\n' == clrf, \
309+
newline = await self._content.readline()
310+
assert newline == self._newline, \
305311
'reader did not read all the data or it is malformed'
306312
return chunk
307313

@@ -328,11 +334,15 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
328334
assert self._content_eof < 3, "Reading after EOF"
329335
assert self._prev_chunk is not None
330336
window = self._prev_chunk + chunk
331-
sub = b'\r\n' + self._boundary
337+
338+
intermeditate_boundary = self._newline + self._boundary
339+
332340
if first_chunk:
333-
idx = window.find(sub)
341+
pos = 0
334342
else:
335-
idx = window.find(sub, max(0, len(self._prev_chunk) - len(sub)))
343+
pos = max(0, len(self._prev_chunk) - len(intermeditate_boundary))
344+
345+
idx = window.find(intermeditate_boundary, pos)
336346
if idx >= 0:
337347
# pushing boundary back to content
338348
with warnings.catch_warnings():
@@ -344,6 +354,7 @@ async def _read_chunk_from_stream(self, size: int) -> bytes:
344354
chunk = window[len(self._prev_chunk):idx]
345355
if not chunk:
346356
self._at_eof = True
357+
347358
result = self._prev_chunk
348359
self._prev_chunk = chunk
349360
return result
@@ -372,7 +383,8 @@ async def readline(self) -> bytes:
372383
else:
373384
next_line = await self._content.readline()
374385
if next_line.startswith(self._boundary):
375-
line = line[:-2] # strip CRLF but only once
386+
# strip newline but only once
387+
line = line[:-len(self._newline)]
376388
self._unread.append(next_line)
377389

378390
return line
@@ -516,10 +528,16 @@ class MultipartReader:
516528
#: Body part reader class for non multipart/* content types.
517529
part_reader_cls = BodyPartReader
518530

519-
def __init__(self, headers: Mapping[str, str],
520-
content: StreamReader) -> None:
531+
def __init__(
532+
self,
533+
headers: Mapping[str, str],
534+
content: StreamReader,
535+
*,
536+
_newline: bytes = b'\r\n'
537+
) -> None:
521538
self.headers = headers
522539
self._boundary = ('--' + self._get_boundary()).encode()
540+
self._newline = _newline
523541
self._content = content
524542
self._last_part = None
525543
self._at_eof = False
@@ -592,9 +610,13 @@ def _get_part_reader(self, headers: 'CIMultiDictProxy[str]') -> Any:
592610
if mimetype.type == 'multipart':
593611
if self.multipart_reader_cls is None:
594612
return type(self)(headers, self._content)
595-
return self.multipart_reader_cls(headers, self._content)
613+
return self.multipart_reader_cls(
614+
headers, self._content, _newline=self._newline
615+
)
596616
else:
597-
return self.part_reader_cls(self._boundary, headers, self._content)
617+
return self.part_reader_cls(
618+
self._boundary, headers, self._content, _newline=self._newline
619+
)
598620

599621
def _get_boundary(self) -> str:
600622
mimetype = parse_mimetype(self.headers[CONTENT_TYPE])
@@ -625,6 +647,11 @@ async def _read_until_first_boundary(self) -> None:
625647
if chunk == b'':
626648
raise ValueError("Could not find starting boundary %r"
627649
% (self._boundary))
650+
if chunk.startswith(self._boundary):
651+
_, newline = chunk.split(self._boundary, 1)
652+
assert newline in (b'\r\n', b'\n')
653+
self._newline = newline
654+
628655
chunk = chunk.rstrip()
629656
if chunk == self._boundary:
630657
return

0 commit comments

Comments
 (0)