Skip to content
Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
b4719e1
Chunked encoding decoder
WyriHaximus Jul 4, 2016
91cb3a5
Renamed to more suitable class name
WyriHaximus Jul 4, 2016
fb14e64
Removed test debug
WyriHaximus Jul 4, 2016
d6d2c1e
Removed assert true as it wasn't useful to the test
WyriHaximus Jul 4, 2016
dd04416
Swap stream when transfer encoding is chunked
WyriHaximus Jul 4, 2016
897c4a1
Removed chunked transfer encoding from todo
WyriHaximus Jul 4, 2016
b51cdb4
Response test to ensure chunked encoding decoder is used when it should
WyriHaximus Jul 5, 2016
e1a3708
Lower case checking as suggested by @jsor https://github.com/reactphp…
WyriHaximus Jul 6, 2016
bfba037
Chunked encoding extensions as mentioned by @jsor https://github.com/…
WyriHaximus Jul 7, 2016
a3b9c80
Don't emit empty data chunks
WyriHaximus Jul 7, 2016
4ca65b3
Pass the extra's from the stream through the response
WyriHaximus Jul 8, 2016
0eab14b
Changed emitted extra to be more explicit as suggested by @jsor at ht…
WyriHaximus Aug 3, 2016
f9067ce
chunkedExtension => chunkExtension
WyriHaximus Aug 3, 2016
85b0c93
Store CRLF position into a variable, suggested by @clue at https://gi…
WyriHaximus Aug 3, 2016
03d271a
Downgraded to a ReadableStreamInterface from the duplex as suggested …
WyriHaximus Aug 3, 2016
18f044a
Removed write stream `end` event forward as suggested by @clue at htt…
WyriHaximus Aug 3, 2016
cd4ba40
Removed chunk extensions as suggested by @clue as they don't have a u…
WyriHaximus Aug 3, 2016
537de43
Enure chunk length header is in as suggested by @clue at https://gith…
WyriHaximus Aug 4, 2016
42201c8
Ensure CRLF in the middle of bodies don't cause unexpected behavior b…
WyriHaximus Aug 4, 2016
48763cf
Removed surplus arguments via @clue https://github.com/reactphp/http-…
WyriHaximus Aug 9, 2016
b32a188
Marking method internal via @clue https://github.com/reactphp/http-cl…
WyriHaximus Aug 9, 2016
6e1ebcc
Removed dead end method via @clue https://github.com/reactphp/http-cl…
WyriHaximus Aug 9, 2016
69ab9c6
Implement ReadableStreamInterface and added missing methods for it vi…
WyriHaximus Aug 9, 2016
d4eda4d
Removed transfer encoding header as suggested by @joelwurtz at https:…
WyriHaximus Aug 15, 2016
804902f
No need to unset when we're not using it from there on
WyriHaximus Aug 15, 2016
29e283b
Implement chunked encoding overflow vulnerability prevention suggeste…
WyriHaximus Aug 16, 2016
0b43386
Removed accidental committed debug code
WyriHaximus Aug 16, 2016
07f649a
Close decoder stream after emitting error via @clue https://github.co…
WyriHaximus Aug 17, 2016
3ca85d6
Implemented max content length limit, by default set at 32KB, va @clu…
WyriHaximus Aug 17, 2016
7f303d8
Don't iterate the buffer when it is smaller then three bytes (removed…
WyriHaximus Aug 17, 2016
01ecdcb
Made it smaller then two instead of three, as two can still still be …
WyriHaximus Aug 17, 2016
37dfdf9
Renamed $maxBufferLength to $maxChunkLength
WyriHaximus Aug 20, 2016
61fb8a3
Removed maximum chunk length as data is flowing through and not buffe…
WyriHaximus Aug 20, 2016
e4bf1ea
Limit chunk header length to 1024 bytes https://github.com/reactphp/h…
WyriHaximus Aug 20, 2016
69e3335
Unnecessary "
WyriHaximus Aug 20, 2016
6aa86db
When the underlying stream ends emit an error when we have unprocesse…
WyriHaximus Aug 20, 2016
eab3c71
In case a substr fails reinitialize buffer as empty string
WyriHaximus Aug 20, 2016
34ee759
Added note about chunked encoded response to readme
WyriHaximus Aug 20, 2016
4f95569
Use write instead of end to let the stream detect the end properly
WyriHaximus Aug 21, 2016
09cde22
End reading from this stream when the end of the chunked encoding has…
WyriHaximus Aug 21, 2016
d7d0473
Close stream regardless https://github.com/reactphp/http-client/pull/…
WyriHaximus Aug 23, 2016
95277ff
Prevent CRLF DOS https://github.com/reactphp/http-client/pull/58#disc…
WyriHaximus Aug 23, 2016
9d4896f
Ignore chunked encoding trailers https://github.com/reactphp/http-cli…
WyriHaximus Aug 23, 2016
8540b4e
Mark ChunkedStreamDecoder as internal
WyriHaximus Sep 13, 2016
415d790
Gave names to exception test data
WyriHaximus Sep 13, 2016
49e9e9e
Gave names to non-exception test data
WyriHaximus Sep 13, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,5 @@ $loop->run();
## TODO

* gzip content encoding
* chunked transfer encoding
* keep-alive connections
* following redirections
186 changes: 186 additions & 0 deletions src/ChunkedStreamDecoder.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
<?php

namespace React\HttpClient;

use Evenement\EventEmitterTrait;
use Exception;
use React\Stream\ReadableStreamInterface;
use React\Stream\Util;
use React\Stream\WritableStreamInterface;

class ChunkedStreamDecoder implements ReadableStreamInterface
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we mark this as @internal or should consumers of this library be allowed to rely on this?

Afaict this should only be used internally and should not be part of our public API.

{
const CRLF = "\r\n";

use EventEmitterTrait;

/**
* @var string
*/
protected $buffer = '';

/**
* @var int
*/
protected $remainingLength = 0;

/**
* @var bool
*/
protected $nextChunkIsLength = true;

/**
* @var ReadableStreamInterface
*/
protected $stream;

/**
* @var bool
*/
protected $closed = false;

/**
* @param ReadableStreamInterface $stream
*/
public function __construct(ReadableStreamInterface $stream)
{
$this->stream = $stream;
$this->stream->on('data', array($this, 'handleData'));
$this->stream->on('end', array($this, 'handleEnd'));
Util::forwardEvents($this->stream, $this, [
'error',
]);
}

/** @internal */
public function handleData($data)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/** @internal */ (just in case so we can change this in the future without introducing a BC break)

{
$this->buffer .= $data;

do {
$bufferLength = strlen($this->buffer);
$continue = $this->iterateBuffer();
$iteratedBufferLength = strlen($this->buffer);
} while (
$continue &&
$bufferLength !== $iteratedBufferLength &&
$iteratedBufferLength > 0 &&
strpos($this->buffer, static::CRLF) !== false
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be used for a DOS because all data will be buffered until a CRLF has been found (see also below).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This still seems to be an issue?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah sorry about that, I'll remove it later today when I push the rest of the changes 👍 .

);
}

protected function iterateBuffer()
{
if (strlen($this->buffer) <= 1) {
return false;
}

if ($this->nextChunkIsLength) {
$crlfPosition = strpos($this->buffer, static::CRLF);
if ($crlfPosition === false && strlen($this->buffer) > 1024) {
$this->emit('error', [
new Exception('Chunk length header longer then 1024 bytes'),
]);
$this->close();
return false;
}
if ($crlfPosition === false) {
return false; // Chunk header hasn't completely come in yet
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be used for a DOS because all data will be buffered until a CRLF has been found (see also above).

We should probably apply a limit here (1K or so) and then emit an error if no CRLF can be found.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this is probably the thing that should be limited instead? The chunk header should have a reasonable length, whereas the chunk body can be of arbitrary size? :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes agreed. But how long could it be? 8, 16, 32? I really don't have a clue tbh. Maybe 1024 or 2048 is a safe bet so chunk header extensions also have enough room. Might have to look this up in the RFC :)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The RFC makes no assumptions about the length: https://tools.ietf.org/html/rfc7230#section-4.1

Chunked extensions are rarely used in practice and we don't expose them anyway, so let's just play safe and assume 1KiB here? This should be plenty of room and will not cause any buffering issues even if we have thousands of streams open.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1024 bytes it is 👍 .

$this->nextChunkIsLength = false;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This block does not currently check the chunk header is actually complete. Should add a test for an incomplete chunk header without a trailing CRLF?

$lengthChunk = substr($this->buffer, 0, $crlfPosition);
if (strpos($lengthChunk, ';') !== false) {
list($lengthChunk) = explode(';', $lengthChunk, 2);
}
if (dechex(hexdec($lengthChunk)) !== $lengthChunk) {
$this->emit('error', [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should emit the error event and then close this stream (which should implicitly close the parent stream).

new Exception('Unable to validate "' . $lengthChunk . '" as chunk length header'),
]);
$this->close();
return false;
}
$this->remainingLength = hexdec($lengthChunk);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may want to sanitize/validate the chunk length here :) (many similar projects have suffered from a chunked encoding overflow vulnerability).

$this->buffer = substr($this->buffer, $crlfPosition + 2);
return true;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably emit an end event when the final chunk has been found, to signal a "clean close"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah it should, but doesn't it make more sense to forward the event from the underlying stream rather then detecting it our self?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the underlying stream emits an end while we're expecting more data, this actually means the chunked stream should emit an error, because it contains a chunk that is now invalid.

If the chunk decoder sees a zero length chunk it means the chunked stream ends, it should no longer expect any data from the input stream. (This is in fact also used for connection reuse, as the underlying streams can be reused for the next request/response)

See for example https://github.com/clue/php-term-react/blob/bb8784283aef16b8b1fe3ed5a8fe16d4669cfc92/src/ControlCodeParser.php#L205

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the chunk decoder sees a zero length chunk it means the chunked stream ends

That might not be entirely true but I have to check it. When first writing a first POC for this PR against the twitter streams it would happen that nothing was to be send during X seconds and this twitter would send a new chunk to keep the connection alive. Now I can't recall exactly if that was a zero length chunk or not but I have to check it. But if we see a 0\r\n\r\n I think we can safely assume that the body has been fully transmitted.

See for example https://github.com/clue/php-term-react/blob/bb8784283aef16b8b1fe3ed5a8fe16d4669cfc92/src/ControlCodeParser.php#L205

That looks good, I'll implement that 👍

}

if ($this->remainingLength > 0) {
$chunkLength = $this->getChunkLength();
if ($chunkLength === 0) {
return true;
}
$this->emit('data', array(
substr($this->buffer, 0, $chunkLength),
$this
));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sold on the idea we should emit the chunk extension as part of the data message.
Do we have any specific use case for this right now?

Personally, I would rather remove this for now (reduces complexity). We can still think about introducing this once we see an actual need and we can figure out a decent API without introducing a BC break.

Also, in my projects I only emit the actual data as the other event attributes have always been unreliable across the React ecosystem and it's very easy to use a closure to add arbitrary references to the event handler anyway (e.g. https://github.com/clue/php-utf8-react/blob/master/src/Sequencer.php#L111)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very valid point, especially since there is no use case. I'll remove it

$this->remainingLength -= $chunkLength;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a design decision here: Do we really want to emit incomplete chunks or do we honor the chunk header and then only emit complete chunks?

I'm currently undecided on this, but would likely have suggested to honor the header.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reasoning behind not honoring the header is to keep as less data floating around somewhere in a buffer and handing it over to who ever is using the client as soon as possible.

I don't have any strong preference regarding this. Can write up a benchmark to see which method performs best in various situations, but not sure if that is worth the effort 😄 .

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair point: If the header says the next chunk is 2 GB in size, we should probably not allocate a 2 GB buffer :)

Let's keep this as-is, but perhaps consider adding some documentation and/or tests? 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will add docs and tests 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests have been added by the way

$this->buffer = substr($this->buffer, $chunkLength);
return true;
}

$this->nextChunkIsLength = true;
$this->buffer = substr($this->buffer, 2);
return true;
}

protected function getChunkLength()
{
$bufferLength = strlen($this->buffer);

if ($bufferLength >= $this->remainingLength) {
return $this->remainingLength;
}

return $bufferLength;
}

public function pause()
{
$this->stream->pause();
}

public function resume()
{
$this->stream->resume();
}

public function isReadable()
{
return $this->stream->isReadable();
}

public function pipe(WritableStreamInterface $dest, array $options = array())
{
Util::pipe($this, $dest, $options);

return $dest;
}

public function close()
{
$this->closed = true;
return $this->stream->close();
}

/** @internal */
public function handleEnd()
{
if ($this->closed) {
return;
}

if ($this->buffer === '') {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently PHP5 tests are failing over this. Changing === to == fixes it. Investigating why.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Found the issue, in case substr fails it will return false. Forcing the buffer back into a sting in case that happens.

$this->emit('end');
$this->close();
return;
}

$this->emit(
'error',
[
new Exception('Stream ended with incomplete control code')
]
);
$this->close();
}
}
18 changes: 15 additions & 3 deletions src/Response.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,22 @@ public function __construct(DuplexStreamInterface $stream, $protocol, $version,
$this->code = $code;
$this->reasonPhrase = $reasonPhrase;
$this->headers = $headers;
$normalizedHeaders = array_change_key_case($headers, CASE_LOWER);

$stream->on('data', array($this, 'handleData'));
$stream->on('error', array($this, 'handleError'));
$stream->on('end', array($this, 'handleEnd'));
if (isset($normalizedHeaders['transfer-encoding']) && strtolower($normalizedHeaders['transfer-encoding']) === 'chunked') {
$this->stream = new ChunkedStreamDecoder($stream);

foreach ($this->headers as $key => $value) {
if (strcasecmp('transfer-encoding', $key) === 0) {
unset($this->headers[$key]);
break;
}
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically, this introduces a BC break.

How can a consumer of this library tell if the Transfer-Encoding has already been processed?

Should we raise to v0.5.0?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe remove this header on the request if it has been processed ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds like a solid way to go, what do you think @clue?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is also what I've had in mind, let's give it a try 👍

Documentation and/or tests would be much appreciated 👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do 👍


$this->stream->on('data', array($this, 'handleData'));
$this->stream->on('error', array($this, 'handleError'));
$this->stream->on('end', array($this, 'handleEnd'));
}

public function getProtocol()
Expand Down
129 changes: 129 additions & 0 deletions tests/DecodeChunkedStreamTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
<?php

namespace React\Tests\HttpClient;

use Exception;
use React\HttpClient\ChunkedStreamDecoder;
use React\Stream\ThroughStream;

class DecodeChunkedStreamTest extends TestCase
{
public function provideChunkedEncoding()
{
return [
[
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we should assign names to the data sets? The outer array can use assoc keys to describe what each data set does and eventually may ease debugging. (see also below)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah should have done that from the start, now I have to figure out which is which 👍

["4\r\nWiki\r\n5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n"],
],
[
["4\r\nWiki\r\n", "5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n"],
],
[
["4\r\nWiki\r\n", "5\r\n", "pedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n"],
],
[
["4\r\nWiki\r\n", "5\r\n", "pedia\r\ne\r\n in\r\n", "\r\nchunks.\r\n0\r\n\r\n"],
],
[
["4\r\n", "Wiki\r\n", "5\r\n", "pedia\r\ne\r\n in\r\n", "\r\nchunks.\r\n0\r\n\r\n"],
],
[
["4\r\n", "Wiki\r\n", "5\r\n", "pedia\r\ne; foo=[bar,beer,pool,cue,win,won]\r\n", " in\r\n", "\r\nchunks.\r\n0\r\n\r\n"],
],
[
["4; foo=bar\r\n", "Wiki\r\n", "5\r\n", "pedia\r\ne\r\n", " in\r\n", "\r\nchunks.\r\n", "0\r\n\r\n"],
],
[
str_split("4\r\nWiki\r\n5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n"),
],
[
str_split("6\r\nWi\r\nki\r\n5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n"),
"Wi\r\nkipedia in\r\n\r\nchunks."
],
[
["6\r\nWi\r\n", "ki\r\n5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n"],
"Wi\r\nkipedia in\r\n\r\nchunks."
],
];
}

/**
* @test
* @dataProvider provideChunkedEncoding
*/
public function testChunkedEncoding(array $strings, $expected = "Wikipedia in\r\n\r\nchunks.")
{
$stream = new ThroughStream();
$response = new ChunkedStreamDecoder($stream);
$buffer = '';
$response->on('data', function ($data) use (&$buffer) {
$buffer .= $data;
});
$response->on('error', function (Exception $exception) {
throw $exception;
});
foreach ($strings as $string) {
$stream->write($string);
}
$this->assertSame($expected, $buffer);
}

public function provideInvalidChunkedEncoding()
{
return [
[
["4\r\nWiwot40n98w3498tw3049nyn039409t34\r\n", "ki\r\n5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n"],
],
[
str_split("xyz\r\nWi\r\nki\r\n5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n")
],
[
str_split(str_repeat('a', 2015) . "\r\nWi\r\nki\r\n5\r\npedia\r\ne\r\n in\r\n\r\nchunks.\r\n0\r\n\r\n")
],
];
}

/**
* @test
* @dataProvider provideInvalidChunkedEncoding
* @expectedException Exception
*/
public function testInvalidChunkedEncoding(array $strings)
{
$stream = new ThroughStream();
$response = new ChunkedStreamDecoder($stream);
$response->on('error', function (Exception $exception) {
throw $exception;
});
foreach ($strings as $string) {
$stream->write($string);
}
}

public function testHandleEnd()
{
$ended = false;
$stream = new ThroughStream();
$response = new ChunkedStreamDecoder($stream);
$response->on('end', function () use (&$ended) {
$ended = true;
});

$stream->end("4\r\nWiki\r\n0\r\n\r\n");

$this->assertTrue($ended);
}

public function testHandleEndIncomplete()
{
$exception = null;
$stream = new ThroughStream();
$response = new ChunkedStreamDecoder($stream);
$response->on('error', function ($e) use (&$exception) {
$exception = $e;
});

$stream->end("4\r\nWiki");

$this->assertInstanceOf('Exception', $exception);
}
}
Loading