-
Notifications
You must be signed in to change notification settings - Fork 5.3k
kafka: new deserializers #17202
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
kafka: new deserializers #17202
Changes from all commits
2cff84e
24a8634
5bd1a50
be3d2d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -177,6 +177,7 @@ class BooleanDeserializer : public Deserializer<bool> { | |
| * Impl note: | ||
| * This implementation is equivalent to the one present in Kafka 2.4.0, which means that for 5-byte | ||
| * inputs, the data at bits 5-7 in 5th byte are *ignored* (as long as 8th bit is unset). | ||
| * Reference: org.apache.kafka.common.utils.ByteUtils.readUnsignedVarint | ||
| */ | ||
| class VarUInt32Deserializer : public Deserializer<uint32_t> { | ||
| public: | ||
|
|
@@ -224,6 +225,90 @@ class VarUInt32Deserializer : public Deserializer<uint32_t> { | |
| bool ready_ = false; | ||
| }; | ||
|
|
||
| /** | ||
| * Deserializer for Kafka 'varint' type. | ||
| * Encoding documentation: https://kafka.apache.org/24/protocol.html#protocol_types | ||
| * | ||
| * Impl note: | ||
| * This implementation is equivalent to the one present in Kafka 2.4.0, which means that for 5-byte | ||
| * inputs, the data at bits 5-7 in 5th byte are *ignored* (as long as 8th bit is unset). | ||
| * Reference: org.apache.kafka.common.utils.ByteUtils.readVarint | ||
| */ | ||
| class VarInt32Deserializer : public Deserializer<int32_t> { | ||
| public: | ||
| VarInt32Deserializer() = default; | ||
|
|
||
| uint32_t feed(absl::string_view& data) override { return varuint32_deserializer_.feed(data); } | ||
|
|
||
| bool ready() const override { return varuint32_deserializer_.ready(); } | ||
|
|
||
| int32_t get() const override { | ||
| const uint32_t res = varuint32_deserializer_.get(); | ||
| return (res >> 1) ^ -(res & 1); | ||
| } | ||
|
|
||
| private: | ||
| VarUInt32Deserializer varuint32_deserializer_; | ||
| }; | ||
|
|
||
| /** | ||
| * Deserializer for Kafka 'varlong' type. | ||
| * Encoding documentation: https://kafka.apache.org/24/protocol.html#protocol_types | ||
| * | ||
| * Impl note: | ||
| * This implementation is equivalent to the one present in Kafka 2.4.0, which means that for 10-byte | ||
| * inputs, the data at bits 3-7 in 10th byte are *ignored* (as long as 8th bit is unset). | ||
| * Reference: org.apache.kafka.common.utils.ByteUtils.readVarlong | ||
| */ | ||
| class VarInt64Deserializer : public Deserializer<int64_t> { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we eventually need a VarUInt64Deserializer that this deserializer can be written on top of, like the above?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, fortunately not - it's only int that has |
||
| public: | ||
| VarInt64Deserializer() = default; | ||
|
|
||
| uint32_t feed(absl::string_view& data) override { | ||
| uint32_t processed = 0; | ||
| while (!ready_ && !data.empty()) { | ||
|
|
||
| // Read next byte from input. | ||
| uint8_t el; | ||
| safeMemcpy(&el, data.data()); | ||
| data = {data.data() + 1, data.size() - 1}; | ||
| processed++; | ||
|
|
||
| // Put the 7 bits where they should have been. | ||
| // Impl note: the cast is done to avoid undefined behaviour when offset_ >= 63 and some bits | ||
| // at positions 3-7 are set (we would have left shift of signed value that does not fit in | ||
| // data type). | ||
| bytes_ |= ((static_cast<uint64_t>(el) & 0x7f) << offset_); | ||
| if ((el & 0x80) == 0) { | ||
| // If this was the last byte to process (which is marked by an unset highest bit), we are done. | ||
| ready_ = true; | ||
| break; | ||
| } else { | ||
| // Otherwise, we need to read next byte. | ||
| offset_ += 7; | ||
| // Valid input can have at most 10 bytes. | ||
| if (offset_ >= 10 * 7) { | ||
| ExceptionUtil::throwEnvoyException( | ||
| "VarInt64 is too long (10th byte has highest bit set)"); | ||
| } | ||
| } | ||
| } | ||
| return processed; | ||
| } | ||
|
|
||
| bool ready() const override { return ready_; } | ||
|
|
||
| int64_t get() const override { | ||
| // Do the final conversion, this is a zig-zag encoded signed value. | ||
| return (bytes_ >> 1) ^ -(bytes_ & 1); | ||
| } | ||
|
|
||
| private: | ||
| uint64_t bytes_ = 0; | ||
| uint32_t offset_ = 0; | ||
| bool ready_ = false; | ||
| }; | ||
|
|
||
| /** | ||
| * Deserializer of string value. | ||
| * First reads length (INT16) and then allocates the buffer of given length. | ||
|
|
@@ -373,10 +458,10 @@ class BytesDeserializer : public Deserializer<Bytes> { | |
|
|
||
| /** | ||
| * Deserializer of compact bytes value. | ||
| * First reads length (UNSIGNED_VARINT) and then allocates the buffer of given length. | ||
| * First reads length (UNSIGNED_VARINT32) and then allocates the buffer of given length. | ||
| * | ||
| * From Kafka documentation: | ||
| * First the length N+1 is given as an UNSIGNED_VARINT. Then N bytes follow. | ||
| * First the length N+1 is given as an UNSIGNED_VARINT32. Then N bytes follow. | ||
| */ | ||
| class CompactBytesDeserializer : public Deserializer<Bytes> { | ||
| public: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we add class comments like the other classes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great idea!