Skip to content

Commit

Permalink
[FLASH-37] Json support (#159)
Browse files Browse the repository at this point in the history
  • Loading branch information
ilovesoup2000 authored Aug 22, 2019
1 parent a4bb94d commit 84ededb
Show file tree
Hide file tree
Showing 3 changed files with 295 additions and 1 deletion.
8 changes: 7 additions & 1 deletion dbms/src/Storages/Transaction/Codec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <DataTypes/DataTypeDecimal.h>
#include <Storages/Transaction/TiDB.h>
#include <Storages/Transaction/TiKVVarInt.h>
#include <Storages/Transaction/JSONCodec.h>

namespace DB
{
Expand Down Expand Up @@ -290,9 +291,11 @@ Field DecodeDatum(size_t & cursor, const String & raw_value)
case TiDB::CodecFlagVarInt:
return DecodeVarInt(cursor, raw_value);
case TiDB::CodecFlagDuration:
throw Exception("Not implented yet. DecodeDatum: CodecFlagDuration", ErrorCodes::LOGICAL_ERROR);
throw Exception("Not implemented yet. DecodeDatum: CodecFlagDuration", ErrorCodes::LOGICAL_ERROR);
case TiDB::CodecFlagDecimal:
return DecodeDecimal(cursor, raw_value);
case TiDB::CodecFlagJson:
return DecodeJsonAsBinary(cursor, raw_value);
default:
throw Exception("Unknown Type:" + std::to_string(raw_value[cursor - 1]), ErrorCodes::LOGICAL_ERROR);
}
Expand Down Expand Up @@ -330,6 +333,9 @@ void SkipDatum(size_t & cursor, const String & raw_value)
case TiDB::CodecFlagDecimal:
SkipDecimal(cursor, raw_value);
return;
case TiDB::CodecFlagJson:
SkipJson(cursor, raw_value);
return;
default:
throw Exception("Unknown Type:" + std::to_string(raw_value[cursor - 1]), ErrorCodes::LOGICAL_ERROR);
}
Expand Down
276 changes: 276 additions & 0 deletions dbms/src/Storages/Transaction/JSONCodec.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
#include <Storages/Transaction/Codec.h>
#include <Storages/Transaction/JSONCodec.h>

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <Poco/JSON/Array.h>
#include <Poco/JSON/Object.h>
#pragma GCC diagnostic pop

/**
* https://github.com/pingcap/tidb/blob/release-3.0/types/json/binary.go
The binary JSON format from MySQL 5.7 is as follows:
JSON doc ::= type value
type ::=
0x01 | // large JSON object
0x03 | // large JSON array
0x04 | // literal (true/false/null)
0x05 | // int16
0x06 | // uint16
0x07 | // int32
0x08 | // uint32
0x09 | // int64
0x0a | // uint64
0x0b | // double
0x0c | // utf8mb4 string
value ::=
object |
array |
literal |
number |
string |
object ::= element-count size key-entry* value-entry* key* value*
array ::= element-count size value-entry* value*
// number of members in object or number of elements in array
element-count ::= uint32
// number of bytes in the binary representation of the object or array
size ::= uint32
key-entry ::= key-offset key-length
key-offset ::= uint32
key-length ::= uint16 // key length must be less than 64KB
value-entry ::= type offset-or-inlined-value
// This field holds either the offset to where the value is stored,
// or the value itself if it is small enough to be inlined (that is,
// if it is a JSON literal or a small enough [u]int).
offset-or-inlined-value ::= uint32
key ::= utf8mb4-data
literal ::=
0x00 | // JSON null literal
0x01 | // JSON true literal
0x02 | // JSON false literal
number ::= .... // little-endian format for [u]int(16|32|64), whereas
// double is stored in a platform-independent, eight-byte
// format using float8store()
string ::= data-length utf8mb4-data
data-length ::= uint8* // If the high bit of a byte is 1, the length
// field is continued in the next byte,
// otherwise it is the last byte of the length
// field. So we need 1 byte to represent
// lengths up to 127, 2 bytes to represent
// lengths up to 16383, and so on...
*/
namespace DB
{

namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}

using JsonVar = Poco::Dynamic::Var;

constexpr UInt8 TYPE_CODE_OBJECT = 0x01; // TypeCodeObject indicates the JSON is an object.
constexpr UInt8 TYPE_CODE_ARRAY = 0x03; // TypeCodeArray indicates the JSON is an array.
constexpr UInt8 TYPE_CODE_LITERAL = 0x04; // TypeCodeLiteral indicates the JSON is a literal.
constexpr UInt8 TYPE_CODE_INT64 = 0x09; // TypeCodeInt64 indicates the JSON is a signed integer.
constexpr UInt8 TYPE_CODE_UINT64 = 0x0a; // TypeCodeUint64 indicates the JSON is a unsigned integer.
constexpr UInt8 TYPE_CODE_FLOAT64 = 0x0b; // TypeCodeFloat64 indicates the JSON is a double float number.
constexpr UInt8 TYPE_CODE_STRING = 0x0c; // TypeCodeString indicates the JSON is a string.
constexpr UInt8 LITERAL_NIL = 0x00; // LiteralNil represents JSON null.
constexpr UInt8 LITERAL_TRUE = 0x01; // LiteralTrue represents JSON true.
constexpr UInt8 LITERAL_FALSE = 0x02; // LiteralFalse represents JSON false.

constexpr size_t VALUE_ENTRY_SIZE = 5;
constexpr size_t KEY_ENTRY_LENGTH = 6;
constexpr size_t PREFIX_LENGTH = 8;

using JsonArrayPtr = Poco::JSON::Array::Ptr;
using JsonObjectPtr = Poco::JSON::Object::Ptr;

JsonArrayPtr decodeArray(size_t & cursor, const String & raw_value);
JsonObjectPtr decodeObject(size_t & cursor, const String & raw_value);
inline JsonVar decodeLiteral(size_t & cursor, const String & raw_value);
inline String decodeString(size_t & cursor, const String & raw_value);
JsonVar decodeValue(UInt8 type, size_t & cursor, const String & raw_value);

// Below funcs decode via relative offset and base offset does not move
JsonVar decodeValueEntry(size_t base, const String & raw_value, size_t value_offset);
inline String decodeString(size_t base, const String & raw_value, size_t length);

template <typename T>
inline T decodeNumeric(size_t & cursor, const String & raw_value)
{
T res = *(reinterpret_cast<const T *>(raw_value.data() + cursor));
cursor += sizeof(T);
return res;
}

JsonObjectPtr decodeObject(size_t & cursor, const String & raw_value)
{
UInt32 element_count = decodeNumeric<UInt32>(cursor, raw_value);
size_t size = decodeNumeric<UInt32>(cursor, raw_value);
size_t base = cursor;
JsonObjectPtr obj_ptr = new Poco::JSON::Object();

for (UInt32 i = 0; i < element_count; i++)
{
// offset points to head of string content instead of length so - 2
size_t entry_base = base + i * KEY_ENTRY_LENGTH;
size_t key_offset = base + decodeNumeric<UInt32>(entry_base, raw_value) - 8;
size_t key_length = decodeNumeric<UInt16>(entry_base, raw_value);
String key = decodeString(key_offset, raw_value, key_length);

JsonVar val = decodeValueEntry(base, raw_value, element_count * KEY_ENTRY_LENGTH + i * VALUE_ENTRY_SIZE);
obj_ptr->set(key, val);
}
cursor += size - 8;

return obj_ptr;
}

JsonArrayPtr decodeArray(size_t & cursor, const String & raw_value)
{
UInt32 element_count = decodeNumeric<UInt32>(cursor, raw_value);
size_t size = decodeNumeric<UInt32>(cursor, raw_value);

JsonArrayPtr array_ptr = new Poco::JSON::Array();
for (UInt32 i = 0; i < element_count; i++)
{
JsonVar val = decodeValueEntry(cursor, raw_value, VALUE_ENTRY_SIZE * i);
array_ptr->add(val);
}
cursor += size - 8;
return array_ptr;
}

JsonVar decodeValueEntry(size_t base, const String & raw_value, size_t value_entry_offset)
{
UInt8 type = raw_value[base + value_entry_offset];
size_t abs_entry_offset = base + value_entry_offset + 1;

if (type == TYPE_CODE_LITERAL)
{
return decodeLiteral(abs_entry_offset, raw_value);
}

size_t value_offset = base + decodeNumeric<UInt32>(abs_entry_offset, raw_value) - PREFIX_LENGTH;
return decodeValue(type, value_offset, raw_value);
}

JsonVar decodeValue(UInt8 type, size_t & cursor, const String & raw_value)
{
switch (type) // JSON Root element type
{
case TYPE_CODE_OBJECT:
return decodeObject(cursor, raw_value);
case TYPE_CODE_ARRAY:
return decodeArray(cursor, raw_value);
case TYPE_CODE_LITERAL:
return decodeLiteral(cursor, raw_value);
case TYPE_CODE_INT64:
return JsonVar(decodeNumeric<Int64>(cursor, raw_value));
case TYPE_CODE_UINT64:
return JsonVar(decodeNumeric<UInt64>(cursor, raw_value));
case TYPE_CODE_FLOAT64:
return JsonVar(decodeNumeric<Float64>(cursor, raw_value));
case TYPE_CODE_STRING:
return JsonVar(decodeString(cursor, raw_value));
default:
throw Exception("decodeValue: Unknown JSON Element Type:" + std::to_string(type), ErrorCodes::LOGICAL_ERROR);
}
}

inline JsonVar decodeLiteral(size_t & cursor, const String & raw_value)
{
UInt8 type = raw_value[cursor++];
switch (type)
{
case LITERAL_FALSE:
return JsonVar(false);
case LITERAL_NIL:
return JsonVar();
case LITERAL_TRUE:
return JsonVar(true);
default:
throw Exception("decodeLiteral: Unknown JSON Literal Type:" + std::to_string(type), ErrorCodes::LOGICAL_ERROR);
}
}

inline String decodeString(size_t base, const String & raw_value, size_t length) { return String(raw_value, base, length); }

inline String decodeString(size_t & cursor, const String & raw_value)
{
size_t length = DecodeVarUInt(cursor, raw_value);
String val = String(raw_value, cursor, length);
cursor += length;
return val;
}

String DecodeJsonAsString(size_t & cursor, const String & raw_value)
{
UInt8 type = raw_value[cursor++];
return decodeValue(type, cursor, raw_value);
}

template<bool doDecode>
struct need_decode{};


template<>
struct need_decode<true>{ typedef String type; };

template<>
struct need_decode<false>{ typedef void type; };

template <bool doDecode>
typename need_decode<doDecode>::type DecodeJson(size_t & cursor, const String & raw_value)
{
size_t base = cursor;
UInt8 type = raw_value[cursor++];
size_t size = 0;

switch (type) // JSON Root element type
{
case TYPE_CODE_OBJECT:
cursor += 4;
size = decodeNumeric<UInt32>(cursor, raw_value);
break;
case TYPE_CODE_ARRAY:
cursor += 4;
size = decodeNumeric<UInt32>(cursor, raw_value);
break;
case TYPE_CODE_LITERAL:
size = 1;
break;
case TYPE_CODE_INT64:
case TYPE_CODE_UINT64:
case TYPE_CODE_FLOAT64:
size = 8;
break;
case TYPE_CODE_STRING:
size = DecodeVarUInt(cursor, raw_value);
size += (cursor - base - 1);
break;
default:
throw Exception("DecodeJsonBinary: Unknown JSON Element Type:" + std::to_string(type), ErrorCodes::LOGICAL_ERROR);
}

size++;
cursor = base + size;
if (!doDecode)
return static_cast<typename need_decode<doDecode>::type>(0);
else
return static_cast<typename need_decode<doDecode>::type>(raw_value.substr(base, size));
}

void SkipJson(size_t & cursor, const String & raw_value)
{
DecodeJson<false>(cursor, raw_value);
}

String DecodeJsonAsBinary(size_t & cursor, const String & raw_value)
{
return DecodeJson<true>(cursor, raw_value);
}

} // namespace DB
12 changes: 12 additions & 0 deletions dbms/src/Storages/Transaction/JSONCodec.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

#include <Core/Types.h>

namespace DB
{

void SkipJson(size_t & cursor, const String & raw_value);
String DecodeJsonAsBinary(size_t & cursor, const String & raw_value);
String DecodeJsonAsString(size_t & cursor, const String & raw_value);

} // namespace DB

0 comments on commit 84ededb

Please sign in to comment.