Skip to content

Commit 748537e

Browse files
committed
[FFI] Serialization To/From JSONGraph
This PR implements reflection-based serialization functions for generic ffi::Any values that preserve the reference relations of the overall object graph. These extra APIs are implemented through the reflection system. They can be used to further modernize and unify the serialization mechanisms in the project under the new reflection mechanism.
1 parent a8bd559 commit 748537e

File tree

8 files changed

+998
-7
lines changed

8 files changed

+998
-7
lines changed

ffi/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ if (TVM_FFI_USE_EXTRA_CXX_API)
6868
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/structural_hash.cc"
6969
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/json_parser.cc"
7070
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/json_writer.cc"
71+
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/serialization.cc"
7172
)
7273
endif()
7374

ffi/include/tvm/ffi/extra/base64.h

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
/*!
20+
*
21+
* \file tvm/ffi/extra/base64.h
22+
* \brief Base64 encoding and decoding utilities
23+
*/
24+
#ifndef TVM_FFI_EXTRA_BASE64_H_
25+
#define TVM_FFI_EXTRA_BASE64_H_
26+
27+
#include <tvm/ffi/string.h>
28+
29+
#include <string>
30+
31+
namespace tvm {
32+
namespace ffi {
33+
/*!
34+
* \brief Encode a byte array into a base64 string
35+
* \param bytes The byte array to encode
36+
* \return The base64 encoded string
37+
*/
38+
inline String Base64Encode(TVMFFIByteArray bytes) {
39+
// encoding every 3 bytes into 4 characters
40+
constexpr const char kEncodeTable[] =
41+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
42+
std::string encoded;
43+
encoded.reserve(4 * (bytes.size + 2) / 3);
44+
45+
for (size_t i = 0; i < (bytes.size / 3) * 3; i += 3) {
46+
int32_t buf[3];
47+
buf[0] = static_cast<int32_t>(bytes.data[i]);
48+
buf[1] = static_cast<int32_t>(bytes.data[i + 1]);
49+
buf[2] = static_cast<int32_t>(bytes.data[i + 2]);
50+
encoded.push_back(kEncodeTable[buf[0] >> 2]);
51+
encoded.push_back(kEncodeTable[((buf[0] << 4) | (buf[1] >> 4)) & 0x3F]);
52+
encoded.push_back(kEncodeTable[((buf[1] << 2) | (buf[2] >> 6)) & 0x3F]);
53+
encoded.push_back(kEncodeTable[buf[2] & 0x3F]);
54+
}
55+
if (bytes.size % 3 == 1) {
56+
int32_t buf[1] = {static_cast<int32_t>(bytes.data[bytes.size - 1])};
57+
encoded.push_back(kEncodeTable[buf[0] >> 2]);
58+
encoded.push_back(kEncodeTable[(buf[0] << 4) & 0x3F]);
59+
encoded.push_back('=');
60+
encoded.push_back('=');
61+
} else if (bytes.size % 3 == 2) {
62+
int32_t buf[2] = {static_cast<int32_t>(bytes.data[bytes.size - 2]),
63+
static_cast<int32_t>(bytes.data[bytes.size - 1])};
64+
encoded.push_back(kEncodeTable[buf[0] >> 2]);
65+
encoded.push_back(kEncodeTable[((buf[0] << 4) | (buf[1] >> 4)) & 0x3F]);
66+
encoded.push_back(kEncodeTable[(buf[1] << 2) & 0x3F]);
67+
encoded.push_back('=');
68+
}
69+
return String(encoded);
70+
}
71+
72+
/*!
73+
* \brief Encode a bytes object into a base64 string
74+
* \param data The bytes object to encode
75+
* \return The base64 encoded string
76+
*/
77+
inline String Base64Encode(const Bytes& data) {
78+
return Base64Encode(TVMFFIByteArray{data.data(), data.size()});
79+
}
80+
81+
/*!
82+
* \brief Decode a base64 string into a byte array
83+
* \param data The base64 encoded string to decode
84+
* \return The decoded byte array
85+
*/
86+
inline Bytes Base64Decode(TVMFFIByteArray bytes) {
87+
constexpr const char kDecodeTable[] = {
88+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
89+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
90+
62, // '+'
91+
0, 0, 0,
92+
63, // '/'
93+
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9'
94+
0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
95+
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z'
96+
0, 0, 0, 0, 0, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
97+
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z'
98+
};
99+
std::string decoded;
100+
decoded.reserve(bytes.size * 3 / 4);
101+
if (bytes.size == 0) return Bytes();
102+
TVM_FFI_ICHECK(bytes.size % 4 == 0) << "invalid base64 encoding";
103+
// leverage this property to simplify decoding
104+
static_assert('=' < sizeof(kDecodeTable) && kDecodeTable[static_cast<size_t>('=')] == 0);
105+
// base64 is always multiple of 4 bytes
106+
for (size_t i = 0; i < bytes.size; i += 4) {
107+
// decode every 4 characters into 24bits, each character contains 6 bits
108+
// note that = is also decoded as 0, which is safe to skip
109+
int32_t buf[4] = {
110+
static_cast<int32_t>(bytes.data[i]),
111+
static_cast<int32_t>(bytes.data[i + 1]),
112+
static_cast<int32_t>(bytes.data[i + 2]),
113+
static_cast<int32_t>(bytes.data[i + 3]),
114+
};
115+
int32_t value_i24 = (static_cast<int32_t>(kDecodeTable[buf[0]]) << 18) |
116+
(static_cast<int32_t>(kDecodeTable[buf[1]]) << 12) |
117+
(static_cast<int32_t>(kDecodeTable[buf[2]]) << 6) |
118+
static_cast<int32_t>(kDecodeTable[buf[3]]);
119+
// unpack 24bits into 3 bytes, each contains 8 bits
120+
decoded.push_back(static_cast<char>((value_i24 >> 16) & 0xFF));
121+
if (buf[2] != '=') {
122+
decoded.push_back(static_cast<char>((value_i24 >> 8) & 0xFF));
123+
}
124+
if (buf[3] != '=') {
125+
decoded.push_back(static_cast<char>(value_i24 & 0xFF));
126+
}
127+
}
128+
return Bytes(decoded);
129+
}
130+
131+
/*!
132+
* \brief Decode a base64 string into a byte array
133+
* \param data The base64 encoded string to decode
134+
* \return The decoded byte array
135+
*/
136+
inline Bytes Base64Decode(const String& data) {
137+
return Base64Decode(TVMFFIByteArray{data.data(), data.size()});
138+
}
139+
140+
} // namespace ffi
141+
} // namespace tvm
142+
#endif // TVM_FFI_EXTRA_BASE64_H_

ffi/include/tvm/ffi/extra/json.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919
/*!
20-
* \file tvm/ffi/json/json.h
20+
* \file tvm/ffi/extra/json.h
2121
* \brief Minimal lightweight JSON parsing and serialization utilities
2222
*/
2323
#ifndef TVM_FFI_EXTRA_JSON_H_
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
/*!
20+
* \file tvm/ffi/extra/serialization.h
21+
* \brief Reflection-based serialization utilities
22+
*/
23+
#ifndef TVM_FFI_EXTRA_SERIALIZATION_H_
24+
#define TVM_FFI_EXTRA_SERIALIZATION_H_
25+
26+
#include <tvm/ffi/extra/base.h>
27+
#include <tvm/ffi/extra/json.h>
28+
29+
namespace tvm {
30+
namespace ffi {
31+
32+
/**
33+
* \brief Serialize ffi::Any to a JSON that stores the object graph.
34+
*
35+
* The JSON graph structure is stored as follows:
36+
*
37+
* ```json
38+
* {
39+
* "root_index": <int>, // Index of root node in nodes array
40+
* "nodes": [<node>, ...], // Array of serialized nodes
41+
* "metadata": <object> // Optional metadata
42+
* }
43+
* ```
44+
*
45+
* Each node has the format: `{"type": "<type_key>", "data": <type_data>}`
46+
* For object types and strings, the data may contain indices to other nodes.
47+
* For object fields whose static type is known to be a primitive type, the value is stored directly,
48+
* otherwise, it is stored as a reference to the nodes array by an index.
49+
*
50+
* This function preserves the type and multiple references to the same object,
51+
* which is useful for debugging and serialization.
52+
*
53+
* \param value The ffi::Any value to serialize.
54+
* \param metadata Extra metadata attached to "metadata" field of the JSON object.
55+
* \return The serialized JSON value.
56+
*/
57+
TVM_FFI_EXTRA_CXX_API json::Value ToJSONGraph(const Any& value, const Any& metadata = Any(nullptr));
58+
59+
/**
60+
* \brief Deserialize a JSON that stores the object graph to an ffi::Any value.
61+
*
62+
* This function can be used to implement deserialization
63+
* and debugging.
64+
*
65+
* \param value The JSON value to deserialize.
66+
* \return The deserialized object graph.
67+
*/
68+
TVM_FFI_EXTRA_CXX_API Any FromJSONGraph(const json::Value& value);
69+
70+
} // namespace ffi
71+
} // namespace tvm
72+
#endif // TVM_FFI_EXTRA_SERIALIZATION_H_

0 commit comments

Comments
 (0)