Skip to content

Commit 484f93d

Browse files
committed
Add support for UTF-8 encoding in JSON serialization
1 parent 5e3728c commit 484f93d

File tree

1 file changed

+58
-34
lines changed

1 file changed

+58
-34
lines changed

src/lib/ndpi_serializer.c

+58-34
Original file line numberDiff line numberDiff line change
@@ -88,47 +88,71 @@ static int ndpi_is_number(const char *str, u_int32_t str_len) {
8888
* Upon successful return, these functions return the number of characters printed (excluding the null byte used to terminate the string).
8989
*/
9090
int ndpi_json_string_escape(const char *src, int src_len, char *dst, int dst_max_len) {
91-
char c = 0;
91+
u_char c = 0;
9292
int i, j = 0;
9393

9494
dst[j++] = '"';
9595

9696
for(i = 0; i < src_len && j < dst_max_len; i++) {
9797

98-
c = src[i];
98+
c = (u_char) src[i];
99+
100+
if (c < 0x20 /* ' ' */ || c == 0x7F) {
101+
; // Non-printable ASCII character (skip)
102+
} else if (c < 0x7F) {
103+
/* Valid ASCII character (escape if required) */
104+
switch (c) {
105+
case '\\':
106+
case '"':
107+
case '/':
108+
dst[j++] = '\\';
109+
dst[j++] = c;
110+
break;
111+
case '\b':
112+
dst[j++] = '\\';
113+
dst[j++] = 'b';
114+
break;
115+
case '\t':
116+
dst[j++] = '\\';
117+
dst[j++] = 't';
118+
break;
119+
case '\n':
120+
dst[j++] = '\\';
121+
dst[j++] = 'n';
122+
break;
123+
case '\f':
124+
dst[j++] = '\\';
125+
dst[j++] = 'f';
126+
break;
127+
case '\r':
128+
dst[j++] = '\\';
129+
dst[j++] = 'r';
130+
break;
131+
default:
132+
dst[j++] = c;
133+
}
99134

100-
switch (c) {
101-
case '\\':
102-
case '"':
103-
case '/':
104-
dst[j++] = '\\';
135+
} else if ((c >= 0xC2 && c <= 0xDF) &&
136+
((u_char) src[i+1] >= 0x80 && (u_char) src[i+1] <= 0xBF)) {
137+
// 2-byte sequence (U+0080 to U+07FF)
105138
dst[j++] = c;
106-
break;
107-
case '\b':
108-
dst[j++] = '\\';
109-
dst[j++] = 'b';
110-
break;
111-
case '\t':
112-
dst[j++] = '\\';
113-
dst[j++] = 't';
114-
break;
115-
case '\n':
116-
dst[j++] = '\\';
117-
dst[j++] = 'n';
118-
break;
119-
case '\f':
120-
dst[j++] = '\\';
121-
dst[j++] = 'f';
122-
break;
123-
case '\r':
124-
dst[j++] = '\\';
125-
dst[j++] = 'r';
126-
break;
127-
default:
128-
if(c < ' ')
129-
; /* non printable */
130-
else
131-
dst[j++] = c;
139+
dst[j++] = src[++i];
140+
} else if ((c >= 0xE0 && c <= 0xEF) &&
141+
((u_char) src[i+1] >= 0x80 && (u_char) src[i+1] <= 0xBF) &&
142+
((u_char) src[i+2] >= 0x80 && (u_char) src[i+2] <= 0xBF)) {
143+
// 3-byte sequence (U+0800 to U+FFFF)
144+
dst[j++] = c;
145+
dst[j++] = src[++i];
146+
dst[j++] = src[++i];
147+
} else if ((c >= 0xF0 && c <= 0xF4) &&
148+
((u_char) src[i+1] >= 0x80 && (u_char) src[i+1] <= 0xBF) &&
149+
((u_char) src[i+2] >= 0x80 && (u_char) src[i+2] <= 0xBF) &&
150+
((u_char) src[i+3] >= 0x80 && (u_char) src[i+3] <= 0xBF)) {
151+
// 4-byte sequence (U+10000 to U+10FFFF)
152+
dst[j++] = c;
153+
dst[j++] = src[++i];
154+
dst[j++] = src[++i];
155+
dst[j++] = src[++i];
132156
}
133157
}
134158

@@ -1274,7 +1298,7 @@ int ndpi_serialize_uint32_binary(ndpi_serializer *_serializer,
12741298
int rc;
12751299

12761300
if(serializer->fmt == ndpi_serialization_format_json)
1277-
needed += 24 + slen;
1301+
needed += 24 + slen*2 /* account escape (x2) */;
12781302

12791303
if(buff_diff < needed) {
12801304
if(ndpi_extend_serializer_buffer(&serializer->buffer, needed - buff_diff) < 0)

0 commit comments

Comments
 (0)