Skip to content

Commit

Permalink
Don't require nul-terminated string inputs
Browse files Browse the repository at this point in the history
Avoid giving '\0' characters special treatment in JSON parsing code. This way
the parser will reject input strings like "[1,2,3]\0-extra-nonsense" instead of
returning a partially parsed interpretation of the input. It also allows the
parser to be called with char pointers that might not be nul terminated (like
the char pointers returned by std::string::data()).

Fixes following test failures in https://github.com/nst/JSONTestSuite:

bitcoin SHOULD_HAVE_FAILED  n_multidigit_number_then_00.json
  • Loading branch information
ryanofsky committed Nov 8, 2016
1 parent 3f03bfd commit fd32d1a
Show file tree
Hide file tree
Showing 8 changed files with 48 additions and 23 deletions.
8 changes: 7 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ libunivalue_la_LDFLAGS = \
-no-undefined
libunivalue_la_CXXFLAGS = -I$(top_srcdir)/include

TESTS = test/unitester
TESTS = test/unitester test/no_nul

GENBIN = gen/gen$(BUILD_EXEEXT)
GEN_SRCS = gen/gen.cpp
Expand All @@ -42,6 +42,11 @@ test_unitester_LDADD = libunivalue.la
test_unitester_CXXFLAGS = -I$(top_srcdir)/include -DJSON_TEST_SRC=\"$(srcdir)/$(TEST_DATA_DIR)\"
test_unitester_LDFLAGS = -static $(LIBTOOL_APP_LDFLAGS)

test_no_nul_SOURCES = test/no_nul.cpp
test_no_nul_LDADD = libunivalue.la
test_no_nul_CXXFLAGS = -I$(top_srcdir)/include
test_no_nul_LDFLAGS = -static $(LIBTOOL_APP_LDFLAGS)

TEST_FILES = \
$(TEST_DATA_DIR)/fail10.json \
$(TEST_DATA_DIR)/fail11.json \
Expand Down Expand Up @@ -77,6 +82,7 @@ TEST_FILES = \
$(TEST_DATA_DIR)/fail39.json \
$(TEST_DATA_DIR)/fail40.json \
$(TEST_DATA_DIR)/fail41.json \
$(TEST_DATA_DIR)/fail42.json \
$(TEST_DATA_DIR)/fail3.json \
$(TEST_DATA_DIR)/fail4.json \
$(TEST_DATA_DIR)/fail5.json \
Expand Down
5 changes: 3 additions & 2 deletions include/univalue.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,10 @@ class UniValue {
std::string write(unsigned int prettyIndent = 0,
unsigned int indentLevel = 0) const;

bool read(const char *raw, size_t len);
bool read(const char *raw);
bool read(const std::string& rawStr) {
return read(rawStr.c_str());
return read(rawStr.data(), rawStr.size());
}

private:
Expand Down Expand Up @@ -240,7 +241,7 @@ enum jtokentype {
};

extern enum jtokentype getJsonToken(std::string& tokenVal,
unsigned int& consumed, const char *raw);
unsigned int& consumed, const char *raw, const char *end);
extern const char *uvTypeName(UniValue::VType t);

static inline bool jsonTokenIsValue(enum jtokentype jtt)
Expand Down
2 changes: 1 addition & 1 deletion lib/univalue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ static bool validNumStr(const string& s)
{
string tokenVal;
unsigned int consumed;
enum jtokentype tt = getJsonToken(tokenVal, consumed, s.c_str());
enum jtokentype tt = getJsonToken(tokenVal, consumed, s.data(), s.data() + s.size());
return (tt == JTOK_NUMBER);
}

Expand Down
46 changes: 27 additions & 19 deletions lib/univalue_read.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,21 @@ static const char *hatoui(const char *first, const char *last,
}

enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed,
const char *raw)
const char *raw, const char *end)
{
tokenVal.clear();
consumed = 0;

const char *rawStart = raw;

while ((*raw) && (json_isspace(*raw))) // skip whitespace
while (raw < end && (json_isspace(*raw))) // skip whitespace
raw++;

switch (*raw) {

case 0:
if (raw >= end)
return JTOK_NONE;

switch (*raw) {

case '{':
raw++;
consumed = (raw - rawStart);
Expand Down Expand Up @@ -127,40 +127,40 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed,
numStr += *raw; // copy first char
raw++;

if ((*first == '-') && (!json_isdigit(*raw)))
if ((*first == '-') && (raw < end) && (!json_isdigit(*raw)))
return JTOK_ERR;

while ((*raw) && json_isdigit(*raw)) { // copy digits
while (raw < end && json_isdigit(*raw)) { // copy digits
numStr += *raw;
raw++;
}

// part 2: frac
if (*raw == '.') {
if (raw < end && *raw == '.') {
numStr += *raw; // copy .
raw++;

if (!json_isdigit(*raw))
if (raw >= end || !json_isdigit(*raw))
return JTOK_ERR;
while ((*raw) && json_isdigit(*raw)) { // copy digits
while (raw < end && json_isdigit(*raw)) { // copy digits
numStr += *raw;
raw++;
}
}

// part 3: exp
if (*raw == 'e' || *raw == 'E') {
if (raw < end && (*raw == 'e' || *raw == 'E')) {
numStr += *raw; // copy E
raw++;

if (*raw == '-' || *raw == '+') { // copy +/-
if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/-
numStr += *raw;
raw++;
}

if (!json_isdigit(*raw))
if (raw >= end || !json_isdigit(*raw))
return JTOK_ERR;
while ((*raw) && json_isdigit(*raw)) { // copy digits
while (raw < end && json_isdigit(*raw)) { // copy digits
numStr += *raw;
raw++;
}
Expand All @@ -177,13 +177,16 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed,
string valStr;
JSONUTF8StringFilter writer(valStr);

while (*raw) {
while (raw < end) {
if ((unsigned char)*raw < 0x20)
return JTOK_ERR;

else if (*raw == '\\') {
raw++; // skip backslash

if (raw >= end)
return JTOK_ERR;

switch (*raw) {
case '"': writer.push_back('\"'); break;
case '\\': writer.push_back('\\'); break;
Expand All @@ -196,7 +199,8 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed,

case 'u': {
unsigned int codepoint;
if (hatoui(raw + 1, raw + 1 + 4, codepoint) !=
if (raw + 1 + 4 >= end ||
hatoui(raw + 1, raw + 1 + 4, codepoint) !=
raw + 1 + 4)
return JTOK_ERR;
writer.push_back_u(codepoint);
Expand Down Expand Up @@ -246,7 +250,7 @@ enum expect_bits {
#define setExpect(bit) (expectMask |= EXP_##bit)
#define clearExpect(bit) (expectMask &= ~EXP_##bit)

bool UniValue::read(const char *raw)
bool UniValue::read(const char *raw, size_t size)
{
clear();

Expand All @@ -257,10 +261,11 @@ bool UniValue::read(const char *raw)
unsigned int consumed;
enum jtokentype tok = JTOK_NONE;
enum jtokentype last_tok = JTOK_NONE;
const char* end = raw + size;
do {
last_tok = tok;

tok = getJsonToken(tokenVal, consumed, raw);
tok = getJsonToken(tokenVal, consumed, raw, end);
if (tok == JTOK_NONE || tok == JTOK_ERR)
return false;
raw += consumed;
Expand Down Expand Up @@ -432,10 +437,13 @@ bool UniValue::read(const char *raw)
} while (!stack.empty ());

/* Check that nothing follows the initial construct (parsed above). */
tok = getJsonToken(tokenVal, consumed, raw);
tok = getJsonToken(tokenVal, consumed, raw, end);
if (tok != JTOK_NONE)
return false;

return true;
}

bool UniValue::read(const char *raw) {
return read(raw, strlen(raw));
}
1 change: 1 addition & 0 deletions test/.gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
unitester
no_nul

*.trs
*.log
Binary file added test/fail42.json
Binary file not shown.
8 changes: 8 additions & 0 deletions test/no_nul.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#include "univalue.h"

int main (int argc, char *argv[])
{
char buf[] = "___[1,2,3]___";
UniValue val;
return val.read(buf + 3, 7) ? 0 : 1;
}
1 change: 1 addition & 0 deletions test/unitester.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ static const char *filenames[] = {
"fail39.json", // invalid unicode: only second half of surrogate pair
"fail40.json", // invalid unicode: broken UTF-8
"fail41.json", // invalid unicode: unfinished UTF-8
"fail42.json", // valid json with garbage following a nul byte
"fail3.json",
"fail4.json", // extra comma
"fail5.json",
Expand Down

0 comments on commit fd32d1a

Please sign in to comment.