Skip to content

Commit

Permalink
Add yyjson_locate_pos() function: #166
Browse files Browse the repository at this point in the history
  • Loading branch information
ibireme committed May 7, 2024
1 parent b21c029 commit 8f609cf
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 0 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
All notable changes to this project will be documented in this file.


## Unreleased
#### Added
- Add `yyjson_locate_pos()` function to locate the line number, column number and character index of a byte position, such as an error position.


## 0.9.0 (2024-04-08)
#### Added
- Add `YYJSON_WRITE_NEWLINE_AT_END` flag for JSON writer: #147
Expand Down
36 changes: 36 additions & 0 deletions src/yyjson.c
Original file line number Diff line number Diff line change
Expand Up @@ -1831,6 +1831,42 @@ bool unsafe_yyjson_mut_equals(yyjson_mut_val *lhs, yyjson_mut_val *rhs) {
}
}

/*
 Locate the line, column and character index of byte position `pos` in `str`.

 The first `pos` bytes are scanned as UTF-8: every lead byte (and every stray
 continuation or invalid byte) counts as one character, and '\n' starts a new
 line. A character that begins before `pos` is counted even when `pos` falls
 inside it.

 str:  input bytes, may be NULL (reported as failure).
 len:  byte length of `str`.
 pos:  byte position to locate, valid range is 0..len inclusive.
 line: optional output, 1-based line number (0 on failure).
 col:  optional output, 1-based column number (0 on failure).
 chr:  optional output, 0-based character index (0 on failure).

 Returns true on success, false if `str` is NULL or `pos > len`.
 */
bool yyjson_locate_pos(const char *str, size_t len, size_t pos,
                       size_t *line, size_t *col, size_t *chr) {
    const unsigned char *buf = (const unsigned char *)str;
    size_t idx = 0;      /* byte offset of the next character to scan */
    size_t line_sum = 0; /* number of '\n' found before `pos` */
    size_t line_pos = 0; /* character count at the start of the current line */
    size_t chr_sum = 0;  /* number of characters that begin before `pos` */

    /* Validate before touching the buffer: no pointer arithmetic is done on
       a NULL string or beyond `len`. */
    if (!str || pos > len) {
        if (line) *line = 0;
        if (col) *col = 0;
        if (chr) *chr = 0;
        return false;
    }

    while (idx < pos) {
        unsigned char c = buf[idx];
        size_t width;

        chr_sum += 1;
        if (c < 0x80) { /* 0xxxxxxx (0x00-0x7F) ASCII */
            if (c == '\n') {
                line_sum += 1;
                line_pos = chr_sum;
            }
            width = 1;
        }
        else if (c < 0xC0) width = 1; /* 10xxxxxx (0x80-0xBF) Invalid */
        else if (c < 0xE0) width = 2; /* 110xxxxx (0xC0-0xDF) 2-byte UTF-8 */
        else if (c < 0xF0) width = 3; /* 1110xxxx (0xE0-0xEF) 3-byte UTF-8 */
        else if (c < 0xF8) width = 4; /* 11110xxx (0xF0-0xF7) 4-byte UTF-8 */
        else width = 1;               /* 11111xxx (0xF8-0xFF) Invalid */

        /* Stop as soon as this character reaches or crosses `pos`, so the
           offset never runs past the requested position (or the buffer). */
        if (width >= pos - idx) break;
        idx += width;
    }

    if (line) *line = line_sum + 1;
    if (col) *col = chr_sum - line_pos + 1;
    if (chr) *chr = chr_sum;
    return true;
}



#if !YYJSON_DISABLE_UTILS
Expand Down
18 changes: 18 additions & 0 deletions src/yyjson.h
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,24 @@ typedef struct yyjson_read_err {
size_t pos;
} yyjson_read_err;

/**
 Locate the line and column number for a byte position in a string.
 This can be used to get a better description of an error position.
 @param str The input string.
 @param len The byte length of the input string.
 @param pos The byte position within the input string. Any value from 0 to
    `len` (inclusive) is accepted.
 @param line A pointer to receive the line number, starting from 1.
    May be NULL if the value is not needed.
 @param col A pointer to receive the column number, starting from 1.
    May be NULL if the value is not needed.
 @param chr A pointer to receive the character index, starting from 0.
    May be NULL if the value is not needed.
 @return true on success, false if `str` is NULL or `pos` is out of bounds.
    On failure, every non-NULL output is set to 0.
 @note Line/column/character are calculated based on Unicode characters for
    compatibility with text editors. For multi-byte UTF-8 characters,
    the returned value may not directly correspond to the byte position.
    Only the '\n' byte is treated as a line break.
 */
yyjson_api bool yyjson_locate_pos(const char *str, size_t len, size_t pos,
size_t *line, size_t *col, size_t *chr);



/**
Expand Down
100 changes: 100 additions & 0 deletions test/test_err_code.c
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,107 @@ static void test_write_err_code(void) {
#endif
}



// Exercises yyjson_locate_pos() with invalid arguments, empty input,
// newline-only input, multi-line ASCII text, and valid/invalid UTF-8.
static void test_locate_pos(void) {
    const char *text;
    size_t size, at, ln, cl, ch;
    char raw[7] = { 0 };

    // ---- Invalid input: NULL string ----------------------------------------
    // Must fail, with or without output pointers; outputs are reset to 0.
    yy_assert(!yyjson_locate_pos(NULL, 0, 0, NULL, NULL, NULL));

    ln = cl = ch = SIZE_MAX;
    yy_assert(!yyjson_locate_pos(NULL, 0, 0, &ln, &cl, &ch));
    yy_assert(ln == 0 && cl == 0 && ch == 0);

    // ---- Invalid input: position past the end ------------------------------
    yy_assert(!yyjson_locate_pos("abc", 3, 4, NULL, NULL, NULL));

    ln = cl = ch = SIZE_MAX;
    yy_assert(!yyjson_locate_pos("abc", 3, 4, &ln, &cl, &ch));
    yy_assert(ln == 0 && cl == 0 && ch == 0);

    // ---- Empty string ------------------------------------------------------
    yy_assert(yyjson_locate_pos("", 0, 0, &ln, &cl, &ch));
    yy_assert(ln == 1 && cl == 1 && ch == 0);

    // ---- Strings made only of newlines -------------------------------------
    yy_assert(yyjson_locate_pos("\n", 1, 0, &ln, &cl, &ch));
    yy_assert(ln == 1 && cl == 1 && ch == 0);
    yy_assert(yyjson_locate_pos("\n", 1, 1, &ln, &cl, &ch));
    yy_assert(ln == 2 && cl == 1 && ch == 1);
    yy_assert(yyjson_locate_pos("\n\n", 2, 1, &ln, &cl, &ch));
    yy_assert(ln == 2 && cl == 1 && ch == 1);
    yy_assert(yyjson_locate_pos("\n\n", 2, 2, &ln, &cl, &ch));
    yy_assert(ln == 3 && cl == 1 && ch == 2);

    // ---- Single line -------------------------------------------------------
    text = "abc";
    size = strlen(text);
    for (at = 0; at <= size; at++) {
        yy_assert(yyjson_locate_pos(text, size, at, &ln, &cl, &ch));
        yy_assert(ln == 1 && cl == at + 1 && ch == at);
    }

    // ---- Two lines ---------------------------------------------------------
    text = "abc\ndef";
    size = strlen(text);
    for (at = 0; at <= size; at++) {
        // Byte offset where the line containing `at` begins.
        size_t want_line = (at <= 3) ? 1 : 2;
        size_t line_start = (at <= 3) ? 0 : 4;
        yy_assert(yyjson_locate_pos(text, size, at, &ln, &cl, &ch));
        yy_assert(ln == want_line && cl == at - line_start + 1 && ch == at);
    }

    // ---- Three lines -------------------------------------------------------
    text = "abc\ndef\nghijklmn";
    size = strlen(text);
    for (at = 0; at <= size; at++) {
        size_t want_line, line_start;
        if (at <= 3) {
            want_line = 1;
            line_start = 0;
        } else if (at <= 7) {
            want_line = 2;
            line_start = 4;
        } else {
            want_line = 3;
            line_start = 8;
        }
        yy_assert(yyjson_locate_pos(text, size, at, &ln, &cl, &ch));
        yy_assert(ln == want_line && cl == at - line_start + 1 && ch == at);
    }

    // ---- Valid UTF-8 with 1- to 4-byte characters --------------------------
    text = "abcé果😀";
    size = strlen(text);
    for (at = 0; at <= size; at++) {
        // A position inside a multi-byte character maps to the index just
        // past that character.
        size_t want_chr = at;
        if (at >= 9 && at <= 12) want_chr = 6;
        else if (at >= 6 && at <= 8) want_chr = 5;
        else if (at >= 4 && at <= 5) want_chr = 4;
        yy_assert(yyjson_locate_pos(text, size, at, &ln, &cl, &ch));
        yy_assert(ln == 1 && cl == want_chr + 1 && ch == want_chr);
    }

    // ---- Invalid UTF-8: each bad byte counts as one character --------------
    text = "abcdef";
    size = strlen(text);
    memcpy(raw, text, size + 1);
    raw[1] = 0x80; // stray continuation byte
    raw[2] = 0xF8; // invalid lead byte
    for (at = 0; at <= size; at++) {
        yy_assert(yyjson_locate_pos(raw, size, at, &ln, &cl, &ch));
        yy_assert(ln == 1 && cl == at + 1 && ch == at);
    }
}



// Test case entry: runs test_read_err_code(), test_write_err_code() and
// test_locate_pos() in that order.
yy_test_case(test_err_code) {
test_read_err_code();
test_write_err_code();
test_locate_pos();
}

0 comments on commit 8f609cf

Please sign in to comment.