Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement scansets support in RStr.scanf ##util #22572

Merged
merged 1 commit into from
Feb 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 108 additions & 20 deletions libr/util/bscanf.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,64 @@
#define _BSCANF_CHECK_BUFFER() _BSCANF_CHECK('\0' != *buffer);
#define _BSCANF_CHECK_STRTONUM() _BSCANF_CHECK(buf_ptr != end_ptr);

/**
 * Test whether a character is accepted by a scanset.
 *
 * scanset is the text found between '[' and ']' in a "%[...]" conversion:
 *  - a leading '^' negates the set (accept everything NOT listed);
 *  - "x-y" denotes the inclusive range x..y;
 *  - a '-' that is first or last in the set is a literal '-'.
 *
 * Returns true when ch is accepted by the set, false otherwise.
 */
static bool scanset_check(const char *scanset, char ch) {
	bool negated = false;
	if (*scanset == '^') {
		negated = true;
		scanset++;
	}
	/* Compare ranges as unsigned so bytes >= 0x80 order correctly even when char is signed. */
	const unsigned char uch = (unsigned char)ch;
	bool found = false;
	const char *ss = scanset;
	/* One hit anywhere in the set is enough, so stop at the first match. */
	while (*ss && !found) {
		if (ss[1] == '-' && ss[2] != '\0') {
			const unsigned char lo = (unsigned char)ss[0];
			const unsigned char hi = (unsigned char)ss[2];
			found = (uch >= lo && uch <= hi);
			/* Skip the whole "x-y" triplet so 'y' is not re-read as a literal. */
			ss += 3;
		} else {
			found = (*ss == ch);
			ss++;
		}
	}
	/* Plain set: accept when listed. Negated set: accept when NOT listed. */
	return found != negated;
}

/**
 * Copy the body of a "%[...]" scanset out of the format string.
 *
 * fmt_ptr must point at the opening '['. Characters up to (not including)
 * the closing ']' are copied into scanset, which is always NUL-terminated
 * on success. scanset_size is the capacity of scanset in bytes, including
 * the terminator.
 *
 * Returns the format pointer just past the closing ']' (or at the end of
 * the format string when ']' is missing), or NULL when the scanset does
 * not fit in the destination buffer.
 */
static const char *scanset_parse(const char *fmt_ptr, char *scanset, size_t scanset_size) {
	if (scanset_size == 0) {
		/* No room even for the terminator. */
		R_LOG_ERROR ("Too large scanset found");
		return NULL;
	}
	size_t len = 0;
	scanset[0] = 0;
	fmt_ptr++; /* skip the opening '[' */
	while (*fmt_ptr) {
		if (*fmt_ptr == ']') {
			fmt_ptr++;
			break;
		}
		/* Keep one byte for the terminator; bound by the real buffer size,
		 * not by sizeof a pointer. */
		if (len + 1 >= scanset_size) {
			R_LOG_ERROR ("Too large scanset found");
			return NULL;
		}
		scanset[len++] = *fmt_ptr++;
		scanset[len] = 0;
	}
	return fmt_ptr;
}

R_API int r_str_scanf(const char *buffer, const char *format, ...) {
R_RETURN_VAL_IF_FAIL (buffer && format, -1);
/* Our return value. On a conversion error, we return this immediately. */
Expand Down Expand Up @@ -49,7 +107,7 @@ R_API int r_str_scanf(const char *buffer, const char *format, ...) {

while (*fmt_ptr) {
/* We ignore spaces before specifiers. */
if (isspace(*fmt_ptr)) {
if (isspace (*fmt_ptr)) {
/* Any whitespace in the format consumes all of the whitespace in the buffer. */
_BSCANF_CONSUME_WSPACE();
fmt_ptr++;
Expand Down Expand Up @@ -114,12 +172,12 @@ R_API int r_str_scanf(const char *buffer, const char *format, ...) {
}

/* All other specifiers move the buffer pointer, so check that it's not NUL. */
_BSCANF_CHECK_BUFFER();
_BSCANF_CHECK_BUFFER ();

if ('%' == *fmt_ptr) {
/* '%': match literal %. */
_BSCANF_CONSUME_WSPACE();
_BSCANF_MATCH();
_BSCANF_CONSUME_WSPACE ();
_BSCANF_MATCH ();
buf_ptr++;
} else if ('c' == *fmt_ptr || 's' == *fmt_ptr) {
/* 'c'/'s': match a character sequence/string. */
Expand All @@ -128,14 +186,18 @@ R_API int r_str_scanf(const char *buffer, const char *format, ...) {

/* 'c' conversion specifiers DO NOT consume whitespace. */
if ('c' != *fmt_ptr) {
_BSCANF_CONSUME_WSPACE();
_BSCANF_CONSUME_WSPACE ();
}

if (is_suppressed) {
/* Consume the character (string) and ignore it in this case. */
for (; max_width > 0; max_width--) {
while (true) {
buf_ptr++;
if (*buf_ptr == '\0' || (isspace(*buf_ptr) && 's' == *fmt_ptr)) {
if (*buf_ptr == '\0') {
break;
}
if ((isspace (*buf_ptr) && 's' == *fmt_ptr)) {
buf_ptr++;
break;
}
}
Expand Down Expand Up @@ -212,17 +274,45 @@ R_API int r_str_scanf(const char *buffer, const char *format, ...) {
}

} else if ('[' == *fmt_ptr) {
/* TODO: '[': match a non-empty sequence of characters from a set. */
_BSCANF_CHECK(0);

char scanset[32] = {0};
// https://cplusplus.com/reference/cstdio/scanf/
/* '[': match a non-empty sequence of characters from a scanset. */
fmt_ptr = scanset_parse (fmt_ptr, scanset, sizeof (scanset));
if (!fmt_ptr) {
// error invalid scanset
goto beach;
}
// process scanset and fill the string
/* String conversion requires a width. */
_BSCANF_CHECK_STRING();
_BSCANF_CHECK_STRING ();
/* '[' conversion specifiers DO NOT consume whitespace. */
char_ptr = va_arg (args, char*);
_BSCANF_CHECK_NULL (char_ptr);
*char_ptr = 0; // null byte the first char before failing
if (max_width < 1) {
R_LOG_DEBUG ("Missing length specifier for string");
} else {
for (; *buf_ptr && max_width > 0; max_width--) {
if (!scanset_check (scanset, *buf_ptr)) {
break;
}
*char_ptr = *buf_ptr;
char_ptr++;
buf_ptr++;
}
if (max_width == 0) {
R_LOG_DEBUG ("Truncated string in scanf");
}
/* Strings must be null-terminated. */
*char_ptr = '\0';
num_args_set++;
}
// reset max width value
max_width = 0;

} else if ('i' == *fmt_ptr || 'd' == *fmt_ptr) {
/* 'i'/'d': match a integer/decimal integer. */

_BSCANF_CONSUME_WSPACE();
_BSCANF_CONSUME_WSPACE ();
base = ('d' == *fmt_ptr) * 10;

if (is_suppressed) {
Expand All @@ -234,19 +324,19 @@ R_API int r_str_scanf(const char *buffer, const char *format, ...) {
*long_ptr = (long) strtol (buf_ptr, &end_ptr, base);
} else if ('L' == length_mod) {
ut64_ptr = va_arg (args, ut64*);
_BSCANF_CHECK_NULL(long_ptr);
_BSCANF_CHECK_NULL (long_ptr);
*ut64_ptr = (ut64) strtoll (buf_ptr, &end_ptr, base);
} else if ('h' == length_mod) {
short_ptr = va_arg (args, short*);
_BSCANF_CHECK_NULL (short_ptr);
*short_ptr = (short) (strtol (buf_ptr, &end_ptr, base));
} else {
int_ptr = va_arg (args, int*);
_BSCANF_CHECK_NULL(int_ptr);
_BSCANF_CHECK_NULL (int_ptr);
*int_ptr = (int) (strtol (buf_ptr, &end_ptr, base));
}

_BSCANF_CHECK_STRTONUM();
_BSCANF_CHECK_STRTONUM ();
buf_ptr = end_ptr;
num_args_set++;

Expand Down Expand Up @@ -275,8 +365,7 @@ R_API int r_str_scanf(const char *buffer, const char *format, ...) {
num_args_set++;

} else if ('p' == *fmt_ptr) {

_BSCANF_CONSUME_WSPACE();
_BSCANF_CONSUME_WSPACE ();
base = 16;

if (is_suppressed) {
Expand All @@ -294,8 +383,7 @@ R_API int r_str_scanf(const char *buffer, const char *format, ...) {
num_args_set++;
} else if ('u' == *fmt_ptr || 'o' == *fmt_ptr || 'x' == *fmt_ptr || 'X' == *fmt_ptr) {
/* 'u'/'o'/'x': match a unsigned decimal/octal/hexadecimal integer */

_BSCANF_CONSUME_WSPACE();
_BSCANF_CONSUME_WSPACE ();
base = ('u' == *fmt_ptr) * 10 + ('o' == *fmt_ptr) * 8 +
('x' == *fmt_ptr || 'X' == *fmt_ptr) * 16;

Expand Down
15 changes: 15 additions & 0 deletions test/unit/test_scanf.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ bool test_r_str_scanf(void) {
res = r_str_scanf ("0x120000023b2d8000", "0x%Lx", &bignum);
mu_assert_eq (0x120000023b2d8000, bignum, "portable ut64 scanf failed");
mu_assert_eq (res, 1, "return value for scanf failed");

mu_end;
}

Expand All @@ -36,9 +37,23 @@ bool test_r_str_scanf_pointer(void) {
mu_end;
}

/*
 * Scanset test: read the first word with a bounded string conversion, skip
 * the second word, then read everything up to the newline with the negated
 * scanset "[^\n]". Each '.'-modified conversion in the call below is
 * paired with a size argument followed by its destination buffer.
 */
bool test_r_str_scanf_scanset(void) {
	/* Zero-initialised so the string asserts never read indeterminate bytes if scanning fails. */
	char msg0[32] = {0};
	char msg1[32] = {0};
	char *s = r_str_newf ("Hello World ITS OVER\nAGAIN");
	/* Pass the arrays themselves (decaying to char *), matching the char * the callee fetches. */
	int res = r_str_scanf (s, "%.s %*s %.[^\n]", sizeof (msg0), msg0, sizeof (msg1), msg1);
	/* The input copy is released before the asserts run. */
	free (s);
	mu_assert_streq (msg0, "Hello", "first word");
	mu_assert_streq (msg1, "ITS OVER", "the rest until newline");
	mu_assert_eq (res, 2, "return value for scanf failed");

	mu_end;
}

/* Runs every scanf unit test in order; the result is true when the run and passed counters differ. */
bool all_tests(void) {
	mu_run_test (test_r_str_scanf);
	mu_run_test (test_r_str_scanf_pointer);
	mu_run_test (test_r_str_scanf_scanset);
	/* Any mismatch between the two counters means at least one test did not pass. */
	return tests_run != tests_passed;
}

Expand Down
Loading