Skip to content

Commit

Permalink
fix #6027
Browse files Browse the repository at this point in the history
only normalize identifiers during parsing; not all constructed symbols
  • Loading branch information
JeffBezanson committed Mar 3, 2014
1 parent e1868ac commit 95fcc90
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 37 deletions.
38 changes: 2 additions & 36 deletions src/flisp/flisp.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@ char * dirname(char *);
#include "flisp.h"
#include "opcodes.h"

#include "utf8proc.h"

static char *builtin_names[] =
{ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
Expand Down Expand Up @@ -260,34 +258,6 @@ int fl_is_keyword_name(const char *str, size_t len)
return len>1 && ((str[0] == ':' || str[len-1] == ':') && str[1] != '\0');
}

// return NFC-normalized UTF8-encoded version of s
static const char *normalize(char *s)
{
static size_t buflen = 0;
static void *buf = NULL; // persistent buffer (avoid repeated malloc/free)
// options equivalent to utf8proc_NFC:
const int options = UTF8PROC_NULLTERM|UTF8PROC_STABLE|UTF8PROC_COMPOSE;
ssize_t result;
size_t newlen;
result = utf8proc_decompose((uint8_t*) s, 0, NULL, 0, options);
if (result < 0) goto error;
newlen = result * sizeof(int32_t) + 1;
if (newlen > buflen) {
buflen = newlen * 2;
buf = realloc(buf, buflen);
if (!buf) lerror(MemoryError, "error allocating UTF8 buffer");
}
result = utf8proc_decompose((uint8_t*)s,0, (int32_t*)buf,result, options);
if (result < 0) goto error;
result = utf8proc_reencode((int32_t*)buf,result, options);
if (result < 0) goto error;
return (char*) buf;
error:
lerrorf(ParseError, "error normalizing identifier %s: %s", s,
utf8proc_errmsg(result));
}

// note: assumes str is normalized
static symbol_t *mk_symbol(const char *str)
{
symbol_t *sym;
Expand All @@ -312,7 +282,6 @@ static symbol_t *mk_symbol(const char *str)
return sym;
}

// note: assumes str is normalized
static symbol_t **symtab_lookup(symbol_t **ptree, const char *str)
{
int x;
Expand All @@ -331,12 +300,9 @@ static symbol_t **symtab_lookup(symbol_t **ptree, const char *str)

// Return the symbol value for the name str: the name is looked up in symtab
// via symtab_lookup, a new entry is created with mk_symbol when the slot is
// still NULL, and the node pointer is returned tagged with TAG_SYM.
//
// NOTE(review): this listing comes from a diff view without +/- markers and
// appears to interleave the removed lines (normalize()/nstr) with the added
// ones (using str directly) -- confirm against the actual file before
// relying on it.
value_t symbol(char *str)
{
symbol_t **pnode;
const char *nstr = normalize(str);

pnode = symtab_lookup(&symtab, nstr);
symbol_t **pnode = symtab_lookup(&symtab, str);
if (*pnode == NULL)
*pnode = mk_symbol(nstr);
*pnode = mk_symbol(str);
return tagptr(*pnode, TAG_SYM);
}

Expand Down
30 changes: 29 additions & 1 deletion src/flisp/julia_extensions.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <string.h>
#include <assert.h>
#include "flisp.h"
#include "utf8proc.h"

static int is_uws(uint32_t wc)
{
Expand Down Expand Up @@ -42,6 +43,33 @@ static int jl_id_char(uint32_t wc)
wc == '!' || wc == '_');
}

// return NFC-normalized UTF8-encoded version of s
static char *normalize(char *s)
{
static size_t buflen = 0;
static void *buf = NULL; // persistent buffer (avoid repeated malloc/free)
// options equivalent to utf8proc_NFC:
const int options = UTF8PROC_NULLTERM|UTF8PROC_STABLE|UTF8PROC_COMPOSE;
ssize_t result;
size_t newlen;
result = utf8proc_decompose((uint8_t*) s, 0, NULL, 0, options);
if (result < 0) goto error;
newlen = result * sizeof(int32_t) + 1;
if (newlen > buflen) {
buflen = newlen * 2;
buf = realloc(buf, buflen);
if (!buf) lerror(MemoryError, "error allocating UTF8 buffer");
}
result = utf8proc_decompose((uint8_t*)s,0, (int32_t*)buf,result, options);
if (result < 0) goto error;
result = utf8proc_reencode((int32_t*)buf,result, options);
if (result < 0) goto error;
return (char*) buf;
error:
lerrorf(symbol("error"), "error normalizing identifier %s: %s", s,
utf8proc_errmsg(result));
}

value_t fl_accum_julia_symbol(value_t *args, u_int32_t nargs)
{
argcount("accum-julia-symbol", nargs, 2);
Expand All @@ -67,7 +95,7 @@ value_t fl_accum_julia_symbol(value_t *args, u_int32_t nargs)
break;
}
ios_pututf8(&str, 0);
return symbol(str.buf);
return symbol(normalize(str.buf));
}

static builtinspec_t julia_flisp_func_info[] = {
Expand Down
9 changes: 9 additions & 0 deletions test/strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -943,3 +943,12 @@ end
# issue #5870
@test !ismatch(Regex("aa"), SubString("",1,0))
@test ismatch(Regex(""), SubString("",1,0))

# issue #6027
let
    # Constructing a symbol directly from an invalid character (0xdcdb)
    # must succeed and keep the name unchanged.
    badsym = symbol(char(0xdcdb))
    @test string(badsym) == "\udcdb"
    @test expand(badsym) === badsym

    # Parsing the same character as an identifier is expected to yield an
    # error expression (raise=false) carrying the normalization message.
    parsed = parse("\udcdb = 1",1,raise=false)
    @test parsed[1] == Expr(:error, "error normalizing identifier \udcdb: Invalid UTF-8 string")
end

0 comments on commit 95fcc90

Please sign in to comment.