diff --git a/decodevarname.m b/decodevarname.m
new file mode 100644
index 0000000..4a4e3fa
--- /dev/null
+++ b/decodevarname.m
@@ -0,0 +1,73 @@
+function newname = decodevarname(name,varargin)
+%
+%    newname = decodevarname(name)
+%
+%    Decode a hex-encoded variable name (from encodevarname) and restore
+%    its original form
+%
+%    This function is sensitive to the default charset
+%    settings in MATLAB, please call feature('DefaultCharacterSet','utf8')
+%    to set the encoding to UTF-8 before calling this function.
+%
+%    author: Qianqian Fang (q.fang neu.edu)
+%
+%    input:
+%        name: a string output from encodevarname, which converts a leading
+%              character that is not a letter into "x0xHH_" and any other
+%              character that is not a letter, digit or underscore into
+%              "_0xHH_", where the hex key HH stores the ascii (or Unicode)
+%              value of the character.
+%        varargin: options handed to jsonopt, which is asked for 'UnpackHex'
+%              (default 1); when that value is false, name is returned as is
+%
+%    output:
+%        newname: the restored original string
+%
+%    example:
+%        decodevarname('x0x5F_a')   % returns _a
+%        decodevarname('a_')        % returns a_ as it is a valid variable name
+%        decodevarname('x0xE58F98__0xE9878F_') % returns '变量'
+%
+%    this file is part of EasyH5 Toolbox: https://github.com/fangq/easyh5
+%
+%    License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/easyh5 for details
+%
+
+isunpack=jsonopt('UnpackHex',1,varargin{:});
+newname=name;
+if(isempty(regexp(name,'0x([0-9a-fA-F]+)_','once')))
+    return   % no hex key in the name, nothing to decode
+end
+if(isunpack)
+    % a key is "x0xHH_" at the start of the name or "_0xHH_" anywhere
+    keypat='(^x|_){1}0x([0-9a-fA-F]+)_';
+    if(exist('native2unicode','builtin'))
+        % the dynamic expression calls hex2unicode through the handle h2u
+        h2u=@hex2unicode;
+        newname=regexprep(name,keypat,'${h2u($2)}');
+    else
+        % fallback without native2unicode: each key becomes one char code
+        [pos,pend]=regexp(name,keypat,'start','end');
+        if(isempty(pos))
+            return;
+        end
+        newname='';
+        last=0;   % index of the last character of name consumed so far
+        for i=1:length(pos)
+            % copy the text preceding the key, then the decoded character;
+            % pos(i)+3 skips the three-character "x0x" or "_0x" prefix
+            newname=[newname name(last+1:pos(i)-1) char(hex2dec(name(pos(i)+3:pend(i)-1)))];
+            last=pend(i);
+        end
+        % append whatever follows the last key
+        newname=[newname name(last+1:end)];
+    end
+end
+
+%--------------------------------------------------------------------------
+function 
str=hex2unicode(hexstr)
+% Convert the hex key hexstr (e.g. 'E58F98', bytes in encoded order) back
+% into text: split it into byte pairs and decode them with native2unicode
+hexstr=[repmat('0',1,mod(length(hexstr),2)) hexstr]; % pad to whole bytes
+bytes=uint8(hex2dec(reshape(hexstr,2,[]).')).';
+str=native2unicode(bytes);
diff --git a/encodevarname.m b/encodevarname.m
new file mode 100644
index 0000000..b336f2d
--- /dev/null
+++ b/encodevarname.m
@@ -0,0 +1,73 @@
+function str = encodevarname(str,varargin)
+%
+%    newname = encodevarname(name)
+%
+%    Encode an invalid variable name using a hex-format for bi-directional
+%    conversions.
+%
+%    This function is sensitive to the default charset
+%    settings in MATLAB, please call feature('DefaultCharacterSet','utf8')
+%    to set the encoding to UTF-8 before calling this function.
+%
+%    author: Qianqian Fang (q.fang neu.edu)
+%
+%    input:
+%        name: a string, can be either a valid or invalid variable name
+%        varargin: accepted for call compatibility; not used here
+%
+%    output:
+%        newname: a valid variable name obtained by converting a leading
+%                 character that is not a letter into "x0xHH_" and any other
+%                 character that is not a letter, digit or underscore into
+%                 "_0xHH_", where HH is the ascii (or Unicode) value of the
+%                 character in hex.
+%
+%                 the encoded variable name CAN be longer than 63, i.e.
+%                 the maximum variable name length given by namelengthmax;
+%                 if one uses the output of this function as a struct or
+%                 variable name, the name will be truncated at 63. Please
+%                 consider using the name as a containers.Map key, which
+%                 does not have such limit.
+%
+%    example:
+%        encodevarname('_a')   % returns x0x5F_a
+%        encodevarname('a_')   % returns a_ as it is a valid variable name
+%        encodevarname('变量') % returns 'x0xE58F98__0xE9878F_'
+%
+%    this file is part of EasyH5 Toolbox: https://github.com/fangq/easyh5
+%
+%    License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/easyh5 for details
+%
+
+    % the regexprep branches need the built-in unicode2native
+    hasnative=exist('unicode2native','builtin');
+    if(~isempty(regexp(str,'^[^A-Za-z]','once')))
+        % a name must start with a letter: hex-encode the first character
+        if(hasnative)
+            str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
+        else
+            str=sprintf('x0x%X_%s',char(str(1))+0,str(2:end));
+        end
+    end
+    if(isvarname(str))
+        return;
+    end
+    if(hasnative)
+        str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
+    else
+        cpos=regexp(str,'[^0-9A-Za-z_]');
+        if(isempty(cpos))
+            return;
+        end
+        str0=str;
+        str='';
+        last=0;   % index of the last character of str0 consumed so far
+        for i=1:length(cpos)
+            % copy the valid run before the character, then its hex key
+            str=[str str0(last+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i))+0)];
+            last=cpos(i);
+        end
+        % append the valid characters after the last encoded one
+        str=[str str0(last+1:end)];
+    end
+end
diff --git a/loadjson.m b/loadjson.m index 87b6b2a..58d95e2 100644 --- a/loadjson.m +++ b/loadjson.m @@ -413,7 +413,7 @@ end pos=parse_char(inputstr, pos, ':'); [val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc, varargin{:}); - object.(valid_field(str,varargin{:}))=val; + object.(encodevarname(str,varargin{:}))=val; [cc,pos]=next_char(inputstr,pos); if cc == '}' break; @@ -442,41 +442,6 @@ %%------------------------------------------------------------------------- -function str = valid_field(str,varargin) -% From MATLAB doc: field names must begin with a letter, which may be -% followed by any combination of letters, digits, and underscores. -% Invalid characters will be converted to underscores, and the prefix -% "x0x[Hex code]_" will be added if the first character is not a letter. 
- if(~isempty(regexp(str,'^[^A-Za-z]','once'))) - if(~isoctavemesh && str(1)+0 > 255) - str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once'); - else - str=sprintf('x0x%X_%s',char(str(1))+0,str(2:end)); - end - end - if(isvarname(str)) - return; - end - if(~isoctavemesh) - str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_'); - else - cpos=regexp(str,'[^0-9A-Za-z_]'); - if(isempty(cpos)) - return; - end - str0=str; - pos0=[0 cpos(:)' length(str)]; - str=''; - for i=1:length(cpos) - str=[str str0(pos0(i)+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i))+0)]; - end - if(cpos(end)~=length(str)) - str=[str str0(pos0(end-1)+1:pos0(end))]; - end - end -end -%%------------------------------------------------------------------------- - function newpos=skip_whitespace(pos, inputstr) newpos=pos; while newpos <= length(inputstr) && isspace(inputstr(newpos)) diff --git a/loadmsgpack.m b/loadmsgpack.m index a718b45..42036c7 100644 --- a/loadmsgpack.m +++ b/loadmsgpack.m @@ -227,42 +227,6 @@ out = struct(); for n=1:len [key, idx] = parse(bytes, idx); - [out.(valid_field(char(key))), idx] = parse(bytes, idx); - end -end - -function str = valid_field(str,varargin) -% From MATLAB doc: field names must begin with a letter, which may be -% followed by any combination of letters, digits, and underscores. -% Invalid characters will be converted to underscores, and the prefix -% "x0x[Hex code]_" will be added if the first character is not a letter. 
- isoct=exist('OCTAVE_VERSION','builtin'); - cpos=regexp(str,'^[^A-Za-z]','once'); - if(~isempty(cpos)) - if(~isoct) - str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once'); - else - str=sprintf('x0x%X_%s',char(str(1)),str(2:end)); - end - end - if(isempty(regexp(str,'[^0-9A-Za-z_]', 'once' ))) - return; - end - if(~isoct) - str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_'); - else - cpos=regexp(str,'[^0-9A-Za-z_]'); - if(isempty(cpos)) - return; - end - str0=str; - pos0=[0 cpos(:)' length(str)]; - str=''; - for i=1:length(cpos) - str=[str str0(pos0(i)+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i)))]; - end - if(cpos(end)~=length(str)) - str=[str str0(pos0(end-1)+1:pos0(end))]; - end + [out.(encodevarname(char(key))), idx] = parse(bytes, idx); end end diff --git a/loadubjson.m b/loadubjson.m index 389d686..6ba7f7c 100644 --- a/loadubjson.m +++ b/loadubjson.m @@ -330,7 +330,7 @@ end [val, pos] = parse_value(inputstr, pos, varargin{:}); num=num+1; - object.(valid_field(str,varargin{:}))=val; + object.(encodevarname(str,varargin{:}))=val; [cc, pos]=next_char(inputstr,pos); if cc == '}' || (count>=0 && num>=count) break; @@ -357,38 +357,3 @@ error_pos('unsupported type at position %d',inputstr, pos); end end -%%------------------------------------------------------------------------- - -function str = valid_field(str,varargin) -% From MATLAB doc: field names must begin with a letter, which may be -% followed by any combination of letters, digits, and underscores. -% Invalid characters will be converted to underscores, and the prefix -% "x0x[Hex code]_" will be added if the first character is not a letter. 
- if(~isempty(regexp(str,'^[^A-Za-z]','once'))) - if(~isoctavemesh && str(1)+0 > 255) - str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once'); - else - str=sprintf('x0x%X_%s',char(str(1)),str(2:end)); - end - end - if(isvarname(str)) - return; - end - if(~isoctavemesh) - str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_'); - else - cpos=regexp(str,'[^0-9A-Za-z_]'); - if(isempty(cpos)) - return; - end - str0=str; - pos0=[0 cpos(:)' length(str)]; - str=''; - for i=1:length(cpos) - str=[str str0(pos0(i)+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i)))]; - end - if(cpos(end)~=length(str)) - str=[str str0(pos0(end-1)+1:pos0(end))]; - end - end -end diff --git a/savejson.m b/savejson.m index 1bd044b..3b21c0e 100644 --- a/savejson.m +++ b/savejson.m @@ -268,13 +268,13 @@ bracketlevel=~jsonopt('singletcell',1,varargin{:}); if(len>bracketlevel) if(~isempty(name)) - txt={padding0, '"', checkname(name,varargin{:}),'": [', nl}; name=''; + txt={padding0, '"', decodevarname(name,varargin{:}),'": [', nl}; name=''; else txt={padding0, '[', nl}; end elseif(len==0) if(~isempty(name)) - txt={padding0, '"' checkname(name,varargin{:}) '": []'}; name=''; + txt={padding0, '"' decodevarname(name,varargin{:}) '": []'}; name=''; else txt={padding0, '[]'}; end @@ -324,7 +324,7 @@ if(isempty(item)) if(~isempty(name)) - txt={padding0, '"', checkname(name,varargin{:}),'": []'}; + txt={padding0, '"', decodevarname(name,varargin{:}),'": []'}; else txt={padding0, '[]'}; end @@ -333,7 +333,7 @@ end if(~isempty(name)) if(forcearray) - txt={padding0, '"', checkname(name,varargin{:}),'": [', nl}; + txt={padding0, '"', decodevarname(name,varargin{:}),'": [', nl}; end else if(forcearray) @@ -347,7 +347,7 @@ for i=1:dim(1) names = fieldnames(item(i,j)); if(~isempty(name) && len==1 && ~forcearray) - txt(end+1:end+5)={padding1, '"', checkname(name,varargin{:}),'": {', nl}; + txt(end+1:end+5)={padding1, '"', decodevarname(name,varargin{:}),'": {', nl}; 
else txt(end+1:end+3)={padding1, '{', nl}; end @@ -412,7 +412,7 @@ if(isempty(item)) if(~isempty(name)) - txt={padding0, '"', checkname(name,varargin{:}),'": []'}; + txt={padding0, '"', decodevarname(name,varargin{:}),'": []'}; else txt={padding0, '[]'}; end @@ -421,7 +421,7 @@ end if(~isempty(name)) if(forcearray) - txt={padding0, '"', checkname(name,varargin{:}),'": {', nl}; + txt={padding0, '"', decodevarname(name,varargin{:}),'": {', nl}; end else if(forcearray) @@ -463,7 +463,7 @@ if(~isempty(name)) if(len>1) - txt={padding1, '"', checkname(name,varargin{:}),'": [', nl}; + txt={padding1, '"', decodevarname(name,varargin{:}),'": [', nl}; end else if(len>1) @@ -473,7 +473,7 @@ for e=1:len val=escapejsonstring(item(e,:)); if(len==1) - obj=['"' checkname(name,varargin{:}) '": ' '"',val,'"']; + obj=['"' decodevarname(name,varargin{:}) '": ' '"',val,'"']; if(isempty(name)) obj=['"',val,'"']; end @@ -514,7 +514,7 @@ padding1,nl,padding0,class(item),nl,padding0,regexprep(mat2str(size(item)),'\s+',','),nl); else txt=sprintf('%s"%s": {%s%s"_ArrayType_": "%s",%s%s"_ArraySize_": %s,%s',... 
- padding1,checkname(name,varargin{:}),nl,padding0,class(item),nl,padding0,regexprep(mat2str(size(item)),'\s+',','),nl); + padding1,decodevarname(name,varargin{:}),nl,padding0,class(item),nl,padding0,regexprep(mat2str(size(item)),'\s+',','),nl); end else if(numel(item)==1 && jsonopt('SingletArray',0,varargin{:})==0 && level>0) @@ -526,9 +526,9 @@ txt=sprintf('%s%s',padding1,numtxt); else if(numel(item)==1 && jsonopt('SingletArray',0,varargin{:})==0) - txt=sprintf('%s"%s": %s',padding1,checkname(name,varargin{:}),numtxt); + txt=sprintf('%s"%s": %s',padding1,decodevarname(name,varargin{:}),numtxt); else - txt=sprintf('%s"%s": %s',padding1,checkname(name,varargin{:}),numtxt); + txt=sprintf('%s"%s": %s',padding1,decodevarname(name,varargin{:}),numtxt); end end return; @@ -705,35 +705,6 @@ txt=regexprep(txt,'NaN',jsonopt('NaN','"_NaN_"',varargin{:})); end -%%------------------------------------------------------------------------- -function newname=checkname(name,varargin) -isunpack=jsonopt('UnpackHex',1,varargin{:}); -newname=name; -if(isempty(regexp(name,'0x([0-9a-fA-F]+)_','once'))) - return -end -if(isunpack) - isoct=jsonopt('IsOctave',0,varargin{:}); - if(~isoct) - newname=regexprep(name,'(^x|_){1}0x([0-9a-fA-F]+)_','${native2unicode(hex2dec($2))}'); - else - pos=regexp(name,'(^x|_){1}0x([0-9a-fA-F]+)_','start'); - pend=regexp(name,'(^x|_){1}0x([0-9a-fA-F]+)_','end'); - if(isempty(pos)) - return; - end - str0=name; - pos0=[0 pend(:)' length(name)]; - newname=''; - for i=1:length(pos) - newname=[newname str0(pos0(i)+1:pos(i)-1) char(hex2dec(str0(pos(i)+3:pend(i)-1)))]; - end - if(pos(end)~=length(name)) - newname=[newname str0(pos0(end-1)+1:pos0(end))]; - end - end -end - %%------------------------------------------------------------------------- function newstr=escapejsonstring(str) newstr=str; diff --git a/saveubjson.m b/saveubjson.m index 2cff233..302df37 100644 --- a/saveubjson.m +++ b/saveubjson.m @@ -265,13 +265,13 @@ len=numel(item); % let's handle 1D cell 
first if(len>bracketlevel) if(~isempty(name)) - txt=[N_(checkname(name,varargin{:})) am0]; name=''; + txt=[N_(decodevarname(name,varargin{:})) am0]; name=''; else txt=am0; end elseif(len==0) if(~isempty(name)) - txt=[N_(checkname(name,varargin{:})) Zmarker]; name=''; + txt=[N_(decodevarname(name,varargin{:})) Zmarker]; name=''; else txt=Zmarker; end @@ -319,7 +319,7 @@ if(~isempty(name)) if(forcearray) - txt=[N_(checkname(name,varargin{:})) am0]; + txt=[N_(decodevarname(name,varargin{:})) am0]; end else if(forcearray) @@ -341,7 +341,7 @@ om0=Omarker{1}; end if(~isempty(name) && len==1 && ~forcearray) - txt=[txt N_(checkname(name,varargin{:})) om0]; + txt=[txt N_(decodevarname(name,varargin{:})) om0]; else txt=[txt om0]; end @@ -382,7 +382,7 @@ if(~isempty(name)) if(forcearray) - txt=[N_(checkname(name,varargin{:})) om0]; + txt=[N_(decodevarname(name,varargin{:})) om0]; end else if(forcearray) @@ -417,7 +417,7 @@ end if(~isempty(name)) if(len>1) - txt=[N_(checkname(name,varargin{:})) am0]; + txt=[N_(decodevarname(name,varargin{:})) am0]; end else if(len>1) @@ -427,7 +427,7 @@ for e=1:len val=item(e,:); if(len==1) - obj=[N_(checkname(name,varargin{:})) '' '',S_(val),'']; + obj=[N_(decodevarname(name,varargin{:})) '' '',S_(val),'']; if(isempty(name)) obj=['',S_(val),'']; end @@ -466,10 +466,10 @@ txt=[Omarker{1} N_('_ArrayType_'),S_(class(item)),N_('_ArraySize_'),I_a(size(item),cid(1),Imarker,varargin{:}) ]; else if(isempty(item)) - txt=[N_(checkname(name,varargin{:})),Zmarker]; + txt=[N_(decodevarname(name,varargin{:})),Zmarker]; return; else - txt=[N_(checkname(name,varargin{:})),Omarker{1},N_('_ArrayType_'),S_(class(item)),N_('_ArraySize_'),I_a(size(item),cid(1),Imarker,varargin{:})]; + txt=[N_(decodevarname(name,varargin{:})),Omarker{1},N_('_ArrayType_'),S_(class(item)),N_('_ArraySize_'),I_a(size(item),cid(1),Imarker,varargin{:})]; end end childcount=2; @@ -479,9 +479,9 @@ else if(numel(item)==1 && jsonopt('SingletArray',0,varargin{:})==0) 
numtxt=regexprep(regexprep(matdata2ubjson(item,level+1,varargin{:}),'^\[',''),']$',''); - txt=[N_(checkname(name,varargin{:})) numtxt]; + txt=[N_(decodevarname(name,varargin{:})) numtxt]; else - txt=[N_(checkname(name,varargin{:})),matdata2ubjson(item,level+1,varargin{:})]; + txt=[N_(decodevarname(name,varargin{:})),matdata2ubjson(item,level+1,varargin{:})]; end end return; @@ -697,34 +697,6 @@ end end -%%------------------------------------------------------------------------- -function newname=checkname(name,varargin) -isunpack=jsonopt('UnpackHex',1,varargin{:}); -newname=name; -if(isempty(regexp(name,'0x([0-9a-fA-F]+)_','once'))) - return -end -if(isunpack) - isoct=jsonopt('IsOctave',0,varargin{:}); - if(~isoct) - newname=regexprep(name,'(^x|_){1}0x([0-9a-fA-F]+)_','${native2unicode(hex2dec($2))}'); - else - pos=regexp(name,'(^x|_){1}0x([0-9a-fA-F]+)_','start'); - pend=regexp(name,'(^x|_){1}0x([0-9a-fA-F]+)_','end'); - if(isempty(pos)) - return; - end - str0=name; - pos0=[0 pend(:)' length(name)]; - newname=''; - for i=1:length(pos) - newname=[newname str0(pos0(i)+1:pos(i)-1) char(hex2dec(str0(pos(i)+3:pend(i)-1)))]; - end - if(pos(end)~=length(name)) - newname=[newname str0(pos0(end-1)+1:pos0(end))]; - end - end -end %%------------------------------------------------------------------------- function val=N_(str) global ismsgpack