Skip to content

Commit

Permalink
extract name encoding/decoding to separate function, like in easyh5
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Oct 18, 2019
1 parent b660fc1 commit f97de9b
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 189 deletions.
66 changes: 66 additions & 0 deletions decodevarname.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
function newname = decodevarname(name,varargin)
%
%    newname = decodevarname(name)
%    newname = decodevarname(name, opt)
%
%    Decode a hex-encoded variable name (from encodevarname) and restore
%    its original form
%
%    This function is sensitive to the default charset
%    settings in MATLAB, please call feature('DefaultCharacterSet','utf8')
%    to set the encoding to UTF-8 before calling this function.
%
%    author: Qianqian Fang (q.fang <at> neu.edu)
%
%    input:
%        name: a string output from encodevarname, in which a leading
%              "x0xHH_" key or an embedded "_0xHH_" key stands for one
%              encoded character; the hex key HH stores the byte value(s)
%              of that character
%        opt: (optional) any further arguments are forwarded to jsonopt to
%              look up the 'UnpackHex' option (default 1); when the option
%              evaluates to false, name is returned unchanged
%
%    output:
%        newname: the restored original string
%
%    example:
%        decodevarname('x0x5F_a')   % returns _a
%        decodevarname('a_')        % returns a_ as it is a valid variable name
%        decodevarname('x0xE58F98__0xE9878F_') % returns '变量'
%
%    this file is part of EasyH5 Toolbox: https://github.com/fangq/easyh5
%
%    License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/easyh5 for details
%

newname=name;

% Only consult jsonopt when options were actually passed in.
% NOTE(review): assumes jsonopt returns the supplied default (1) when it is
% given no options - confirm against jsonopt.m.
isunpack=1;
if(~isempty(varargin))
    isunpack=jsonopt('UnpackHex',1,varargin{:});
end

% nothing to do when the name carries no hex key at all
if(isempty(regexp(name,'0x([0-9a-fA-F]+)_','once')))
    return
end

% a key is either "x0xHH_" at the very start or "_0xHH_" anywhere
hexpat='(^x|_){1}0x([0-9a-fA-F]+)_';

if(isunpack)
    if(exist('native2unicode','builtin'))
        % regexprep evaluates the dynamic expression ${...} for every key
        h2u=@hex2unicode;
        newname=regexprep(name,hexpat,'${h2u($2)}');
    else
        % manual fallback: locate every key and splice the decoded bytes
        [pos,pend]=regexp(name,hexpat,'start','end');
        if(isempty(pos))
            return;
        end
        newname='';
        prev=0;   % index of the last input character already consumed
        for i=1:length(pos)
            % the prefix ("x0x" or "_0x") is 3 characters, the suffix "_" is 1
            hexstr=name(pos(i)+3:pend(i)-1);
            if(mod(length(hexstr),2))
                hexstr=['0' hexstr];   % pad to whole bytes, e.g. '9' -> '09'
            end
            % decode byte by byte; a multi-byte key such as 'E58F98' must
            % become three chars, not one out-of-range character code
            bytes=hex2dec(reshape(hexstr,2,[])')';
            newname=[newname name(prev+1:pos(i)-1) char(bytes)];
            prev=pend(i);
        end
        % copy whatever follows the last key (possibly nothing)
        newname=[newname name(prev+1:end)];
    end
end

%--------------------------------------------------------------------------
function str=hex2unicode(hexstr)
% Convert a hex key (most significant byte first, e.g. 'E58F98') into the
% text it encodes: the key is split into bytes and handed to native2unicode,
% which decodes them using the default character set.
%
% input:
%     hexstr: a row char vector of hex digits (the HH part of a key)
% output:
%     str: the decoded character(s)
%
% The key is parsed two hex digits at a time instead of going through one
% big number, so keys of any length keep full precision and the result does
% not depend on the byte order of the host.

if(isempty(hexstr))
    str='';
    return;
end
if(mod(length(hexstr),2))
    hexstr=['0' hexstr];   % pad to whole bytes, e.g. '9' -> '09'
end
bytes=uint8(hex2dec(reshape(hexstr,2,[])'))';

% drop leading zero bytes (padding), but keep one byte so that a key of
% all zeros decodes to char(0) instead of vanishing
first=find(bytes,1,'first');
if(isempty(first))
    first=length(bytes);
end
str=native2unicode(bytes(first:end));
67 changes: 67 additions & 0 deletions encodevarname.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
function str = encodevarname(str,varargin)
%
%    newname = encodevarname(name)
%
%    Encode an invalid variable name using a hex-format for bi-directional
%    conversions.
%
%    This function is sensitive to the default charset
%    settings in MATLAB, please call feature('DefaultCharacterSet','utf8')
%    to set the encoding to UTF-8 before calling this function.
%
%    author: Qianqian Fang (q.fang <at> neu.edu)
%
%    input:
%        name: a string, can be either a valid or invalid variable name;
%              additional arguments are accepted but not used
%
%    output:
%        newname: a variable name built by converting a leading character
%              that is not a letter [A-Za-z] into "x0xHH_" and every other
%              character outside [0-9A-Za-z_] into "_0xHH_", where HH is
%              the hex value of the character (its bytes from
%              unicode2native when that function is builtin, otherwise its
%              character code)
%
%        note: the encoded name can be longer than the maximum
%              variable-name length given by namelengthmax (63); if one
%              uses the output of this function as a struct field or
%              variable name, the name will be truncated at that length.
%              Please consider using the name as a containers.Map key,
%              which does not have such limit.
%
%    example:
%        encodevarname('_a')   % returns x0x5F_a
%        encodevarname('a_')   % returns a_ as it is a valid variable name
%        encodevarname('变量') % returns 'x0xE58F98__0xE9878F_'
%
%    this file is part of EasyH5 Toolbox: https://github.com/fangq/easyh5
%
%    License: GPLv3 or 3-clause BSD license, see https://github.com/fangq/easyh5 for details
%

% the regexprep branches rely on dynamic ${...} expressions calling
% unicode2native; probe for it once and reuse the answer
hasnative=exist('unicode2native','builtin');

% step 1: a name must start with a letter - encode the first character
% as "x0xHH_" if it does not
if(~isempty(regexp(str,'^[^A-Za-z]','once')))
    if(hasnative)
        str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
    else
        str=sprintf('x0x%X_%s',char(str(1))+0,str(2:end));
    end
end

% step 2: if the name is valid by now there is nothing left to encode
if(isvarname(str))
    return;
end

% step 3: encode every remaining character outside [0-9A-Za-z_] as "_0xHH_"
if(hasnative)
    str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
else
    cpos=regexp(str,'[^0-9A-Za-z_]');
    if(isempty(cpos))
        return;
    end
    str0=str;   % untouched copy of the input; str is rebuilt from it
    str='';
    prev=0;     % index of the last input character already copied
    for i=1:length(cpos)
        str=[str str0(prev+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i))+0)];
        prev=cpos(i);
    end
    % copy whatever follows the last encoded character (possibly nothing);
    % indices refer to the original string str0, not the rebuilt one
    str=[str str0(prev+1:end)];
end
end
37 changes: 1 addition & 36 deletions loadjson.m
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@
end
pos=parse_char(inputstr, pos, ':');
[val, pos,index_esc] = parse_value(inputstr, pos, esc, index_esc, varargin{:});
object.(valid_field(str,varargin{:}))=val;
object.(encodevarname(str,varargin{:}))=val;
[cc,pos]=next_char(inputstr,pos);
if cc == '}'
break;
Expand Down Expand Up @@ -442,41 +442,6 @@

%%-------------------------------------------------------------------------

function str = valid_field(str,varargin)
% Turn an arbitrary string into a usable field name.
% From MATLAB doc: field names must begin with a letter, which may be
% followed by any combination of letters, digits, and underscores.
% A first character that is not a letter is replaced by the prefix
% "x0x[Hex code]_"; any other character outside [0-9A-Za-z_] is replaced
% by "_0x[Hex code]_". Extra arguments are accepted but not used.

% --- leading character ---
badstart=~isempty(regexp(str,'^[^A-Za-z]','once'));
if(badstart)
    if(isoctavemesh || str(1)+0 <= 255)
        % plain character code of the first character
        str=sprintf('x0x%X_%s',char(str(1))+0,str(2:end));
    else
        % wide character: hex of its bytes as returned by unicode2native
        str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
    end
end

% --- remaining characters, only needed while the name is still invalid ---
if(~isvarname(str))
    if(isoctavemesh)
        badpos=regexp(str,'[^0-9A-Za-z_]');
        if(~isempty(badpos))
            src=str;
            bounds=[0 badpos(:)' length(str)];
            str='';
            k=1;
            while(k<=length(badpos))
                keep=src(bounds(k)+1:badpos(k)-1);
                str=[str keep sprintf('_0x%X_',src(badpos(k))+0)];
                k=k+1;
            end
            if(badpos(end)~=length(str))
                str=[str src(bounds(end-1)+1:bounds(end))];
            end
        end
    else
        str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
    end
end
end
%%-------------------------------------------------------------------------

function newpos=skip_whitespace(pos, inputstr)
newpos=pos;
while newpos <= length(inputstr) && isspace(inputstr(newpos))
Expand Down
38 changes: 1 addition & 37 deletions loadmsgpack.m
Original file line number Diff line number Diff line change
Expand Up @@ -227,42 +227,6 @@
out = struct();
for n=1:len
[key, idx] = parse(bytes, idx);
[out.(valid_field(char(key))), idx] = parse(bytes, idx);
end
end

function str = valid_field(str,varargin)
% From MATLAB doc: field names must begin with a letter, which may be
% followed by any combination of letters, digits, and underscores.
% Invalid characters will be converted to underscores, and the prefix
% "x0x[Hex code]_" will be added if the first character is not a letter.
isoct=exist('OCTAVE_VERSION','builtin');
cpos=regexp(str,'^[^A-Za-z]','once');
if(~isempty(cpos))
if(~isoct)
str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
else
str=sprintf('x0x%X_%s',char(str(1)),str(2:end));
end
end
if(isempty(regexp(str,'[^0-9A-Za-z_]', 'once' )))
return;
end
if(~isoct)
str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
else
cpos=regexp(str,'[^0-9A-Za-z_]');
if(isempty(cpos))
return;
end
str0=str;
pos0=[0 cpos(:)' length(str)];
str='';
for i=1:length(cpos)
str=[str str0(pos0(i)+1:cpos(i)-1) sprintf('_0x%X_',str0(cpos(i)))];
end
if(cpos(end)~=length(str))
str=[str str0(pos0(end-1)+1:pos0(end))];
end
[out.(encodevarname(char(key))), idx] = parse(bytes, idx);
end
end
37 changes: 1 addition & 36 deletions loadubjson.m
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@
end
[val, pos] = parse_value(inputstr, pos, varargin{:});
num=num+1;
object.(valid_field(str,varargin{:}))=val;
object.(encodevarname(str,varargin{:}))=val;
[cc, pos]=next_char(inputstr,pos);
if cc == '}' || (count>=0 && num>=count)
break;
Expand All @@ -357,38 +357,3 @@
error_pos('unsupported type at position %d',inputstr, pos);
end
end
%%-------------------------------------------------------------------------

function str = valid_field(str,varargin)
% Turn an arbitrary string into a usable field name.
% From MATLAB doc: field names must begin with a letter, which may be
% followed by any combination of letters, digits, and underscores.
% A first character that is not a letter is replaced by the prefix
% "x0x[Hex code]_"; any other character outside [0-9A-Za-z_] is replaced
% by "_0x[Hex code]_". Extra arguments are accepted but not used.

% --- leading character ---
badstart=~isempty(regexp(str,'^[^A-Za-z]','once'));
if(badstart)
    if(isoctavemesh || str(1)+0 <= 255)
        % the first character is handed to sprintf directly
        str=sprintf('x0x%X_%s',char(str(1)),str(2:end));
    else
        % wide character: hex of its bytes as returned by unicode2native
        str=regexprep(str,'^([^A-Za-z])','x0x${sprintf(''%X'',unicode2native($1))}_','once');
    end
end

% --- remaining characters, only needed while the name is still invalid ---
if(~isvarname(str))
    if(isoctavemesh)
        badpos=regexp(str,'[^0-9A-Za-z_]');
        if(~isempty(badpos))
            src=str;
            bounds=[0 badpos(:)' length(str)];
            str='';
            k=1;
            while(k<=length(badpos))
                keep=src(bounds(k)+1:badpos(k)-1);
                str=[str keep sprintf('_0x%X_',src(badpos(k)))];
                k=k+1;
            end
            if(badpos(end)~=length(str))
                str=[str src(bounds(end-1)+1:bounds(end))];
            end
        end
    else
        str=regexprep(str,'([^0-9A-Za-z_])','_0x${sprintf(''%X'',unicode2native($1))}_');
    end
end
end
Loading

0 comments on commit f97de9b

Please sign in to comment.