Skip to content

Commit 3322f6f

Browse files
committed
major new feature: support array compression and decompression
1 parent 9c01046 commit 3322f6f

12 files changed

+410
-40
lines changed

AUTHORS.txt

+7
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,13 @@ The script loadjson.m was built upon previous works by
2222
- Joel Feenstra: http://www.mathworks.com/matlabcentral/fileexchange/20565
2323
date: 2008/07/03
2424

25+
The data compression/decompression utilities ({zlib,gzip,base64}{encode,decode}.m)
26+
were copied from
27+
28+
- "Byte encoding utilities" by Kota Yamaguchi
29+
https://www.mathworks.com/matlabcentral/fileexchange/39526-byte-encoding-utilities
30+
date: 2013/01/04
31+
2532

2633
This toolbox contains patches submitted by the following contributors:
2734

LICENSE_BSD.txt

+30
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,33 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2323
The views and conclusions contained in the software and documentation are those of the
2424
authors and should not be interpreted as representing official policies, either expressed
2525
or implied, of the copyright holders.
26+
27+
28+
29+
30+
For the included compression/decompression utilities (base64encode.m, base64decode.m,
31+
gzipencode.m, gzipdecode.m, zlibencode.m, zlibdecode.m), the author Kota Yamaguchi
32+
requires the following copyright declaration:
33+
34+
Copyright (c) 2012, Kota Yamaguchi
35+
All rights reserved.
36+
37+
Redistribution and use in source and binary forms, with or without
38+
modification, are permitted provided that the following conditions are met:
39+
40+
* Redistributions of source code must retain the above copyright notice, this
41+
list of conditions and the following disclaimer.
42+
43+
* Redistributions in binary form must reproduce the above copyright notice,
44+
this list of conditions and the following disclaimer in the documentation
45+
and/or other materials provided with the distribution
46+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
47+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
49+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
50+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
52+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
53+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
54+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
55+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

base64decode.m

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
function output = base64decode(input)
%BASE64DECODE Decode Base64 string to a byte array.
%
%    output = base64decode(input)
%
% The function takes a Base64 string INPUT and returns a uint8 array
% OUTPUT. JAVA must be running to use this function. The result is always
% given as a 1-by-N array, and doesn't retrieve the original dimensions.
%
% input:
%      input: the Base64 text to decode; a char array is converted to
%             uint8 before it is handed to the decoder
%
% output:
%      output: the decoded bytes as a 1-by-N uint8 array
%
% An error is raised when INPUT is missing or when no JVM is available.
%
% See also base64encode
%
% Copyright (c) 2012, Kota Yamaguchi
% URL: https://www.mathworks.com/matlabcentral/fileexchange/39526-byte-encoding-utilities
% License : BSD, see LICENSE_*.txt
%

% nargchk is deprecated and emits a warning on every call in recent MATLAB
% releases; an explicit test behaves the same on every MATLAB/Octave version.
if nargin ~= 1
    error('base64decode:invalidArgumentCount', ...
        'base64decode requires exactly one input argument.');
end
% Decoding is delegated to org.apache.commons.codec.binary.Base64, so a
% running JVM is mandatory.
if ~usejava('jvm')
    error('base64decode:jvmNotAvailable', ...
        'base64decode requires the Java Virtual Machine (JVM).');
end
if ischar(input), input = uint8(input); end

% Reinterpret the returned Java byte array as uint8 and transpose it so the
% result is a row vector.
output = typecast(org.apache.commons.codec.binary.Base64.decodeBase64(input), 'uint8')';

end
24+

base64encode.m

+23
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
function output = base64encode(input)
%BASE64ENCODE Encode a byte array using Base64 codec.
%
%    output = base64encode(input)
%
% The function takes a char, int8, or uint8 array INPUT and returns Base64
% encoded string OUTPUT. JAVA must be running to use this function. Note
% that encoding doesn't preserve input dimensions.
%
% input:
%      input: the data to encode; a char array is converted to uint8
%             before it is handed to the encoder
%
% output:
%      output: the Base64 text as a char row vector. The chunked encoder
%              is used, so the text contains the line breaks that
%              encodeBase64Chunked inserts.
%
% An error is raised when INPUT is missing or when no JVM is available.
%
% See also base64decode
%
% Copyright (c) 2012, Kota Yamaguchi
% URL: https://www.mathworks.com/matlabcentral/fileexchange/39526-byte-encoding-utilities
% License : BSD, see LICENSE_*.txt
%

% nargchk is deprecated and emits a warning on every call in recent MATLAB
% releases; an explicit test behaves the same on every MATLAB/Octave version.
if nargin ~= 1
    error('base64encode:invalidArgumentCount', ...
        'base64encode requires exactly one input argument.');
end
% Encoding is delegated to org.apache.commons.codec.binary.Base64, so a
% running JVM is mandatory.
if ~usejava('jvm')
    error('base64encode:jvmNotAvailable', ...
        'base64encode requires the Java Virtual Machine (JVM).');
end
if ischar(input), input = uint8(input); end

% Convert the returned Java byte array to characters and transpose it so
% the result is a row vector.
output = char(org.apache.commons.codec.binary.Base64.encodeBase64Chunked(input))';

end

gzipdecode.m

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
function output = gzipdecode(input)
%GZIPDECODE Decompress input bytes using GZIP.
%
%    output = gzipdecode(input)
%
% The function takes a compressed byte array INPUT and returns inflated
% bytes OUTPUT. The INPUT is a result of GZIPENCODE function. The OUTPUT
% is always a 1-by-N uint8 array. JAVA must be enabled to use the function.
%
% input:
%      input: GZIP-compressed bytes as an int8 or uint8 array; a char
%             array is accepted with a warning and treated as uint8
%
% output:
%      output: the decompressed bytes as a 1-by-N uint8 array
%
% An error is raised when INPUT is missing, when no JVM is available, or
% when INPUT is neither char, int8 nor uint8. Errors thrown by the Java
% stream classes are passed on to the caller.
%
% See also gzipencode typecast
%
% Copyright (c) 2012, Kota Yamaguchi
% URL: https://www.mathworks.com/matlabcentral/fileexchange/39526-byte-encoding-utilities
% License : BSD, see LICENSE_*.txt
%

% nargchk is deprecated and emits a warning on every call in recent MATLAB
% releases; an explicit test behaves the same on every MATLAB/Octave version.
if nargin ~= 1
    error('gzipdecode:invalidArgumentCount', ...
        'gzipdecode requires exactly one input argument.');
end
% Decompression is delegated to java.util.zip, so a running JVM is mandatory.
if ~usejava('jvm')
    error('gzipdecode:jvmNotAvailable', ...
        'gzipdecode requires the Java Virtual Machine (JVM).');
end
if ischar(input)
    warning('gzipdecode:inputTypeMismatch', ...
        'Input is char, but treated as uint8.');
    input = uint8(input);
end
if ~isa(input, 'int8') && ~isa(input, 'uint8')
    error('Input must be either int8 or uint8.');
end

gzip = java.util.zip.GZIPInputStream(java.io.ByteArrayInputStream(input));
buffer = java.io.ByteArrayOutputStream();
try
    org.apache.commons.io.IOUtils.copy(gzip, buffer);
catch err
    % Close the stream before passing the failure on, so that a corrupt
    % payload does not leave the GZIP stream open.
    gzip.close();
    rethrow(err);
end
gzip.close();
% Reinterpret the Java byte array as uint8 and transpose it to a row vector.
output = typecast(buffer.toByteArray(), 'uint8')';

end

gzipencode.m

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
function output = gzipencode(input)
%GZIPENCODE Compress input bytes with GZIP.
%
%    output = gzipencode(input)
%
% The function takes a char, int8, or uint8 array INPUT and returns
% compressed bytes OUTPUT as a uint8 array. Note that the compression
% doesn't preserve input dimensions. JAVA must be enabled to use the
% function.
%
% input:
%      input: the data to compress as a char, int8 or uint8 array; a char
%             array is converted to uint8 first
%
% output:
%      output: the GZIP-compressed bytes as a uint8 row vector
%
% An error is raised when INPUT is missing, when no JVM is available, or
% when INPUT is neither char, int8 nor uint8.
%
% See also gzipdecode typecast
%
% Copyright (c) 2012, Kota Yamaguchi
% URL: https://www.mathworks.com/matlabcentral/fileexchange/39526-byte-encoding-utilities
% License : BSD, see LICENSE_*.txt
%

% nargchk is deprecated and emits a warning on every call in recent MATLAB
% releases; an explicit test behaves the same on every MATLAB/Octave version.
if nargin ~= 1
    error('gzipencode:invalidArgumentCount', ...
        'gzipencode requires exactly one input argument.');
end
% Compression is delegated to java.util.zip, so a running JVM is mandatory.
if ~usejava('jvm')
    error('gzipencode:jvmNotAvailable', ...
        'gzipencode requires the Java Virtual Machine (JVM).');
end
if ischar(input), input = uint8(input); end
if ~isa(input, 'int8') && ~isa(input, 'uint8')
    error('Input must be either char, int8 or uint8.');
end

buffer = java.io.ByteArrayOutputStream();
gzip = java.util.zip.GZIPOutputStream(buffer);
gzip.write(input, 0, numel(input));
% Closing the GZIP stream finishes the compressed data in BUFFER.
gzip.close();
% Reinterpret the Java byte array as uint8 and transpose it to a row vector.
output = typecast(buffer.toByteArray(), 'uint8')';

end
32+

loadubjson.m

+5-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@
3131
% the "name" tag is treated as a string. To load
3232
% these UBJSON data, you need to manually set this
3333
% flag to 1.
34+
% opt.Compression 'zlib' or 'gzip': specify array compression
35+
% method; currently only supports 'gzip' or 'zlib'.
36+
% opt.CompressArraySize [0|int]: only compress arrays with a total
37+
% element count larger than this number.
3438
%
3539
% output:
3640
% dat: a cell array, where {...} blocks are converted into cell arrays,
@@ -133,7 +137,7 @@
133137
parse_char('}');
134138
end
135139
if(isstruct(object))
136-
object=struct2jdata(object);
140+
object=struct2jdata(object,'Base64',0);
137141
end
138142

139143
%%-------------------------------------------------------------------------

savejson.m

+79-18
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,10 @@
6767
% back to the string form
6868
% opt.SaveBinary [0|1]: 1 - save the JSON file in binary mode; 0 - text mode.
6969
% opt.Compact [0|1]: 1 - output compact JSON format (remove all newlines and tabs)
70-
%
70+
% opt.Compression 'zlib' or 'gzip': specify array compression
71+
% method; currently only supports 'gzip' or 'zlib'.
72+
% opt.CompressArraySize [0|int]: only compress arrays with a total
73+
% element count larger than this number.
7174
% opt can be replaced by a list of ('param',value) pairs. The param
7275
% string is equivalent to a field in opt and is case sensitive.
7376
% output:
@@ -105,6 +108,21 @@
105108
opt=varargin2struct(varargin{:});
106109
end
107110
opt.IsOctave=exist('OCTAVE_VERSION','builtin');
111+
112+
dozip=jsonopt('Compression','',opt);
113+
if(~opt.IsOctave && ~isempty(dozip))
114+
if(~(strcmpi(dozip,'gzip') || strcmpi(dozip,'zlib')))
115+
error('compression method "%s" is not supported',dozip);
116+
end
117+
try
118+
error(javachk('jvm'));
119+
matlab.net.base64decode('test');
120+
catch
121+
error('java-based compression is not supported');
122+
end
123+
opt.Compression=dozip;
124+
end
125+
108126
if(isfield(opt,'norowbracket'))
109127
warning('Option ''NoRowBracket'' is depreciated, please use ''SingletArray'' and set its value to not(NoRowBracket)');
110128
if(~isfield(opt,'singletarray'))
@@ -370,8 +388,11 @@
370388
nl=ws.newline;
371389
sep=ws.sep;
372390

391+
dozip=jsonopt('Compression','',varargin{:});
392+
zipsize=jsonopt('CompressArraySize',0,varargin{:});
393+
373394
if(length(size(item))>2 || issparse(item) || ~isreal(item) || ...
374-
(isempty(item) && any(size(item))) ||jsonopt('ArrayToStruct',0,varargin{:}))
395+
(isempty(item) && any(size(item))) ||jsonopt('ArrayToStruct',0,varargin{:}) || (~isempty(dozip) && numel(item)>zipsize))
375396
if(isempty(name))
376397
txt=sprintf('%s{%s%s"_ArrayType_": "%s",%s%s"_ArraySize_": %s,%s',...
377398
padding1,nl,padding0,class(item),nl,padding0,regexprep(mat2str(size(item)),'\s+',','),nl);
@@ -411,27 +432,67 @@
411432
txt=sprintf(dataformat,txt,padding0,'"_ArrayIsComplex_": ','1', sep);
412433
end
413434
txt=sprintf(dataformat,txt,padding0,'"_ArrayIsSparse_": ','1', sep);
414-
if(size(item,1)==1)
415-
% Row vector, store only column indices.
416-
txt=sprintf(dataformat,txt,padding0,'"_ArrayData_": ',...
417-
matdata2json([iy(:),data'],level+2,varargin{:}), nl);
418-
elseif(size(item,2)==1)
419-
% Column vector, store only row indices.
420-
txt=sprintf(dataformat,txt,padding0,'"_ArrayData_": ',...
421-
matdata2json([ix,data],level+2,varargin{:}), nl);
435+
if(~isempty(dozip) && numel(data*2)>zipsize)
436+
if(size(item,1)==1)
437+
% Row vector, store only column indices.
438+
fulldata=[iy(:),data'];
439+
elseif(size(item,2)==1)
440+
% Column vector, store only row indices.
441+
fulldata=[ix,data];
442+
else
443+
% General case, store row and column indices.
444+
fulldata=[ix,iy,data];
445+
end
446+
txt=sprintf(dataformat,txt,padding0,'"_ArrayCompressionSize_": ',regexprep(mat2str(size(fulldata)),'\s+',','), sep);
447+
txt=sprintf(dataformat,txt,padding0,'"_ArrayCompressionMethod_": "',dozip, ['"' sep]);
448+
if(strcmpi(dozip,'gzip'))
449+
txt=sprintf(dataformat,txt,padding0,'"_ArrayCompressedData_": "',base64encode(gzipencode(typecast(fulldata(:),'uint8'))),['"' nl]);
450+
elseif(strcmpi(dozip,'zlib'))
451+
txt=sprintf(dataformat,txt,padding0,'"_ArrayCompressedData_": "',base64encode(zlibencode(typecast(fulldata(:),'uint8'))),['"' nl]);
452+
else
453+
error('compression method not supported');
454+
end
422455
else
423-
% General case, store row and column indices.
424-
txt=sprintf(dataformat,txt,padding0,'"_ArrayData_": ',...
425-
matdata2json([ix,iy,data],level+2,varargin{:}), nl);
456+
if(size(item,1)==1)
457+
% Row vector, store only column indices.
458+
txt=sprintf(dataformat,txt,padding0,'"_ArrayData_": ',...
459+
matdata2json([iy(:),data'],level+2,varargin{:}), nl);
460+
elseif(size(item,2)==1)
461+
% Column vector, store only row indices.
462+
txt=sprintf(dataformat,txt,padding0,'"_ArrayData_": ',...
463+
matdata2json([ix,data],level+2,varargin{:}), nl);
464+
else
465+
% General case, store row and column indices.
466+
txt=sprintf(dataformat,txt,padding0,'"_ArrayData_": ',...
467+
matdata2json([ix,iy,data],level+2,varargin{:}), nl);
468+
end
426469
end
427470
else
428-
if(isreal(item))
429-
txt=sprintf(dataformat,txt,padding0,'"_ArrayData_": ',...
430-
matdata2json(item(:)',level+2,varargin{:}), nl);
471+
if(~isempty(dozip) && numel(item)>zipsize)
472+
if(isreal(item))
473+
fulldata=item(:)';
474+
else
475+
txt=sprintf(dataformat,txt,padding0,'"_ArrayIsComplex_": ','1', sep);
476+
fulldata=[real(item(:)) imag(item(:))];
477+
end
478+
txt=sprintf(dataformat,txt,padding0,'"_ArrayCompressionSize_": ',regexprep(mat2str(size(fulldata)),'\s+',','), sep);
479+
txt=sprintf(dataformat,txt,padding0,'"_ArrayCompressionMethod_": "',dozip, ['"' sep]);
480+
if(strcmpi(dozip,'gzip'))
481+
txt=sprintf(dataformat,txt,padding0,'"_ArrayCompressedData_": "',base64encode(gzipencode(typecast(fulldata(:),'uint8'))),['"' nl]);
482+
elseif(strcmpi(dozip,'zlib'))
483+
txt=sprintf(dataformat,txt,padding0,'"_ArrayCompressedData_": "',base64encode(zlibencode(typecast(fulldata(:),'uint8'))),['"' nl]);
484+
else
485+
error('compression method not supported');
486+
end
431487
else
432-
txt=sprintf(dataformat,txt,padding0,'"_ArrayIsComplex_": ','1', sep);
433-
txt=sprintf(dataformat,txt,padding0,'"_ArrayData_": ',...
488+
if(isreal(item))
489+
txt=sprintf(dataformat,txt,padding0,'"_ArrayData_": ',...
490+
matdata2json(item(:)',level+2,varargin{:}), nl);
491+
else
492+
txt=sprintf(dataformat,txt,padding0,'"_ArrayIsComplex_": ','1', sep);
493+
txt=sprintf(dataformat,txt,padding0,'"_ArrayData_": ',...
434494
matdata2json([real(item(:)) imag(item(:))],level+2,varargin{:}), nl);
495+
end
435496
end
436497
end
437498
txt=sprintf('%s%s%s',txt,padding1,'}');

0 commit comments

Comments
 (0)