Skip to content

Commit d69686d

Browse files
committed
[feat] add jdlink to dynamically download and cache linked data
1 parent 772a1ef commit d69686d

File tree

6 files changed

+141
-30
lines changed

6 files changed

+141
-30
lines changed

Contents.m

+6-2
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,25 @@
33
% Files
44
% base64decode - output = base64decode(input)
55
% base64encode - output = base64encode(input)
6+
% blosc2decode - output = blosc2decode(input)
7+
% blosc2encode - output = blosc2encode(input)
68
% decodevarname - newname = decodevarname(name)
79
% encodevarname - newname = encodevarname(name)
810
% fast_match_bracket - [endpos, maxlevel] = fast_match_bracket(key,pos,startpos,brackets)
911
% filterjsonmmap - mmap=filterjsonmmap(mmap, patterns, isinclude)
10-
% jsoncache - [cachepath, filename]=jsoncache(hyperlink)
11-
% jsonpath - obj=jsonpath(root, jsonpath)
1212
% gzipdecode - output = gzipdecode(input)
1313
% gzipencode - output = gzipencode(input)
1414
% isoctavemesh - [isoctave verinfo]=isoctavemesh
1515
% jdatadecode - newdata=jdatadecode(data,opt,...)
1616
% jdataencode - jdata=jdataencode(data)
17+
% jdatahash - key = jdatahash(data, algorithm)
18+
% jdlink - data = jdlink(uripath)
1719
% jload - jload
1820
% jsave - jsave
21+
% jsoncache - [cachepath, filename]=jsoncache(hyperlink)
1922
% jsonget - json=jsonget(fname,mmap,'$.jsonpath1','$.jsonpath2',...)
2023
% jsonopt - val=jsonopt(key,default,optstruct)
24+
% jsonpath - obj=jsonpath(root, jsonpath)
2125
% jsonset - json=jsonset(fname,mmap,'$.jsonpath1',newval1,'$.jsonpath2','newval2',...)
2226
% loadbj - data=loadbj(fname,opt)
2327
% loadjd - data=loadjd(inputfile)

jdatadecode.m

+5-22
Original file line numberDiff line numberDiff line change
@@ -477,29 +477,12 @@
477477
end
478478
if (~isempty(ref.path))
479479
uripath = [ref.proto ref.path];
480-
[cachepath, filename] = jsoncache(uripath);
481-
if (iscell(cachepath) && ~isempty(cachepath))
482-
rawdata = webread(uripath);
483-
fname = [cachepath{1} filesep filename];
484-
fpath = fileparts(fname);
485-
if (~exist(fpath, 'dir'))
486-
mkdir(fpath);
487-
end
488-
fid = fopen(fname, 'wb');
489-
if (fid == 0)
490-
error('can not save URL to cache at path %s', fname);
491-
end
492-
fwrite(fid, uint8(rawdata));
493-
fclose(fid);
494-
480+
[newdata, fname] = jdlink(uripath);
481+
if (exist(fname, 'file'))
495482
opt.maxlinklevel = opt.maxlinklevel - 1;
496-
newdata = loadjd(fname, opt);
497-
elseif (~iscell(cachepath) && exist(cachepath, 'file'))
498-
opt.maxlinklevel = opt.maxlinklevel - 1;
499-
newdata = loadjd(cachepath, opt);
500-
end
501-
if (~isempty(ref.jsonpath))
502-
newdata = jsonpath(newdata, ref.jsonpath);
483+
if (~isempty(ref.jsonpath))
484+
newdata = jsonpath(newdata, ref.jsonpath);
485+
end
503486
end
504487
end
505488
end

jdlink.m

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
function varargout = jdlink(uripath, varargin)
%
% data = jdlink(uripath)
%    or
% [data, fname, cachepath] = jdlink(uripath, 'param1', value1, ...)
%
% Download linked data files from URLs and store them in cache folders
%
% author: Qianqian Fang (q.fang <at> neu.edu)
%
% input:
%    uripath: a single string or a cell array of strings, containing
%             the http:// or https:// links pointing to the linked
%             resources
%    'param'/value pairs: (optional) additional options are supported,
%             including
%         showlink: [1]: print URL or cached file; 0 do not print.
%         showsize: [1]: print the total size of the linked files; 0 do not print.
%                   (only applies when uripath is a cell array)
%         regex: a regular expression that is used to filter the URL
%                cell array; only those matching the pattern are
%                downloaded; this has no effect on a single URL input
%
% output:
%    data: a cell array storing the parsed data of each linked file
%    fname: a cell array listing the path to each locally cached file
%    cachepath: a cell array listing the cache search path orders
%
%    when the input is a single string, or a cell array holding exactly
%    one link (after filtering), each output is returned directly instead
%    of being wrapped in a cell array
%
% examples:
%    data = loadjson('https://neurojson.io:7777/openneuro/ds000001');
%    anatfiles = jsonpath(data, '$..anat.._DataLink_');
%    data = jdlink(anatfiles, 'regex', 'sub-0[12].*\.nii');
%    jsonpath(data, '$..Dim')
%
% license:
%     BSD or GPL version 3, see LICENSE_{BSD,GPLv3}.txt files for details
%
% -- this function is part of JSONLab toolbox (http://iso2mesh.sf.net/cgi-bin/index.cgi?jsonlab)
%

opt = varargin2struct(varargin{:});
opt.showlink = jsonopt('showlink', 1, opt);
opt.showsize = jsonopt('showsize', 1, opt);

if (iscell(uripath))
    if (isfield(opt, 'regex'))
        % mask of the URLs that do NOT match the pattern; those are dropped
        nomatch = cellfun(@(x) isempty(regexp(x, opt.regex, 'once')), uripath);
        uripath(nomatch) = [];
    end
    % opt.showsize always exists because a default is assigned above, so
    % its value (not its presence) must be tested to honor showsize=0
    if (opt.showsize)
        totalsize = 0;
        nosize = 0;
        for i = 1:length(uripath)
            % the byte count is read from a "&size=<digits>" query parameter
            filesize = regexp(uripath{i}, '&size=(\d+)', 'tokens');
            if (~isempty(filesize) && ~isempty(filesize{1}))
                totalsize = totalsize + str2double(filesize{1});
            else
                nosize = nosize + 1;
            end
        end
        fprintf('total %d links, %.0f bytes, %d files with unknown size\n', length(uripath), totalsize, nosize);
    end
    % one accumulator cell per requested output; links are still
    % downloaded and cached when no output is requested
    alloutput = cell(1, nargout);
    for i = 1:length(uripath)
        [newdata, fname, cachepath] = downloadlink(uripath{i}, opt);
        if (nargout > 0)
            alloutput{1}{end + 1} = newdata;
            if (nargout > 1)
                alloutput{2}{end + 1} = fname;
                if (nargout > 2)
                    alloutput{3}{end + 1} = cachepath;
                end
            end
        end
    end
    if (length(uripath) == 1)
        % a single link: unwrap each output from its one-element cell
        alloutput = cellfun(@(x) x{1}, alloutput, 'UniformOutput', false);
    end
    varargout = alloutput;
elseif (ischar(uripath) || isa(uripath, 'string'))
    [varargout{1:nargout}] = downloadlink(uripath, opt);
end
82+
83+
%%
84+
function [newdata, fname, cachepath] = downloadlink(uripath, opt)
%
% [newdata, fname, cachepath] = downloadlink(uripath, opt)
%
% Fetch a single linked resource, either by downloading it into the cache
% or by loading an already cached copy, and parse it with loadjd
%
% input:
%    uripath: the URL of the linked resource
%    opt: option struct; opt.showlink controls whether the URL or cached
%         file name is printed; opt is also passed on to loadjd
%
% output:
%    newdata: the data returned by loadjd, or [] if nothing was loaded
%    fname: path of the local cached file, or '' if nothing was loaded
%    cachepath: first output of jsoncache(uripath)
%

% give every output a defined value so that callers requesting fname do
% not hit an "output argument not assigned" error when neither branch runs
newdata = [];
fname = '';
[cachepath, filename] = jsoncache(uripath);
if (iscell(cachepath) && ~isempty(cachepath))
    % a non-empty cell is handled as "not cached yet": download the file
    % and store it under the first listed cache folder
    if (opt.showlink)
        fprintf(1, 'downloading from URL: %s\n', uripath);
    end
    % NOTE(review): webread may convert the payload based on content type;
    % uint8() below assumes a char/numeric result - confirm for JSON links
    rawdata = webread(uripath);
    fname = [cachepath{1} filesep filename];
    fpath = fileparts(fname);
    if (~exist(fpath, 'dir'))
        mkdir(fpath);
    end
    fid = fopen(fname, 'wb');
    % fopen returns -1 on failure (0 is stdin), so test for a negative id
    if (fid < 0)
        error('can not save URL to cache at path %s', fname);
    end
    fwrite(fid, uint8(rawdata));
    fclose(fid);
    newdata = loadjd(fname, opt);
elseif (~iscell(cachepath) && exist(cachepath, 'file'))
    % a non-cell value naming an existing file is handled as a cache hit
    if (opt.showlink)
        fprintf(1, 'loading from cache: %s\n', cachepath);
    end
    fname = cachepath;
    newdata = loadjd(fname, opt);
end

jsonlab.prj

+3-2
Original file line numberDiff line numberDiff line change
@@ -106,17 +106,19 @@ Please note that data files produced by `saveubjson` may utilize a special "opti
106106
<file>${PROJECT_ROOT}/filterjsonmmap.m</file>
107107
<file>${PROJECT_ROOT}/gendocs.sh</file>
108108
<file>${PROJECT_ROOT}/genlog.sh</file>
109-
<file>${PROJECT_ROOT}/getfromjsonpath.m</file>
110109
<file>${PROJECT_ROOT}/gzipdecode.m</file>
111110
<file>${PROJECT_ROOT}/gzipencode.m</file>
112111
<file>${PROJECT_ROOT}/images</file>
113112
<file>${PROJECT_ROOT}/isoctavemesh.m</file>
114113
<file>${PROJECT_ROOT}/jdatadecode.m</file>
115114
<file>${PROJECT_ROOT}/jdataencode.m</file>
115+
<file>${PROJECT_ROOT}/jdlink.m</file>
116116
<file>${PROJECT_ROOT}/jload.m</file>
117117
<file>${PROJECT_ROOT}/jsave.m</file>
118+
<file>${PROJECT_ROOT}/jsoncache.m</file>
118119
<file>${PROJECT_ROOT}/jsonget.m</file>
119120
<file>${PROJECT_ROOT}/jsonopt.m</file>
121+
<file>${PROJECT_ROOT}/jsonpath.m</file>
120122
<file>${PROJECT_ROOT}/jsonset.m</file>
121123
<file>${PROJECT_ROOT}/loadbj.m</file>
122124
<file>${PROJECT_ROOT}/loadjd.m</file>
@@ -140,7 +142,6 @@ Please note that data files produced by `saveubjson` may utilize a special "opti
140142
<file>${PROJECT_ROOT}/savejson.m</file>
141143
<file>${PROJECT_ROOT}/savemsgpack.m</file>
142144
<file>${PROJECT_ROOT}/saveubjson.m</file>
143-
<file>${PROJECT_ROOT}/test</file>
144145
<file>${PROJECT_ROOT}/varargin2struct.m</file>
145146
<file>${PROJECT_ROOT}/zlibdecode.m</file>
146147
<file>${PROJECT_ROOT}/zlibencode.m</file>

jsonpath.m

+15-3
Original file line numberDiff line numberDiff line change
@@ -131,14 +131,26 @@
131131
end
132132
end
133133
if (~exist('obj', 'var') && deepscan)
134-
items = fieldnames(input);
134+
if (isa(input, 'containers.Map'))
135+
items = keys(input);
136+
else
137+
items = fieldnames(input);
138+
end
135139
for idx = 1:length(items)
136-
[val, isfound] = getonelevel(input.(items{idx}), [paths{1:pathid - 1} {['..' pathname]}], pathid);
140+
if (isa(input, 'containers.Map'))
141+
[val, isfound] = getonelevel(input(items{idx}), [paths{1:pathid - 1} {['..' pathname]}], pathid);
142+
else
143+
[val, isfound] = getonelevel(input.(items{idx}), [paths{1:pathid - 1} {['..' pathname]}], pathid);
144+
end
137145
if (isfound)
138146
if (~exist('obj', 'var'))
139147
obj = {};
140148
end
141-
obj = [obj(:)', val(:)'];
149+
if (iscell(val))
150+
obj = [obj(:)', val(:)'];
151+
else
152+
obj = [obj(:)', {val}];
153+
end
142154
end
143155
end
144156
if (exist('obj', 'var') && length(obj) == 1)

loadjd.m

+2-1
Original file line numberDiff line numberDiff line change
@@ -77,5 +77,6 @@
7777
elseif (regexpi(filename, '\.mat$|\.bvec$|\.bval$'))
7878
[varargout{1:nargout}] = load(filename, varargin{:});
7979
else
80-
error('file suffix must be one of .json,.jnii,.jdt,.jmsh,.jnirs,.jbids,.bjd,.bnii,.jdb,.bmsh,.bnirs,.ubj,.msgpack,.h5,.hdf5,.snirf,.pmat,.nwb,.nii,.nii.gz,.tsv,.tsv.gz,.csv,.csv.gz,.mat,.bvec,.bval');
80+
warning('only support parsing .json,.jnii,.jdt,.jmsh,.jnirs,.jbids,.bjd,.bnii,.jdb,.bmsh,.bnirs,.ubj,.msgpack,.h5,.hdf5,.snirf,.pmat,.nwb,.nii,.nii.gz,.tsv,.tsv.gz,.csv,.csv.gz,.mat,.bvec,.bval; load unparsed raw data');
81+
[varargout{1:nargout}] = fileread(filename);
8182
end

0 commit comments

Comments
 (0)