|
function varargout = jdlink(uripath, varargin)
%
% data = jdlink(uripath)
%    or
% [data, fname, cachepath] = jdlink(uripath, 'param1', value1, ...)
%
% Download linked data files from URLs and store those in cached folders
%
% author: Qianqian Fang (q.fang <at> neu.edu)
%
% input:
%      uripath: a single string or a cell array of strings, containing
%               the http:// or https:// links pointing to the linked
%               resources
%      'param'/value pairs: (optional) additional options are supported,
%               including
%          showlink: [1]: print URL or cached file; 0 do not print.
%          showsize: [1]: print the total size of the linked files; 0 do
%               not print. The size of each file is read from the
%               '&size=<bytes>' parameter embedded in its URL; links
%               without it are counted as "unknown size". Only applies
%               to a cell array input.
%          regex: a regular expression that is used to filter the URL
%               cell array; only those matching the pattern are
%               downloaded; this has no effect on a single URL input
%          the full option struct is also forwarded to loadjd when
%               parsing each file
%
% output:
%      data: a cell array storing the parsed data of each linked file
%      fname: a cell array listing the path to each locally cached file
%      cachepath: a cell array listing the cache search path orders
%
%      for a single URL string, or a cell array that holds exactly one
%      URL after filtering, each output is returned directly instead of
%      being wrapped in a cell array
%
% examples:
%      data = loadjson('https://neurojson.io:7777/openneuro/ds000001');
%      anatfiles = jsonpath(data, '$..anat.._DataLink_');
%      data = jdlink(anatfiles, 'regex', 'sub-0[12].*\.nii');
%      jsonpath(data, '$..Dim')
%
% license:
%     BSD or GPL version 3, see LICENSE_{BSD,GPLv3}.txt files for details
%
% -- this function is part of JSONLab toolbox (http://iso2mesh.sf.net/cgi-bin/index.cgi?jsonlab)
%

opt = varargin2struct(varargin{:});
opt.showlink = jsonopt('showlink', 1, opt);
opt.showsize = jsonopt('showsize', 1, opt);

if (iscell(uripath))
    % drop every URL that does NOT match the user-supplied pattern
    if (isfield(opt, 'regex'))
        nomatch = cellfun(@(x) isempty(regexp(x, opt.regex, 'once')), uripath);
        uripath(nomatch) = [];
    end

    % opt.showsize always exists at this point (defaulted above), so test
    % its value rather than its presence; otherwise 'showsize', 0 could
    % never silence the summary
    if (opt.showsize)
        totalsize = 0;
        nosize = 0;
        for i = 1:length(uripath)
            filesize = regexp(uripath{i}, '&size=(\d+)', 'tokens');
            if (~isempty(filesize) && ~isempty(filesize{1}))
                totalsize = totalsize + str2double(filesize{1});
            else
                nosize = nosize + 1;
            end
        end
        fprintf('total %d links, %.0f bytes, %d files with unknown size\n', length(uripath), totalsize, nosize);
    end

    % collect only as many outputs as the caller requested
    alloutput = cell(1, nargout);
    for i = 1:length(uripath)
        [newdata, fname, cachepath] = downloadlink(uripath{i}, opt);
        if (nargout > 0)
            alloutput{1}{end + 1} = newdata;
            if (nargout > 1)
                alloutput{2}{end + 1} = fname;
                if (nargout > 2)
                    alloutput{3}{end + 1} = cachepath;
                end
            end
        end
    end

    % a single remaining link is returned unwrapped, same as a string input
    if (length(uripath) == 1)
        alloutput = cellfun(@(x) x{1}, alloutput, 'UniformOutput', false);
    end
    varargout = alloutput;
elseif (ischar(uripath) || isa(uripath, 'string'))
    [varargout{1:nargout}] = downloadlink(uripath, opt);
end
| 82 | + |
| 83 | +%% |
function [newdata, fname, cachepath] = downloadlink(uripath, opt)
%
% [newdata, fname, cachepath] = downloadlink(uripath, opt)
%
% Load a single linked resource, downloading it into the cache first if
% jsoncache does not report an existing cached copy.
%
% input:
%      uripath: the URL of the linked resource
%      opt: option struct; opt.showlink toggles the progress message, and
%           the whole struct is passed on to loadjd
%
% output:
%      newdata: data parsed by loadjd, or [] if nothing was loaded
%      fname: path of the local file that was parsed, or '' if nothing
%           was loaded
%      cachepath: first output of jsoncache(uripath), returned unchanged
%
% NOTE(review): from the way it is used here, jsoncache appears to return
% a cell array of candidate cache folders when the file is not cached and
% the path of the cached file otherwise -- confirm against jsoncache.
%

newdata = [];
fname = '';  % keep every output defined even when neither branch below runs
[cachepath, filename] = jsoncache(uripath);
if (iscell(cachepath) && ~isempty(cachepath))
    if (opt.showlink)
        fprintf(1, 'downloading from URL: %s\n', uripath);
    end
    rawdata = webread(uripath);
    % store the download under the first folder in the cache list
    fname = [cachepath{1} filesep filename];
    fpath = fileparts(fname);
    if (~exist(fpath, 'dir'))
        mkdir(fpath);
    end
    fid = fopen(fname, 'wb');
    if (fid < 0)  % fopen reports failure with -1, never 0
        error('can not save URL to cache at path %s', fname);
    end
    fwrite(fid, uint8(rawdata));
    fclose(fid);
    newdata = loadjd(fname, opt);
elseif (~iscell(cachepath) && exist(cachepath, 'file'))
    if (opt.showlink)
        fprintf(1, 'loading from cache: %s\n', cachepath);
    end
    fname = cachepath;
    newdata = loadjd(fname, opt);
end
0 commit comments