Skip to content

Commit 5a90a26

Browse files
aacicpre-commit-ci[bot]shaneahmed
authored
Add FsspecJsonWSIReader class. (#897)
The `FsspecJsonWSIReader` reads an fsspec JSON file which represents an SVS or TIFF whole slide image. The images are accessible by HTTP range requests, e.g. `https://api.gdc.cancer.gov/data/73c69d24-6f9e-44e2-bfe5-a608d4cf5c27`. The whole image can be downloaded like: `curl -C - -o TCGA-22-1017-01Z-00-DX1.9562FE79-A261-42D3-B394-F3E0E2FF7DDA.svs https://api.gdc.cancer.gov/data/73c69d24-6f9e-44e2-bfe5-a608d4cf5c27`. The `FsspecJsonWSIReader` class has a `_zarr_store` field which is created by reading the JSON file using `fsspec`: ``` mapper = fsspec.get_mapper( "reference://", fo=str(input_img), target_protocol="file" ) self._zarr_array = zarr.open(mapper, mode="r") self._zarr_store = self._zarr_array.store self._zarr_lru_cache = zarr.LRUStoreCache(self._zarr_store, max_size=cache_size) self._zarr_group = zarr.open(self._zarr_lru_cache) ``` This is equivalent to the `TIFFWSIReader` code: ``` self._zarr_store = tifffile.imread( self.input_path, series=self.series_n, aszarr=True, ) self._zarr_lru_cache = zarr.LRUStoreCache(self._zarr_store, max_size=cache_size) self._zarr_group = zarr.open(self._zarr_lru_cache) ``` Both `FsspecJsonWSIReader` and `TIFFWSIReader` forward calls to the `read_bounds` and `read_rect` methods of the `TIFFWSIReaderDelegate` delegate instance. The method `_info` of the `TIFFWSIReaderDelegate` reads SVS metadata which is stored in the root group metadata like: ``` { ".zattrs": { "multiscales": [ { "metadata": { "objective_power": 40, "vendor": "Aperio", "mpp": [0.2525, 0.2525] } } ] } } ``` To test, execute from the root dir: ``` pip install -r requirements/requirements_dev.txt mkdir -p samples/slides mkdir -p samples/fsspec cd samples/slides curl -C - -o TCGA-22-1017-01Z-00-DX1.9562FE79-A261-42D3-B394-F3E0E2FF7DDA.svs https://api.gdc.cancer.gov/data/73c69d24-6f9e-44e2-bfe5-a608d4cf5c27 cd ../../ cp tiatoolbox/utils/tiff_to_fsspec.py . 
python tiff_to_fsspec.py "samples/slides/TCGA-22-1017-01Z-00-DX1.9562FE79-A261-42D3-B394-F3E0E2FF7DDA.svs" "samples/fsspec/73c69d24-6f9e-44e2-bfe5-a608d4cf5c27_fsspec.json" "https://api.gdc.cancer.gov/data/73c69d24-6f9e-44e2-bfe5-a608d4cf5c27" ``` Create `tileserver.py` inside of the project root: ``` from flask_cors import CORS from tiatoolbox.visualization import TileServer from tiatoolbox.wsicore.wsireader import FsspecJsonWSIReader wsi = FsspecJsonWSIReader.open( "./samples/fsspec/73c69d24-6f9e-44e2-bfe5-a608d4cf5c27_fsspec.json" ) # Initialize and run the TileServer tile_server = TileServer( title="Tiatoolbox TileServer", layers={"layer": wsi}, ) CORS(tile_server, send_wildcard=True) tile_server.run(host="127.0.0.1", port=5000) ``` Open `http://127.0.0.1:5000/` and verify that it works. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Shan E Ahmed Raza <[email protected]>
1 parent 2416ba9 commit 5a90a26

File tree

4 files changed

+772
-180
lines changed

4 files changed

+772
-180
lines changed

requirements/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# torch installation
22
--extra-index-url https://download.pytorch.org/whl/cu118; sys_platform != "darwin"
3+
aiohttp>=3.8.1
34
albumentations>=1.3.0
45
bokeh>=3.1.1, <3.6.0
56
Click>=8.1.3

tests/test_wsireader.py

Lines changed: 170 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,8 @@
99
import shutil
1010
from copy import deepcopy
1111
from pathlib import Path
12-
13-
# When no longer supporting Python <3.9 this should be collections.abc.Iterable
1412
from typing import TYPE_CHECKING, Callable
13+
from unittest.mock import patch
1514

1615
import cv2
1716
import glymur
@@ -27,7 +26,7 @@
2726

2827
from tiatoolbox import cli, utils
2928
from tiatoolbox.annotation import SQLiteStore
30-
from tiatoolbox.utils import imread
29+
from tiatoolbox.utils import imread, tiff_to_fsspec
3130
from tiatoolbox.utils.exceptions import FileNotSupportedError
3231
from tiatoolbox.utils.magic import is_sqlite3
3332
from tiatoolbox.utils.transforms import imresize, locsize2bounds
@@ -37,6 +36,7 @@
3736
AnnotationStoreReader,
3837
ArrayView,
3938
DICOMWSIReader,
39+
FsspecJsonWSIReader,
4040
JP2WSIReader,
4141
NGFFWSIReader,
4242
OpenSlideWSIReader,
@@ -221,6 +221,43 @@ def read_bounds_level_consistency(wsi: WSIReader, bounds: IntBounds) -> None:
221221
# Utility Test Classes & Functions
222222
# -------------------------------------------------------------------------------------
223223

224+
_FSSPEC_WSI_CACHE = {}
225+
226+
227+
def fsspec_wsi(sample_svs: Path, tmp_path: Path) -> FsspecJsonWSIReader:
    """Return a module-wide cached FsspecJsonWSIReader instance.

    On the first call an fsspec JSON file is generated in ``tmp_path`` from
    the first ``*.svs`` file found next to ``sample_svs`` and a reader is
    opened on it. Later calls return the same reader, so the image at
    https://tiatoolbox.dcs.warwick.ac.uk/sample_wsis/CMU-1-Small-Region.svs
    (about 20 seconds to load over HTTP range requests) is only set up once.

    Args:
        sample_svs (Path): Path to the sample SVS file; only its parent
            directory is searched for ``*.svs`` files.
        tmp_path (Path): Directory where ``fsspec.json`` is written.

    Returns:
        FsspecJsonWSIReader: The cached reader instance.

    """
    cache_key = "sample_svs"

    # EAFP lookup: hand back the shared reader if it was already created.
    try:
        return _FSSPEC_WSI_CACHE[cache_key]
    except KeyError:
        pass

    svs_candidates = utils.misc.grab_files_from_dir(
        input_path=Path(sample_svs).parent,
        file_types=("*.svs",),
    )
    fsspec_json = str(tmp_path / "fsspec.json")
    tiff_to_fsspec.main(
        str(svs_candidates[0]),
        fsspec_json,
        "https://tiatoolbox.dcs.warwick.ac.uk/sample_wsis/CMU-1-Small-Region.svs",
    )

    reader = wsireader.FsspecJsonWSIReader(fsspec_json)
    _FSSPEC_WSI_CACHE[cache_key] = reader
    return reader
260+
224261

225262
class DummyMutableOpenSlideObject:
226263
"""Dummy OpenSlide object with mutable properties."""
@@ -2812,3 +2849,133 @@ def test_read_multi_channel(source_image: Path) -> None:
28122849
assert region.shape == (100, 50, (new_img_array.shape[-1]))
28132850
assert np.abs(np.median(region.astype(int) - target.astype(int))) == 0
28142851
assert np.abs(np.mean(region.astype(int) - target.astype(int))) < 0.2
2852+
2853+
2854+
def test_fsspec_json_wsi_reader_instantiation() -> None:
    """Check that WSIReader.open dispatches JSON input to FsspecJsonWSIReader.

    The validity check is patched to succeed and the reader class itself is
    replaced by a mock, so no file is read; only the constructor call is
    verified.
    """
    json_path = "mock_path.json"
    reader_target = "tiatoolbox.wsicore.wsireader.FsspecJsonWSIReader"

    with (
        patch(f"{reader_target}.is_valid_zarr_fsspec", return_value=True),
        patch(reader_target) as reader_cls,
    ):
        WSIReader.open(json_path, None, None)
        reader_cls.assert_called_once_with(json_path, mpp=None, power=None)
2873+
2874+
2875+
def test_generate_fsspec_json_file_and_validate(
    sample_svs: Path, tmp_path: Path
) -> None:
    """Generate an fsspec JSON file from a sample SVS and validate it."""
    svs_candidates = utils.misc.grab_files_from_dir(
        input_path=Path(sample_svs).parent,
        file_types=("*.svs",),
    )
    output_json = tmp_path / "fsspec.json"

    tiff_to_fsspec.main(
        str(svs_candidates[0]),
        str(output_json),
        "https://example.com/some_id",
    )

    assert output_json.exists(), "Output JSON file was not created."
    assert FsspecJsonWSIReader.is_valid_zarr_fsspec(str(output_json)), (
        "FSSPEC JSON file is invalid."
    )
2897+
2898+
2899+
def test_fsspec_wsireader_info_read(sample_svs: Path, tmp_path: Path) -> None:
    """Test that the FsspecJsonWSIReader exposes slide info.

    The shared reader is backed by a generated fsspec JSON file that points
    at:

    https://tiatoolbox.dcs.warwick.ac.uk/sample_wsis/CMU-1-Small-Region.svs

    """
    reader = fsspec_wsi(sample_svs, tmp_path)

    assert reader.info is not None, "info should not be None."
2911+
2912+
2913+
def test_read_bounds_fsspec_reader_baseline(sample_svs: Path, tmp_path: Path) -> None:
    """Test FsspecJsonWSIReader.read_bounds at baseline resolution.

    The bounds are given in the baseline (level 0) reference frame.

    """
    reader = fsspec_wsi(sample_svs, tmp_path)
    # The array shape is the tissue size reversed, plus three channels.
    expected_shape = (*SVS_TEST_TISSUE_SIZE[::-1], 3)

    region = reader.read_bounds(SVS_TEST_TISSUE_BOUNDS, resolution=0, units="level")

    assert isinstance(region, np.ndarray)
    assert region.dtype == "uint8"
    assert region.shape == expected_shape
2928+
2929+
2930+
def test_read_rect_fsspec_reader_baseline(sample_svs: Path, tmp_path: Path) -> None:
    """Test FsspecJsonWSIReader.read_rect at baseline resolution.

    The location is given in the baseline (level 0) reference frame.

    """
    reader = fsspec_wsi(sample_svs, tmp_path)
    # The array shape is the tissue size reversed, plus three channels.
    expected_shape = (*SVS_TEST_TISSUE_SIZE[::-1], 3)

    region = reader.read_rect(
        SVS_TEST_TISSUE_LOCATION,
        SVS_TEST_TISSUE_SIZE,
        resolution=0,
        units="level",
    )

    assert isinstance(region, np.ndarray)
    assert region.dtype == "uint8"
    assert region.shape == expected_shape
2945+
2946+
2947+
def test_fsspec_reader_open_invalid_json_file(tmp_path: Path) -> None:
    """Ensure malformed JSON is reported as invalid.

    A file containing text that is not valid JSON is passed to
    FsspecJsonWSIReader.is_valid_zarr_fsspec, which must return a falsy value.
    """
    broken_json = tmp_path / "invalid.json"
    broken_json.write_text("{invalid json}")  # Corrupt JSON

    is_valid = FsspecJsonWSIReader.is_valid_zarr_fsspec(str(broken_json))

    assert not is_valid
2956+
2957+
2958+
def test_fsspec_reader_open_oserror_handling() -> None:
    """Ensure OSError is handled properly.

    ``builtins.open`` is patched to raise OSError while
    FsspecJsonWSIReader.is_valid_zarr_fsspec is called with a path that is
    never created; the check must return False.

    """
    with patch("builtins.open", side_effect=OSError("File not found")):
        is_valid = FsspecJsonWSIReader.is_valid_zarr_fsspec("non_existent.json")

    assert is_valid is False, "Function should return False for OSError"
2968+
2969+
2970+
def test_fsspec_reader_open_pass_empty_json(tmp_path: Path) -> None:
    """Ensure an empty JSON object is reported as invalid.

    A file containing only ``{}`` is passed to
    FsspecJsonWSIReader.is_valid_zarr_fsspec, which must return a falsy value.

    """
    empty_json = tmp_path / "empty.json"
    empty_json.write_text("{}")

    is_valid = FsspecJsonWSIReader.is_valid_zarr_fsspec(str(empty_json))

    assert not is_valid

tiatoolbox/utils/tiff_to_fsspec.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
"""Module for processing SVS metadata and generating fsspec zarr JSON file.
2+
3+
The fsspec zarr json file is meant to be used in case SVS or TIFF files
4+
can be accessed using byte range HTTP API.
5+
6+
The fsspec zarr json file can be opened using FsspecJsonWSIReader.
7+
8+
"""
9+
10+
from __future__ import annotations
11+
12+
import json
13+
import sys
14+
from datetime import datetime
15+
from pathlib import Path
16+
from typing import Any
17+
18+
from tifffile import TiffFile, tiff2fsspec
19+
20+
from tiatoolbox.wsicore.wsireader import TIFFWSIReaderDelegate
21+
22+
# Constants
23+
EXPECTED_KEY_VALUE_PAIRS = 2
24+
EXPECTED_ARG_COUNT = 4
25+
URL_PLACEHOLDER = "https://replace.me/"
26+
27+
28+
def convert_metadata(metadata: dict) -> dict:
    """Convert metadata to JSON-compatible format.

    Recursively walks dictionaries, lists and tuples and replaces every
    ``datetime`` value with its ISO 8601 string. Any other value is returned
    unchanged.

    Args:
        metadata (dict): Metadata to convert. The function calls itself on
            nested values, so lists, tuples, datetimes and plain values are
            accepted as well.

    Returns:
        dict: A new container of the same kind as the input with all
        ``datetime`` values replaced by strings. Non-container input is
        returned as is (or as a string for a ``datetime``).

    """
    if isinstance(metadata, dict):
        return {key: convert_metadata(value) for key, value in metadata.items()}
    if isinstance(metadata, list):
        return [convert_metadata(item) for item in metadata]
    if isinstance(metadata, tuple):
        # Tuples can carry datetimes too; left unconverted they would make
        # json.dumps fail. The tuple type is kept for existing callers.
        return tuple(convert_metadata(item) for item in metadata)
    if isinstance(metadata, datetime):
        return metadata.isoformat()  # Convert datetime to ISO 8601 string
    return metadata
37+
38+
39+
def replace_url(
    data: dict[str, Any], output_path: Path, old_url: str, new_url: str
) -> None:
    """Replace a URL in the reference data and write the data as JSON.

    Every value of ``data`` that is a non-empty list whose first element
    equals ``old_url`` has that element replaced by ``new_url``. The
    dictionary is modified in place and then dumped to ``output_path``.

    Args:
        data (dict[str, Any]): Reference data to update. Only top-level
            list values are inspected.
        output_path (Path): File the updated data is written to. An existing
            file is overwritten.
        old_url (str): URL to look for in the first element of list values.
        new_url (str): URL that replaces ``old_url``.

    """
    for value in data.values():
        # The truthiness check skips empty lists, which would otherwise
        # raise IndexError when indexed.
        if isinstance(value, list) and value and value[0] == old_url:
            value[0] = new_url

    with output_path.open("w") as json_file:
        json.dump(data, json_file, indent=2)
49+
50+
51+
def main(svs_file_path: str, json_file_path: str, final_url: str) -> None:
    """Generate an fsspec reference JSON file for an SVS or TIFF file.

    The reference file is first written by ``tiff2fsspec`` using a
    placeholder URL. Metadata parsed by ``TIFFWSIReaderDelegate`` is then
    stored under ``multiscales[0]["metadata"]`` in the ``.zattrs`` entry, and
    references to the placeholder URL followed by the file name are replaced
    by ``final_url`` before the JSON file is rewritten.

    Args:
        svs_file_path (str): The local file path of the SVS file to be processed.
        json_file_path (str): The file path where the output JSON will be saved.
        final_url (str): The URL where the SVS file is stored online
            and can be accessed via HTTP byte range API.

    Example:
        main(
            '/path/to/CMU-1-Small-Region.svs',
            '/path/to/CMU-1-Small-Region.json',
            'https://tiatoolbox.dcs.warwick.ac.uk/sample_wsis/CMU-1-Small-Region.svs',
        )

    """
    url_to_replace = f"{URL_PLACEHOLDER}{Path(svs_file_path).name}"

    # Open the slide in a context manager so the file handle is released
    # even if conversion or metadata parsing raises.
    with TiffFile(svs_file_path) as tiff:
        tiff_file_pages = tiff.pages

        # Generate fsspec JSON
        tiff2fsspec(svs_file_path, url=URL_PLACEHOLDER, out=json_file_path)

        # Parse metadata while the file is still open.
        if tiff.is_svs:
            metadata = TIFFWSIReaderDelegate.parse_svs_metadata(tiff_file_pages)
        else:  # pragma: no cover
            metadata = TIFFWSIReaderDelegate.parse_generic_tiff_metadata(
                tiff_file_pages
            )

    # Convert metadata to JSON-compatible format
    metadata_serializable = convert_metadata(metadata)

    # Read the JSON data from the file
    json_path = Path(json_file_path)
    with json_path.open() as file:
        json_data = json.load(file)

    # Decode `.zattrs` JSON string into a dictionary
    zattrs = json.loads(json_data[".zattrs"])

    # Ensure "multiscales" exists and is a non-empty list, so that indexing
    # its first element below cannot fail.
    multiscales = zattrs.get("multiscales")
    if not isinstance(multiscales, list) or not multiscales:  # pragma: no cover
        zattrs["multiscales"] = [{}]  # Initialize as a list with an empty dictionary

    # Update metadata into `.zattrs`
    zattrs["multiscales"][0]["metadata"] = metadata_serializable

    # Convert back to a JSON string
    json_data[".zattrs"] = json.dumps(zattrs)

    # Replace URLs in the JSON file
    replace_url(json_data, json_path, url_to_replace, final_url)
103+
104+
105+
if __name__ == "__main__":
    # The script name plus three positional arguments are expected.
    cli_args = sys.argv
    if len(cli_args) != EXPECTED_ARG_COUNT:
        usage = " Usage: python script.py <svs_file_path> <json_file_path> <final_url>"
        raise ValueError(usage)

    # Forward <svs_file_path>, <json_file_path> and <final_url> in order.
    main(*cli_args[1:])

0 commit comments

Comments
 (0)