Skip to content

Commit 6ec1083

Browse files
John-P and shaneahmed authored
🩹 BUG: Fix Parsing Missing Omero Version NGFF Metadata (#568)
- [x] Handle missing version keys and null values. - [x] Add test for missing value - [x] Add test for non-numeric values e.g. "0.5-dev" [see current spec doc](https://ngff.openmicroscopy.org/latest/#omero-md) - [x] Warn if version < or > 0.4 - [x] Warn for inconsistent versions --------- Co-authored-by: Shan E Ahmed Raza <[email protected]>
1 parent 09bd197 commit 6ec1083

File tree

3 files changed

+232
-9
lines changed

3 files changed

+232
-9
lines changed

tests/test_wsireader.py

Lines changed: 172 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
"""Tests for reading whole-slide images."""
22

3+
import copy
34
import json
5+
import logging
46
import os
57
import pathlib
68
import random
79
import re
810
import shutil
911
from copy import deepcopy
12+
from pathlib import Path
1013
from time import time
1114

1215
# When no longer supporting Python <3.9 this should be collections.abc.Iterable
@@ -17,6 +20,7 @@
1720
import pytest
1821
import zarr
1922
from click.testing import CliRunner
23+
from packaging.version import Version
2024
from skimage.filters import threshold_otsu
2125
from skimage.metrics import peak_signal_noise_ratio, structural_similarity
2226
from skimage.morphology import binary_dilation, disk, remove_small_objects
@@ -2034,11 +2038,11 @@ def test_ngff_empty_datasets_mpp(tmp_path):
20342038
assert wsi.info.mpp is None
20352039

20362040

2037-
def test_nff_no_scale_transforms_mpp(tmp_path):
2041+
def test_ngff_no_scale_transforms_mpp(tmp_path):
20382042
"""Test that mpp is None if no scale transforms are present."""
20392043
sample = _fetch_remote_sample("ngff-1")
20402044
# Create a copy of the sample with no axes
2041-
sample_copy = tmp_path / "ngff-1"
2045+
sample_copy = tmp_path / "ngff-1.zarr"
20422046
shutil.copytree(sample, sample_copy)
20432047
with open(sample_copy / ".zattrs", "r") as fh:
20442048
zattrs = json.load(fh)
@@ -2051,6 +2055,172 @@ def test_nff_no_scale_transforms_mpp(tmp_path):
20512055
assert wsi.info.mpp is None
20522056

20532057

2058+
def test_ngff_missing_omero_version(tmp_path):
2059+
"""Test that the reader can handle missing omero version."""
2060+
sample = _fetch_remote_sample("ngff-1")
2061+
# Create a copy of the sample
2062+
sample_copy = tmp_path / "ngff-1.zarr"
2063+
shutil.copytree(sample, sample_copy)
2064+
with open(sample_copy / ".zattrs", "r") as fh:
2065+
zattrs = json.load(fh)
2066+
# Remove the omero version
2067+
del zattrs["omero"]["version"]
2068+
with open(sample_copy / ".zattrs", "w") as fh:
2069+
json.dump(zattrs, fh, indent=2)
2070+
wsireader.WSIReader.open(sample_copy)
2071+
2072+
2073+
def test_ngff_missing_multiscales_returns_false(tmp_path):
2074+
"""Test that missing multiscales key returns False for is_ngff."""
2075+
sample = _fetch_remote_sample("ngff-1")
2076+
# Create a copy of the sample
2077+
sample_copy = tmp_path / "ngff-1.zarr"
2078+
shutil.copytree(sample, sample_copy)
2079+
with open(sample_copy / ".zattrs", "r") as fh:
2080+
zattrs = json.load(fh)
2081+
# Remove the multiscales key
2082+
del zattrs["multiscales"]
2083+
with open(sample_copy / ".zattrs", "w") as fh:
2084+
json.dump(zattrs, fh, indent=2)
2085+
assert not wsireader.is_ngff(sample_copy)
2086+
2087+
2088+
def test_ngff_wrong_format_metadata(tmp_path, caplog):
2089+
"""Test that is_ngff is False and logs a warning if metadata is wrong."""
2090+
sample = _fetch_remote_sample("ngff-1")
2091+
# Create a copy of the sample
2092+
sample_copy = tmp_path / "ngff-1.zarr"
2093+
shutil.copytree(sample, sample_copy)
2094+
with open(sample_copy / ".zattrs", "r") as fh:
2095+
zattrs = json.load(fh)
2096+
# Replace the multiscales list with a value of the wrong type
2097+
zattrs["multiscales"] = "foo"
2098+
with open(sample_copy / ".zattrs", "w") as fh:
2099+
json.dump(zattrs, fh, indent=2)
2100+
with caplog.at_level(logging.WARNING):
2101+
assert not wsireader.is_ngff(sample_copy)
2102+
assert "must be present and of the correct type" in caplog.text
2103+
2104+
2105+
def test_ngff_omero_below_min_version(tmp_path):
2106+
"""Test for FileNotSupported when omero version is below minimum."""
2107+
sample = _fetch_remote_sample("ngff-1")
2108+
# Create a copy of the sample
2109+
sample_copy = tmp_path / "ngff-1.zarr"
2110+
shutil.copytree(sample, sample_copy)
2111+
with open(sample_copy / ".zattrs", "r") as fh:
2112+
zattrs = json.load(fh)
2113+
# Set the omero version below the minimum supported version
2114+
zattrs["omero"]["version"] = "0.0"
2115+
with open(sample_copy / ".zattrs", "w") as fh:
2116+
json.dump(zattrs, fh, indent=2)
2117+
with pytest.raises(FileNotSupported):
2118+
wsireader.WSIReader.open(sample_copy)
2119+
2120+
2121+
def test_ngff_omero_above_max_version(tmp_path):
2122+
"""Test for FileNotSupported when omero version is above maximum."""
2123+
sample = _fetch_remote_sample("ngff-1")
2124+
# Create a copy of the sample
2125+
sample_copy = tmp_path / "ngff-1.zarr"
2126+
shutil.copytree(sample, sample_copy)
2127+
with open(sample_copy / ".zattrs", "r") as fh:
2128+
zattrs = json.load(fh)
2129+
# Set the omero version above the maximum supported version
2130+
zattrs["omero"]["version"] = "10.0"
2131+
with open(sample_copy / ".zattrs", "w") as fh:
2132+
json.dump(zattrs, fh, indent=2)
2133+
with pytest.raises(FileNotSupported):
2134+
wsireader.WSIReader.open(sample_copy)
2135+
2136+
2137+
def test_ngff_multiscales_below_min_version(tmp_path):
2138+
"""Test for FileNotSupported when multiscales version is below minimum."""
2139+
sample = _fetch_remote_sample("ngff-1")
2140+
# Create a copy of the sample
2141+
sample_copy = tmp_path / "ngff-1.zarr"
2142+
shutil.copytree(sample, sample_copy)
2143+
with open(sample_copy / ".zattrs", "r") as fh:
2144+
zattrs = json.load(fh)
2145+
# Set the multiscales version below the minimum supported version
2146+
zattrs["multiscales"][0]["version"] = "0.0"
2147+
with open(sample_copy / ".zattrs", "w") as fh:
2148+
json.dump(zattrs, fh, indent=2)
2149+
with pytest.raises(FileNotSupported):
2150+
wsireader.WSIReader.open(sample_copy)
2151+
2152+
2153+
def test_ngff_multiscales_above_max_version(tmp_path):
2154+
"""Test for FileNotSupported when multiscales version is above maximum."""
2155+
sample = _fetch_remote_sample("ngff-1")
2156+
# Create a copy of the sample
2157+
sample_copy = tmp_path / "ngff-1.zarr"
2158+
shutil.copytree(sample, sample_copy)
2159+
with open(sample_copy / ".zattrs", "r") as fh:
2160+
zattrs = json.load(fh)
2161+
# Set the multiscales version above the maximum supported version
2162+
zattrs["multiscales"][0]["version"] = "10.0"
2163+
with open(sample_copy / ".zattrs", "w") as fh:
2164+
json.dump(zattrs, fh, indent=2)
2165+
with pytest.raises(FileNotSupported):
2166+
wsireader.WSIReader.open(sample_copy)
2167+
2168+
2169+
def test_ngff_non_numeric_version(tmp_path, monkeypatch):
2170+
"""Test that the reader can handle non-numeric omero versions."""
2171+
2172+
# Patch the is_ngff function to change the min/max version
2173+
if_ngff = wsireader.is_ngff # noqa: F841
2174+
min_version = Version("0.4")
2175+
max_version = Version("0.5")
2176+
2177+
def patched_is_ngff(
2178+
path: Path,
2179+
min_version: Version = min_version,
2180+
max_version: Version = max_version,
2181+
) -> bool:
2182+
"""Patched is_ngff function with new min/max version."""
2183+
return is_ngff(path, min_version, max_version)
2184+
2185+
monkeypatch.setattr(wsireader, "is_ngff", patched_is_ngff)
2186+
2187+
sample = _fetch_remote_sample("ngff-1")
2188+
# Create a copy of the sample
2189+
sample_copy = tmp_path / "ngff-1.zarr"
2190+
shutil.copytree(sample, sample_copy)
2191+
with open(sample_copy / ".zattrs", "r") as fh:
2192+
zattrs = json.load(fh)
2193+
# Set the omero version to a non-numeric string
2194+
zattrs["omero"]["version"] = "0.5-dev"
2195+
with open(sample_copy / ".zattrs", "w") as fh:
2196+
json.dump(zattrs, fh, indent=2)
2197+
wsireader.WSIReader.open(sample_copy)
2198+
2199+
2200+
def test_ngff_inconsistent_multiscales_versions(tmp_path, caplog):
2201+
"""Test that the reader logs a warning for inconsistent multiscales versions."""
2202+
sample = _fetch_remote_sample("ngff-1")
2203+
# Create a copy of the sample
2204+
sample_copy = tmp_path / "ngff-1.zarr"
2205+
shutil.copytree(sample, sample_copy)
2206+
with open(sample_copy / ".zattrs", "r") as fh:
2207+
zattrs = json.load(fh)
2208+
# Set the versions to be inconsistent
2209+
multiscales = zattrs["multiscales"]
2210+
# Needs at least 2 multiscales to be inconsistent
2211+
if len(multiscales) < 2:
2212+
multiscales.append(copy.deepcopy(multiscales[0]))
2213+
for i, _ in enumerate(multiscales):
2214+
multiscales[i]["version"] = f"0.{i}-dev"
2215+
zattrs["omero"]["multiscales"] = multiscales
2216+
with open(sample_copy / ".zattrs", "w") as fh:
2217+
json.dump(zattrs, fh, indent=2)
2218+
# Capture logger output to check for warning
2219+
with caplog.at_level(logging.WARNING), pytest.raises(FileNotSupported):
2220+
wsireader.WSIReader.open(sample_copy)
2221+
assert "multiple versions" in caplog.text
2222+
2223+
20542224
class TestReader:
20552225
scenarios = [
20562226
(

tiatoolbox/wsicore/wsireader.py

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import tifffile
2020
import zarr
2121
from defusedxml import ElementTree
22+
from packaging.version import Version
2223
from PIL import Image
2324

2425
from tiatoolbox import logger, utils
@@ -36,6 +37,8 @@
3637
Bounds = Tuple[Number, Number, Number, Number]
3738
IntBounds = Tuple[int, int, int, int]
3839
Resolution = Union[Number, Tuple[Number, Number], np.ndarray]
40+
MIN_NGFF_VERSION = Version("0.4")
41+
MAX_NGFF_VERSION = Version("0.4")
3942

4043

4144
def is_dicom(path: pathlib.Path) -> bool:
@@ -97,7 +100,11 @@ def is_zarr(path: pathlib.Path) -> bool:
97100
return False
98101

99102

100-
def is_ngff(path: pathlib.Path, min_version: Tuple[int, ...] = (0, 4)) -> bool:
103+
def is_ngff(
104+
path: pathlib.Path,
105+
min_version: Version = MIN_NGFF_VERSION,
106+
max_version: Version = MAX_NGFF_VERSION,
107+
) -> bool:
101108
"""Check if the input is a NGFF file.
102109
103110
Args:
@@ -129,18 +136,61 @@ def is_ngff(path: pathlib.Path, min_version: Tuple[int, ...] = (0, 4)) -> bool:
129136
all(isinstance(m, dict) for m in multiscales),
130137
]
131138
):
139+
logger.warning(
140+
"The NGFF file is not valid. "
141+
"The multiscales, _ARRAY_DIMENSIONS and omero attributes "
142+
"must be present and of the correct type."
143+
)
132144
return False
133145
except KeyError:
134146
return False
135-
multiscales_versions = tuple(
136-
tuple(int(part) for part in scale.get("version", "").split("."))
137-
for scale in multiscales
138-
)
139-
omero_version = tuple(int(part) for part in omero.get("version", "").split("."))
147+
multiscales_versions = {
148+
Version(scale["version"]) for scale in multiscales if "version" in scale
149+
}
150+
omero_version: Optional[str] = omero.get("version")
151+
if omero_version:
152+
omero_version: Version = Version(omero_version)
153+
if omero_version < min_version:
154+
logger.warning(
155+
"The minimum supported version of the NGFF file is %s. "
156+
"But the versions of the multiscales in the file are %s.",
157+
min_version,
158+
multiscales_versions,
159+
)
160+
return False
161+
if omero_version > max_version:
162+
logger.warning(
163+
"The maximum supported version of the NGFF file is %s. "
164+
"But the versions of the multiscales in the file are %s.",
165+
max_version,
166+
multiscales_versions,
167+
)
168+
return False
169+
170+
if len(multiscales_versions) > 1:
171+
logger.warning(
172+
"Found multiple versions for NGFF multiscales: %s",
173+
multiscales_versions,
174+
)
175+
140176
if any(version < min_version for version in multiscales_versions):
177+
logger.warning(
178+
"The minimum supported version of the NGFF file is %s. "
179+
"But the versions of the multiscales in the file are %s.",
180+
min_version,
181+
multiscales_versions,
182+
)
141183
return False
142-
if omero_version < min_version:
184+
185+
if any(version > max_version for version in multiscales_versions):
186+
logger.warning(
187+
"The maximum supported version of the NGFF file is %s. "
188+
"But the versions of the multiscales in the file are %s.",
189+
max_version,
190+
multiscales_versions,
191+
)
143192
return False
193+
144194
return is_zarr(path)
145195

146196

whitelist.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ Nx4
4141
NxM
4242
NxN
4343
OME
44+
Omero
4445
Omnyx
4546
OpenCV
4647
Otsu's
@@ -128,8 +129,10 @@ macports
128129
memray
129130
mpp
130131
mse
132+
multiscales
131133
natively
132134
ndarray
135+
ngff
133136
nn
134137
noinspection
135138
normalizer

0 commit comments

Comments
 (0)