NKI-AI · jonasteuwen · Mar 13, 2024 · Mar 13, 2024 · Mar 15, 2024 · Mar 15, 2024
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
@@ -42,7 +42,7 @@ jobs:
  python -m pip install ninja Cython pybind11 numpy meson
  - name: Install additional dependencies
  run: |
- python -m pip install pylint pyhaloxml darwin-py ninja
+ python -m pip install pylint pyhaloxml darwin-py ninja aiohttp
  - name: Install package
  run: |
  python -m pip install pylint

diff --git a/.mypy.ini b/.mypy.ini
@@ -31,3 +31,6 @@ ignore_missing_imports = True
 
 [mypy-darwin.*]
 ignore_missing_imports = True
+
+[mypy-aiohttp.*]
+ignore_missing_imports = True
diff --git a/README.md b/README.md
@@ -11,8 +11,8 @@ Whole Slide Images.
 
 
 ## Features
-- Read whole-slide images at any arbitrary resolution by seamlessly interpolating between the pyramidal levels
-- Supports multiple backends, including [OpenSlide](https://openslide.org/) and [VIPS](https://libvips.github.io/libvips/), with the possibility to add custom backends
+- Read whole-slide images at any arbitrary resolution by seamlessly interpolating between the pyramidal levels.
+- Supports multiple backends, including [OpenSlide](https://openslide.org/), [VIPS](https://libvips.github.io/libvips/) and remote images in [SlideScore](https://slidescore.com), with the possibility to add custom backends.
 - Dataset classes to handle whole-slide images in a tile-by-tile manner compatible with pytorch
 - Annotation classes which can load GeoJSON, [V7 Darwin](https://www.v7labs.com/), [HALO](https://indicalab.com/halo/) and [ASAP](https://computationalpathologygroup.github.io/ASAP/) formats and read parts of it (e.g. a tile)
 - Transforms to handle annotations per tile, resulting, together with the dataset classes a dataset consisting of tiles of whole-slide images with corresponding masks as targets, readily useable with a pytorch dataloader

diff --git a/dlup/_image.py b/dlup/_image.py
@@ -28,6 +28,7 @@
 from dlup._region import BoundaryMode, RegionView
 from dlup._types import GenericFloatArray, GenericIntArray, GenericNumber, GenericNumberArray, PathLike
 from dlup.backends.common import AbstractSlideBackend
+from dlup.backends.remote_backends import RemoteSlideBackend
 from dlup.utils.backends import ImageBackend
 from dlup.utils.image import check_if_mpp_is_valid
 
@@ -275,21 +276,23 @@ def from_file_path(
  backend: ImageBackend | Type[AbstractSlideBackend] | str = ImageBackend.OPENSLIDE,
  **kwargs: Any,
  ) -> _TSlideImage:
- wsi_file_path = pathlib.Path(wsi_file_path).resolve()
- if not wsi_file_path.exists():
- raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(wsi_file_path))
-
  if isinstance(backend, str):
  backend = ImageBackend[backend]
 
+ # We don't convert to Path for RemoteSlideBackend
+ if not issubclass(backend.value if isinstance(backend, ImageBackend) else backend, RemoteSlideBackend):
+ wsi_file_path = pathlib.Path(wsi_file_path)
+ wsi_file_path = wsi_file_path.resolve()
+ if not wsi_file_path.exists():
+ raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(wsi_file_path))
+
  # Adjust how the backend is used depending on its type
  if isinstance(backend, ImageBackend):
  backend_callable = backend.value # Get the callable from Enum
  elif issubclass(backend, AbstractSlideBackend):
  backend_callable = backend # Directly use the class if it's a subclass of AbstractSlideBackend
  else:
  raise TypeError("backend must be either an ImageBackend enum or a subclass of AbstractSlideBackend")
-
  try:
  wsi = backend_callable(wsi_file_path) # Instantiate the backend with the path
  except UnsupportedSlideError as exc:

diff --git a/dlup/backends/deepzoom_backend.py b/dlup/backends/deepzoom_backend.py
@@ -0,0 +1,289 @@
+from __future__ import annotations
+
+import functools
+import io
+import itertools
+import math
+from pathlib import Path
+from typing import Any, Union
+
+# TODO: Fix cmyk case in read_region so we can remove PIL and numpy
+# import PIL
+import numpy as np
+import pyvips
+
+from dlup._types import PathLike
+from dlup.backends.common import AbstractSlideBackend
+from dlup.utils.backends import dict_to_snake_case, parse_xml_to_dict
+
+METADATA_CACHE = 128
+RELEVANT_VIPS_PROPERTIES = {
+ "openslide.vendor": str,
+ "openslide.mpp-x": float,
+ "openslide.mpp-y": float,
+ "openslide.objective-power": int,
+ "openslide.bounds-height": int,
+ "openslide.bounds-width": int,
+ "openslide.bounds-x": int,
+ "openslide.bounds-y": int,
+ "openslide.quickhash-1": str,
+ "vips-loader": str,
+ "bands": int,
+}
+
+TileResponseTypes = Union[str, io.BytesIO]
+
+
+def open_slide(filename: PathLike) -> "DeepZoomSlide":
+ """
+ Open a slide with the DeepZoomSlide backend. The input file should be a <slide_name>.dzi file with the deep zoom
+ tiles in a folder <slide_name>_files located next to it.
+
+ Parameters
+ ----------
+ filename : PathLike
+ DZI file for slide.
+ """
+ return DeepZoomSlide(filename)
+
+
+class DeepZoomSlide(AbstractSlideBackend):
+ _properties: dict[str, Any]
+ _dz_properties: dict[str, Any]
+
+ def __init__(self, filename: PathLike):
+ super().__init__(filename)
+ if self.properties.get("mpp_x") is not None and self.properties.get("mpp_y") is not None:
+ self._spacings = [(float(self.properties["mpp_x"]), float(self.properties["mpp_y"]))]
+ # NOTE(review): _spacings is only assigned when both mpp values exist; confirm the base class sets a default.
+ self._dz_level_count = math.ceil(  # index of the highest DeepZoom level: ceil(log2(longest side))
+ math.log2(
+ max(
+ self.dz_properties["image"]["size"]["width"],
+ self.dz_properties["image"]["size"]["height"],
+ )
+ )
+ )
+ self._tile_size = (self.dz_properties["image"]["tile_size"],) * 2
+ self._overlap = self.dz_properties["image"]["overlap"]
+ # Level 0 of this backend is full resolution (downsample 1); every following level doubles the downsample.
+ self._level_count = self._dz_level_count + 1
+ self._downsamples = [2**level for level in range(self._level_count)]
+ self._shapes = [
+ (
+ math.ceil(self.dz_properties["image"]["size"]["width"] / downsample),
+ math.ceil(self.dz_properties["image"]["size"]["height"] / downsample),
+ )
+ for downsample in self._downsamples
+ ]
+ # Number of tiles per level as (columns, rows): ceiling division of the level shape by the tile size.
+ self._num_cols_rows = [
+ (
+ width // self._tile_size[0] + int((width % self._tile_size[0]) > 0),
+ height // self._tile_size[1] + int((height % self._tile_size[1]) > 0),
+ )
+ for width, height in self._shapes
+ ]
+
+ @property
+ def properties(self) -> dict[str, Any]:
+ """Properties of the slide, fetched on first access and stored on the instance afterwards."""
+ if not hasattr(self, "_properties"):
+ self._properties = self._fetch_properties()
+ return self._properties
+
+ # Deliberately not wrapped in lru_cache: `properties` already stores the result per instance.
+ def _fetch_properties(self) -> dict[str, Any]:
+ """Fetch properties of the slide from the `vips-properties.xml` file that vips generates when extracting
+ the pyramid. Returns an empty dict when that file is missing and raises NotImplementedError for any
+ vips-loader other than `openslideload`, because correctness is not tested for other loaders."""
+ vips_properties_file = Path(self.tile_files) / "vips-properties.xml"
+ if not vips_properties_file.exists():
+ return {}
+ # Don't convert to snake case for now to keep original vips-property names
+ vips_properties = parse_xml_to_dict(vips_properties_file, _to_snake_case=False)["image"]["properties"]
+ relevant_properties = {
+ relevant_key.split("openslide.")[-1]: cast_fn(vips_properties[relevant_key])
+ for relevant_key, cast_fn in RELEVANT_VIPS_PROPERTIES.items()
+ if relevant_key in vips_properties
+ }
+ if relevant_properties.get("vips-loader", "") != "openslideload":
+ raise NotImplementedError(
+ f"Properties not implemented for vips-loader {relevant_properties.get('vips-loader')}."
+ )
+ # Convert to snake case naming convention in the end
+ return dict_to_snake_case(relevant_properties)
+
+ @property
+ def dz_properties(self) -> dict[str, Any]:
+ """DeepZoom properties of the slide, fetched on first access and stored on the instance afterwards."""
+ if not hasattr(self, "_dz_properties"):
+ self._dz_properties = self._fetch_dz_properties()
+ return self._dz_properties
+
+ # Deliberately not wrapped in lru_cache: `dz_properties` already stores the result per instance.
+ def _fetch_dz_properties(self) -> dict[str, Any]:
+ """Fetch DeepZoom properties from .dzi file. Cast every property, except for `Format`, to integers."""
+ return parse_xml_to_dict(self._filename)
+
+ @property
+ def magnification(self) -> float | None:
+ """Returns the objective power at which the WSI was sampled, or None if it is not in the properties."""
+ value = self.properties.get("objective_power")
+ if value is not None:
+ return int(value)
+ return value
+
+ @property
+ def vendor(self) -> str | None:
+ """Returns the scanner vendor, or None if it is not in the properties."""
+ return self.properties.get("vendor")
+
+ @property
+ def mode(self) -> str:
+ """Returns the mode of the deep zoom tiles. The value is fetched on first access and then reused.
+ NOTE: When generating deepzoom pyramid with VIPS, this could be CMYK and differ from the original slide
+ """
+ if not hasattr(self, "_mode"):
+ self._mode = self._fetch_mode()
+ return self._mode
+
+ # Deliberately not wrapped in lru_cache: `mode` already stores the result per instance.
+ def _fetch_mode(self) -> str:
+ """Returns the mode of the deepzoom tile at level 0. This is an image of size 1x1 that should exist."""
+ _tile_path = self.retrieve_deepzoom_tiles(0, [(0, 0)])[0]
+ if isinstance(_tile_path, (Path, str)):
+ _region: pyvips.Image = pyvips.Image.new_from_file(_tile_path)
+ elif isinstance(_tile_path, io.BytesIO):
+ _region = pyvips.Image.new_from_buffer(_tile_path.getvalue(), "")
+ else:
+ raise TypeError(f"Cannot open deepzoom tile of type {type(_tile_path)} using pyvips.")
+ mode: str = _region.interpretation
+ return mode
+
+ @property
+ def slide_bounds(self) -> tuple[tuple[int, int], tuple[int, int]]:
+ """Returns the bounds of the slide. These can be smaller than the image itself."""
+ if self.properties.get("bounds_x") is None or self.properties.get("bounds_y") is None:
+ return (0, 0), self.dimensions
+ # NOTE(review): bounds_width/bounds_height are read below unchecked; confirm they always accompany bounds_x/y.
+ # If MRXS file was generated with --angle d90, x and width should be switched with y and height respectively
+ bounds_offset = (self.properties["bounds_x"], self.properties["bounds_y"])
+ bounds_size = (self.properties["bounds_width"], self.properties["bounds_height"])
+ return bounds_offset, bounds_size
+
+ @property
+ def tile_files(self) -> PathLike:
+ """Returns the path where the deep zoom tiles are stored. Default is the folder named `<stem>_files` at the
+ same location as the .dzi file, where `<stem>` is the .dzi file name without its extension."""
+ return Path(self._filename).parent / f"{Path(self._filename).stem}_files"
+
+ def retrieve_deepzoom_tiles(self, level: int, indices: list[tuple[int, int]]) -> list[TileResponseTypes]:
+ """Retrieve paths or BytesIO objects for tile indices of deepzoom level. These tiles will be opened with
+ pyvips and stitched together in `read_region`
+
+ Parameters
+ ----------
+ level : int
+ Deep zoom level for tiles
+ indices : list[tuple[int, int]]
+ List of (row, col) tuples of the tiles at the specified deepzoom level
+
+ Returns
+ -------
+ list[str | BytesIO]
+ List of file paths or BytesIO objects for unprocessed DeepZoom tiles.
+ """
+ tile_files_root = self.tile_files
+ file_format = self.dz_properties["image"]["format"]
+ return [f"{tile_files_root}/{level}/{col}_{row}.{file_format}" for row, col in indices]
+
+ def read_region(self, coordinates: tuple[Any, ...], level: int, size: tuple[int, int]) -> pyvips.Image:
+ """Read region by stitching DeepZoom tiles together.
+
+ Parameters
+ ----------
+ coordinates : tuple
+ Coordinates (x, y) of the region in level 0.
+ level : int
+ Level of the image pyramid.
+ size : tuple
+ Size (width, height) of the region to be extracted.
+
+ Returns
+ -------
+ pyvips.Image
+ The requested region.
+ """
+ level_downsample = self._downsamples[level]
+ level_end_col, level_end_row = self._num_cols_rows[level]
+ tile_w, tile_h = self._tile_size
+
+ x, y = (coordinates[0] // level_downsample, coordinates[1] // level_downsample)
+ w, h = size
+ _overlap = self._overlap
+
+ # Calculate the range of rows and columns for tiles covering the specified region
+ start_row = y // tile_h
+ end_row = min(math.ceil((y + h) / tile_h), level_end_row)
+ start_col = x // tile_w
+ end_col = min(math.ceil((x + w) / tile_w), level_end_col)
+
+ indices = list(itertools.product(range(start_row, end_row), range(start_col, end_col)))
+ level_dz = self._level_count - level - 1  # DeepZoom numbers its levels in the opposite direction
+ tile_files = self.retrieve_deepzoom_tiles(level_dz, indices)
+
+ # The number of bands can be in the vips-properties.xml, but otherwise we can interpret from image mode
+ num_bands = self.properties.get("bands", 3 if self.mode != "cmyk" else 4)
+ # We create an image from an array with zeros so we can give the correct interpretation. Arrayjoin would be
+ # faster, but does not work for irregular grids of images.
+ _region = pyvips.Image.new_from_array(np.zeros((h, w, num_bands), dtype=np.uint8), interpretation=self.mode)
+ for (row, col), tile_file in zip(indices, tile_files):
+ _region_tile: pyvips.Image = (
+ pyvips.Image.new_from_buffer(tile_file.getvalue(), "")
+ if isinstance(tile_file, io.BytesIO)
+ else pyvips.Image.new_from_file(tile_file)
+ )
+ # Position of this tile's top-left corner relative to the requested region (can be negative).
+ start_x = col * tile_w - x
+ start_y = row * tile_h - y
+ # Part of the output region that is covered by this tile.
+ img_start_x = max(0, start_x)
+ img_end_x = min(w, start_x + tile_w)
+ img_start_y = max(0, start_y)
+ img_end_y = min(h, start_y + tile_h)
+ # The same part expressed in tile coordinates, before accounting for the overlap border.
+ crop_start_x = img_start_x - start_x
+ crop_end_x = img_end_x - start_x
+ crop_start_y = img_start_y - start_y
+ crop_end_y = img_end_y - start_y
+
+ # All but edge tiles have overlap pixels outside of tile
+ if col > 0:
+ crop_start_x += _overlap
+ crop_end_x += _overlap
+ if col == level_end_col - 1:  # NOTE(review): also true for col == 0 on single-column levels - confirm
+ crop_end_x -= _overlap
+
+ if row > 0:
+ crop_start_y += _overlap
+ crop_end_y += _overlap
+ if row == level_end_row - 1:  # NOTE(review): also true for row == 0 on single-row levels - confirm
+ crop_end_y -= _overlap
+ _cropped_region_tile = _region_tile.crop(
+ crop_start_x, crop_start_y, crop_end_x - crop_start_x, crop_end_y - crop_start_y
+ )
+ _region = _region.insert(_cropped_region_tile, img_start_x, img_start_y)
+
+ # # Should convert from cmyk to rgb or should a user do this afterwards themselves?
+ # if self.mode == "cmyk":
+ # # FIXME: This looks off when using pyvips (colourspace and icc_transform) but not when using PIL
+ # _region = _region.colourspace("srgb", source_space="cmyk") # _region = _region.icc_transform("srgb")
+ # # _region = pyvips.Image.new_from_array(
+ # # np.asarray(PIL.Image.fromarray(_region.numpy(), mode="CMYK").convert("RGB")), interpretation="srgb"
+ # # ) # <- This looks good, but seems like a roundabout way
+ return _region
+
+ def close(self) -> None:
+ """Close the underlying slide. This is a no-op for this backend."""
+ return
diff --git a/dlup/backends/remote_backends.py b/dlup/backends/remote_backends.py
@@ -0,0 +1,26 @@
+from abc import abstractmethod
+from typing import Any
+
+from dlup._types import PathLike
+from dlup.backends.common import AbstractSlideBackend
+
+
+class RemoteSlideBackend(AbstractSlideBackend):  # base class for backends that read slides from a remote source
+ def __init__(self, filename: PathLike) -> None:
+ self._set_metadata()  # runs before the base initializer, so it cannot rely on state set up there
+ super().__init__(filename)
+
+ @property
+ def properties(self) -> dict[str, Any]:
+ if not hasattr(self, "_properties"):  # fetch once, then reuse the result stored on the instance
+ self._properties = self._fetch_properties()
+ return self._properties
+
+ @abstractmethod
+ def _fetch_properties(self) -> dict[str, Any]:
+ pass  # concrete backends return the slide properties here
+
+ @abstractmethod
+ def _set_metadata(self) -> None:
+ """Set the metadata needed for remote access"""
+ pass