Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/remote backends #221

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
python -m pip install ninja Cython pybind11 numpy meson
- name: Install additional dependencies
run: |
python -m pip install pylint pyhaloxml darwin-py ninja
python -m pip install pylint pyhaloxml darwin-py ninja asyncio aiohttp
- name: Install package
run: |
python -m pip install pylint
Expand Down
3 changes: 3 additions & 0 deletions .mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,6 @@ ignore_missing_imports = True

[mypy-darwin.*]
ignore_missing_imports = True

[mypy-aiohttp.*]
ignore_missing_imports = True
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ Whole Slide Images.


## Features
- Read whole-slide images at any arbitrary resolution by seamlessly interpolating between the pyramidal levels
- Supports multiple backends, including [OpenSlide](https://openslide.org/) and [VIPS](https://libvips.github.io/libvips/), with the possibility to add custom backends
- Read whole-slide images at any arbitrary resolution by seamlessly interpolating between the pyramidal levels.
- Supports multiple backends, including [OpenSlide](https://openslide.org/), [VIPS](https://libvips.github.io/libvips/) and remote images in [SlideScore](https://slidescore.com), with the possibility to add custom backends.
- Dataset classes to handle whole-slide images in a tile-by-tile manner compatible with pytorch
- Annotation classes which can load GeoJSON, [V7 Darwin](https://www.v7labs.com/), [HALO](https://indicalab.com/halo/) and [ASAP](https://computationalpathologygroup.github.io/ASAP/) formats and read parts of it (e.g. a tile)
- Transforms to handle annotations per tile which, together with the dataset classes, yield a dataset of whole-slide image tiles with corresponding masks as targets, readily usable with a PyTorch dataloader
Expand Down
13 changes: 8 additions & 5 deletions dlup/_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from dlup._region import BoundaryMode, RegionView
from dlup._types import GenericFloatArray, GenericIntArray, GenericNumber, GenericNumberArray, PathLike
from dlup.backends.common import AbstractSlideBackend
from dlup.backends.remote_backends import RemoteSlideBackend
from dlup.utils.backends import ImageBackend
from dlup.utils.image import check_if_mpp_is_valid

Expand Down Expand Up @@ -275,21 +276,23 @@ def from_file_path(
backend: ImageBackend | Type[AbstractSlideBackend] | str = ImageBackend.OPENSLIDE,
**kwargs: Any,
) -> _TSlideImage:
wsi_file_path = pathlib.Path(wsi_file_path).resolve()
if not wsi_file_path.exists():
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(wsi_file_path))

if isinstance(backend, str):
backend = ImageBackend[backend]

# We don't convert to Path for RemoteSlideBackend
if not issubclass(backend.value if isinstance(backend, ImageBackend) else backend, RemoteSlideBackend):
wsi_file_path = pathlib.Path(wsi_file_path)
wsi_file_path = wsi_file_path.resolve()
if not wsi_file_path.exists():
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), str(wsi_file_path))

# Adjust how the backend is used depending on its type
if isinstance(backend, ImageBackend):
backend_callable = backend.value # Get the callable from Enum
elif issubclass(backend, AbstractSlideBackend):
backend_callable = backend # Directly use the class if it's a subclass of AbstractSlideBackend
else:
raise TypeError("backend must be either an ImageBackend enum or a subclass of AbstractSlideBackend")

try:
wsi = backend_callable(wsi_file_path) # Instantiate the backend with the path
except UnsupportedSlideError as exc:
Expand Down
289 changes: 289 additions & 0 deletions dlup/backends/deepzoom_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
from __future__ import annotations

import functools
import io
import itertools
import math
from pathlib import Path
from typing import Any, Union

# TODO: Fix cmyk case in read_region so we can remove PIL and numpy
# import PIL
import numpy as np
import pyvips

from dlup._types import PathLike
from dlup.backends.common import AbstractSlideBackend
from dlup.utils.backends import dict_to_snake_case, parse_xml_to_dict

METADATA_CACHE = 128
RELEVANT_VIPS_PROPERTIES = {
"openslide.vendor": str,
"openslide.mpp-x": float,
"openslide.mpp-y": float,
"openslide.objective-power": int,
"openslide.bounds-height": int,
"openslide.bounds-width": int,
"openslide.bounds-x": int,
"openslide.bounds-y": int,
"openslide.quickhash-1": str,
"vips-loader": str,
"bands": int,
}

TileResponseTypes = Union[str, io.BytesIO]


def open_slide(filename: PathLike) -> "DeepZoomSlide":
    """
    Read slide with DeepZoomSlide backend. The input file should be a <slide_name>.dzi file with the deep zoom tiles
    in a folder <slide_name>_files

    Parameters
    ----------
    filename : PathLike
        DZI file for slide.

    Returns
    -------
    DeepZoomSlide
        Backend instance constructed from `filename`.
    """
    return DeepZoomSlide(filename)


class DeepZoomSlide(AbstractSlideBackend):
    """Slide backend that reads regions by stitching tiles of a DeepZoom pyramid.

    The slide is described by a ``<slide_name>.dzi`` file; by default the tiles are looked up in a sibling folder
    named ``<slide_name>_files`` (see :attr:`tile_files`). Level 0 of this backend is the full-resolution image and
    every following level is downsampled by another factor of two.
    """

    _properties: dict[str, Any]
    _dz_properties: dict[str, Any]

    def __init__(self, filename: PathLike):
        """Derive the level geometry (downsamples, shapes, tile grid) from the DeepZoom descriptor.

        Parameters
        ----------
        filename : PathLike
            DZI file for slide.
        """
        super().__init__(filename)
        slide_properties = self.properties
        # Spacing is only known when the vips properties contain both mpp values.
        if slide_properties.get("mpp_x") is not None and slide_properties.get("mpp_y") is not None:
            self._spacings = [(float(slide_properties["mpp_x"]), float(slide_properties["mpp_y"]))]

        image_info = self.dz_properties["image"]
        full_width = image_info["size"]["width"]
        full_height = image_info["size"]["height"]

        # Number of halvings needed to shrink the longest side down to a single pixel.
        self._dz_level_count = math.ceil(math.log2(max(full_width, full_height)))
        self._tile_size = (image_info["tile_size"],) * 2
        self._overlap = image_info["overlap"]

        self._level_count = self._dz_level_count + 1
        self._downsamples = [2**level for level in range(self._level_count)]
        self._shapes = [
            (math.ceil(full_width / downsample), math.ceil(full_height / downsample))
            for downsample in self._downsamples
        ]

        # Tiles per level as (columns, rows); a partially filled tile at the edge still counts as one.
        self._num_cols_rows = [
            (
                width // self._tile_size[0] + int((width % self._tile_size[0]) > 0),
                height // self._tile_size[1] + int((height % self._tile_size[1]) > 0),
            )
            for width, height in self._shapes
        ]

    @property
    def properties(self) -> dict[str, Any]:
        """Properties of slide. Fetched on first access and cached on the instance afterwards."""
        if not hasattr(self, "_properties"):
            self._properties = self._fetch_properties()
        return self._properties

    # The `_fetch_*` helpers are deliberately not wrapped in `functools.lru_cache`: the properties calling them
    # already cache the result per instance, and a cache on a method would key on `self` and keep every instance
    # alive for as long as the cache exists.
    def _fetch_properties(self) -> dict[str, Any]:
        """Fetch properties of the slide. The `vips-properties.xml` file will be generated by vips when extracting
        the pyramid. Correctness not tested for vips-loader other than `openslideload`

        Returns
        -------
        dict[str, Any]
            The relevant properties with snake case keys and the `openslide.` prefix removed, or an empty dictionary
            when no `vips-properties.xml` exists in the tile folder.

        Raises
        ------
        NotImplementedError
            If the properties file exists but was not written with the `openslideload` vips-loader.
        """
        vips_properties_file = Path(self.tile_files) / "vips-properties.xml"
        if not vips_properties_file.exists():
            return {}
        # Don't convert to snake case for now to keep original vips-property names
        vips_properties = parse_xml_to_dict(vips_properties_file, _to_snake_case=False)["image"]["properties"]
        relevant_properties = {
            relevant_key.split("openslide.")[-1]: cast_fn(vips_properties[relevant_key])
            for relevant_key, cast_fn in RELEVANT_VIPS_PROPERTIES.items()
            if relevant_key in vips_properties
        }
        if relevant_properties.get("vips-loader", "") != "openslideload":
            raise NotImplementedError(
                f"Properties not implemented for vips-loader {relevant_properties.get('vips-loader')}."
            )
        # Convert to snake case naming convention in the end
        return dict_to_snake_case(relevant_properties)

    @property
    def dz_properties(self) -> dict[str, Any]:
        """DeepZoom properties of slide. Fetched on first access and cached on the instance afterwards."""
        if not hasattr(self, "_dz_properties"):
            self._dz_properties = self._fetch_dz_properties()
        return self._dz_properties

    def _fetch_dz_properties(self) -> dict[str, Any]:
        """Fetch DeepZoom properties from .dzi file. Cast every property, except for `Format`, to integers."""
        # NOTE(review): the casting described above is presumably done inside `parse_xml_to_dict` - confirm.
        return parse_xml_to_dict(self._filename)

    @property
    def magnification(self) -> float | None:
        """Returns the objective power at which the WSI was sampled, or None when it is not in the properties."""
        value = self.properties.get("objective_power")
        if value is not None:
            return int(value)
        return value

    @property
    def vendor(self) -> str | None:
        """Returns the scanner vendor, or None when it is not in the properties."""
        return self.properties.get("vendor")

    @property
    def mode(self) -> str:
        """Returns the mode of the deep zoom tiles. Determined on first access and cached on the instance.
        NOTE: When generating deepzoom pyramid with VIPS, this could be CMYK and differ from the original slide
        """
        if not hasattr(self, "_mode"):
            self._mode = self._fetch_mode()
        return self._mode

    def _fetch_mode(self) -> str:
        """Returns the mode of the deepzoom tile at level 0. This is an image of size 1x1 that should exist.

        Raises
        ------
        TypeError
            If the retrieved tile is neither a path nor a BytesIO object.
        """
        _tile_path = self.retrieve_deepzoom_tiles(0, [(0, 0)])[0]
        if isinstance(_tile_path, (Path, str)):
            _region: pyvips.Image = pyvips.Image.new_from_file(_tile_path)
        elif isinstance(_tile_path, io.BytesIO):
            _region = pyvips.Image.new_from_buffer(_tile_path.getvalue(), "")
        else:
            raise TypeError(f"Cannot open deepzoom tile of type {type(_tile_path)} using pyvips.")
        mode: str = _region.interpretation
        return mode

    @property
    def slide_bounds(self) -> tuple[tuple[int, int], tuple[int, int]]:
        """Returns the bounds of the slide as (offset, size). These can be smaller than the image itself."""
        if self.properties.get("bounds_x") is None or self.properties.get("bounds_y") is None:
            return (0, 0), self.dimensions

        # If MRXS file is generated with --angle d90, x and width should be switched with y and height respectively
        bounds_offset = (self.properties["bounds_x"], self.properties["bounds_y"])
        bounds_size = (self.properties["bounds_width"], self.properties["bounds_height"])
        return bounds_offset, bounds_size

    @property
    def tile_files(self) -> PathLike:
        """Returns path where deep zoom tiles are stored. Default is folder named `<file_name>_files` at the same
        location where .dzi file is stored."""
        return Path(self._filename).parent / f"{Path(self._filename).stem}_files"

    def retrieve_deepzoom_tiles(self, level: int, indices: list[tuple[int, int]]) -> list[TileResponseTypes]:
        """Retrieve paths or BytesIO objects for tile indices of deepzoom level. These tiles will be opened with
        pyvips and stitched together in `read_region`

        Parameters
        ----------
        level : int
            Deep zoom level for tiles
        indices : list[tuple[int, int]]
            List of (row, col) tuples for the tiles at the specified deepzoom level

        Returns
        -------
        list[str | BytesIO]
            List of file paths or BytesIO objects for unprocessed DeepZoom tiles. This implementation returns
            file paths of the form `<tile_files>/<level>/<col>_<row>.<format>`.
        """
        tile_files_root = self.tile_files
        file_format = self.dz_properties["image"]["format"]
        return [f"{tile_files_root}/{level}/{col}_{row}.{file_format}" for row, col in indices]

    def read_region(self, coordinates: tuple[Any, ...], level: int, size: tuple[int, int]) -> pyvips.Image:
        """Read region by stitching DeepZoom tiles together.

        Parameters
        ----------
        coordinates : tuple
            Coordinates (x, y) of the region in level 0.
        level : int
            Level of the image pyramid.
        size : tuple
            Size (width, height) of the region to be extracted.

        Returns
        -------
        pyvips.Image
            The requested region. Pixels that are not covered by any tile remain zero.
        """
        level_downsample = self._downsamples[level]
        level_end_col, level_end_row = self._num_cols_rows[level]
        tile_w, tile_h = self._tile_size

        # Translate the level 0 coordinates to the requested level
        x, y = (coordinates[0] // level_downsample, coordinates[1] // level_downsample)
        w, h = size
        _overlap = self._overlap

        # Calculate the range of rows and columns for tiles covering the specified region
        start_row = y // tile_h
        end_row = min(math.ceil((y + h) / tile_h), level_end_row)
        start_col = x // tile_w
        end_col = min(math.ceil((x + w) / tile_w), level_end_col)

        indices = list(itertools.product(range(start_row, end_row), range(start_col, end_col)))
        # Levels of this backend count up from full resolution, so level 0 maps to the highest deepzoom level
        level_dz = self._level_count - level - 1
        tile_files = self.retrieve_deepzoom_tiles(level_dz, indices)

        # The number of bands can be in the vips-properties.xml, but otherwise we can interpret from image mode
        num_bands = self.properties.get("bands", 3 if self.mode != "cmyk" else 4)
        # We create an image from an array with zeros so we can give the correct interpretation. Arrayjoin would be
        # faster, but does not work for irregular grids of images.
        _region = pyvips.Image.new_from_array(np.zeros((h, w, num_bands), dtype=np.uint8), interpretation=self.mode)
        for (row, col), tile_file in zip(indices, tile_files):
            _region_tile: pyvips.Image = (
                pyvips.Image.new_from_buffer(tile_file.getvalue(), "")
                if isinstance(tile_file, io.BytesIO)
                else pyvips.Image.new_from_file(tile_file)
            )

            # Position of the tile (without overlap) relative to the top-left corner of the requested region
            start_x = col * tile_w - x
            start_y = row * tile_h - y

            # Part of the requested region that is filled by this tile
            img_start_x = max(0, start_x)
            img_end_x = min(w, start_x + tile_w)
            img_start_y = max(0, start_y)
            img_end_y = min(h, start_y + tile_h)

            # The same part, expressed in coordinates of the tile
            crop_start_x = img_start_x - start_x
            crop_end_x = img_end_x - start_x
            crop_start_y = img_start_y - start_y
            crop_end_y = img_end_y - start_y

            # All but edge tiles have overlap pixels outside of tile
            if col > 0:
                crop_start_x += _overlap
                crop_end_x += _overlap
            if col == level_end_col - 1:
                crop_end_x -= _overlap

            if row > 0:
                crop_start_y += _overlap
                crop_end_y += _overlap
            if row == level_end_row - 1:
                crop_end_y -= _overlap
            _cropped_region_tile = _region_tile.crop(
                crop_start_x, crop_start_y, crop_end_x - crop_start_x, crop_end_y - crop_start_y
            )
            _region = _region.insert(_cropped_region_tile, img_start_x, img_start_y)

        # # Should convert from cmyk to rgb or should a user do this afterwards themselves?
        # if self.mode == "cmyk":
        #     # FIXME: This looks off when using pyvips (colourspace and icc_transform) but not when using PIL
        #     _region = _region.colourspace("srgb", source_space="cmyk")  # _region = _region.icc_transform("srgb")
        #     # _region = pyvips.Image.new_from_array(
        #     #     np.asarray(PIL.Image.fromarray(_region.numpy(), mode="CMYK").convert("RGB")), interpretation="srgb"
        #     # )  # <- This looks good, but seems like a roundabout way
        return _region

    def close(self) -> None:
        """Close the underlying slide. Nothing is done here."""
        return
26 changes: 26 additions & 0 deletions dlup/backends/remote_backends.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from abc import abstractmethod
from typing import Any

from dlup._types import PathLike
from dlup.backends.common import AbstractSlideBackend


class RemoteSlideBackend(AbstractSlideBackend):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea what this is haha. Maybe some docs could help :)

def __init__(self, filename: PathLike) -> None:
    """Initialise the remote backend for the slide identified by `filename`.

    The `_set_metadata` hook of the subclass runs before the parent initialiser is called.
    """
    # NOTE(review): presumably the parent initialiser (or what it triggers) needs the remote-access metadata to
    # be in place already - confirm before reordering these two calls.
    self._set_metadata()
    super().__init__(filename)

@property
def properties(self) -> dict[str, Any]:
    """Properties of the slide, fetched with `_fetch_properties` on first access and reused afterwards."""
    if hasattr(self, "_properties"):
        return self._properties
    # First access: fetch once and keep the result on the instance.
    self._properties = self._fetch_properties()
    return self._properties

@abstractmethod
def _fetch_properties(self) -> dict[str, Any]:
    """Fetch the properties of the slide. Must be implemented by subclasses.

    The result is what the `properties` attribute stores and returns.
    """
    pass

@abstractmethod
def _set_metadata(self) -> None:
    """Set the metadata needed for remote access. Must be implemented by subclasses.

    Called at the start of `__init__`, before the parent initialiser runs.
    """
    pass
Loading
Loading