diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
new file mode 100644
index 0000000..c7814f3
--- /dev/null
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -0,0 +1,220 @@
+from functools import wraps
+from typing import Union, Optional
+
+import pandas as pd
+import time
+import numpy as np
+import sys
+from caiman.source_extraction.cnmf import CNMF
+import re
+from sys import getsizeof
+import copy
+
+
+# multipliers for the unit suffixes accepted by ``Cache.set_maxsize``
+_SIZE_UNITS = {"G": 1024**3, "M": 1024**2}
+
+
+def _check_arg_equality(args, cache_args):
+    """
+    Compare a single argument of the current call with a cached argument.
+
+    Values of different types never match, numpy arrays are compared
+    element-wise, and containers are compared item by item so that arrays
+    nested inside them cannot raise an "ambiguous truth value" error.
+    """
+    if type(args) is not type(cache_args):
+        return False
+    if isinstance(cache_args, np.ndarray):
+        return np.array_equal(cache_args, args)
+    if isinstance(cache_args, (tuple, list, dict)):
+        return _check_args_equality(args, cache_args)
+    return bool(cache_args == args)
+
+
+def _check_args_equality(args, cache_args):
+    """
+    Compare the positional arguments (sequence) or keyword arguments (dict)
+    of the current call with the cached ones.
+    """
+    if len(args) != len(cache_args):
+        return False
+    if isinstance(args, dict):
+        # equal lengths do not imply equal keys, check before indexing
+        if not isinstance(cache_args, dict) or args.keys() != cache_args.keys():
+            return False
+        return all(
+            _check_arg_equality(value, cache_args[key]) for key, value in args.items()
+        )
+    return all(
+        _check_arg_equality(arg, cache_arg) for arg, cache_arg in zip(args, cache_args)
+    )
+
+
+def _return_wrapper(output, copy_bool):
+    """Return a deep copy of ``output`` if ``copy_bool`` is set, else ``output``."""
+    if copy_bool:
+        return copy.deepcopy(output)
+    return output
+
+
+def _estimate_nbytes(obj):
+    """
+    Estimate the memory footprint of a cached return value in bytes.
+
+    Array buffers are counted exactly, tuples and lists are the sum of their
+    items, a CNMF object is the sum of the attributes of its ``estimates``,
+    anything else falls back to the shallow ``sys.getsizeof``.
+    """
+    if isinstance(obj, np.ndarray):
+        return obj.nbytes
+    if isinstance(obj, (tuple, list)):
+        return sum(_estimate_nbytes(item) for item in obj)
+    if isinstance(obj, CNMF):
+        return sum(_estimate_nbytes(attr) for attr in vars(obj.estimates).values())
+    return sys.getsizeof(obj)
+
+
+class Cache:
+    """
+    Least-recently-used cache for the return values of extension methods.
+
+    Every cached call is one row of a DataFrame, identified by the uuid of
+    the batch item, the name of the function and the call arguments.
+    """
+
+    def __init__(self, cache_size: Optional[Union[int, str]] = None):
+        self.cache = pd.DataFrame(
+            data=None,
+            columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"],
+        )
+        self.set_maxsize(cache_size)
+
+    def get_cache(self):
+        """Return the DataFrame that stores the cache entries."""
+        return self.cache
+
+    def clear_cache(self):
+        """Remove all entries in place, the DataFrame object is kept."""
+        self.cache.drop(index=self.cache.index, inplace=True)
+
+    def set_maxsize(self, max_size: Optional[Union[int, str]]):
+        """
+        Set the limit of the cache.
+
+        Parameters
+        ----------
+        max_size: int, str or None
+            | ``None``: RAM limit of 1 GiB
+            | ``str``: RAM limit, integer followed by "G" or "M", example: "2G"
+            | ``int``: maximum number of items, ``0`` disables the cache
+
+        Raises
+        ------
+        ValueError
+            If a ``str`` is not an integer followed by "G" or "M"
+        """
+        if max_size is None:
+            self.storage_type = "RAM"
+            self.size = 1024**3
+        elif isinstance(max_size, str):
+            multiplier = _SIZE_UNITS.get(max_size[-1:])
+            try:
+                amount = int(max_size[:-1])
+            except ValueError:
+                amount = None
+            if multiplier is None or amount is None:
+                raise ValueError(
+                    f'`max_size` must be an integer followed by "G" or "M", got: "{max_size}"'
+                )
+            # assigned only after validation, an invalid string must not
+            # leave the cache with a storage type but without a size
+            self.storage_type = "RAM"
+            self.size = amount * multiplier
+        else:
+            self.storage_type = "ITEMS"
+            self.size = max_size
+
+    def _get_cache_size_bytes(self):
+        """Return the estimated size in bytes of all cached return values."""
+        return sum(_estimate_nbytes(val) for val in self.cache["return_val"])
+
+    def _is_full(self):
+        """Return ``True`` if an entry must be evicted before adding a new one."""
+        if self.storage_type == "ITEMS":
+            return len(self.cache.index) >= self.size
+        return self._get_cache_size_bytes() > self.size
+
+    def _find_entry(self, uuid, func_name, args, kwargs):
+        """Return the row position of the matching entry, ``None`` on a miss."""
+        for i in range(len(self.cache.index)):
+            if (
+                self.cache.iloc[i, 0] == uuid
+                and self.cache.iloc[i, 1] == func_name
+                and _check_args_equality(args, self.cache.iloc[i, 2])
+                and _check_args_equality(kwargs, self.cache.iloc[i, 3])
+            ):
+                return i
+        return None
+
+    def _evict_lru(self):
+        """Drop the least recently used entry in place."""
+        lru = self.cache.sort_values(by=["time_stamp"], ascending=False).index[-1]
+        self.cache.drop(index=lru, axis=0, inplace=True)
+        # in place, so that a DataFrame returned by ``get_cache`` stays valid
+        self.cache.reset_index(drop=True, inplace=True)
+
+    def _add_entry(self, uuid, func_name, args, kwargs, return_val):
+        """Append a new entry, time stamped with the current time."""
+        self.cache.loc[len(self.cache.index)] = [
+            uuid,
+            func_name,
+            args,
+            kwargs,
+            return_val,
+            time.time(),
+        ]
+
+    def use_cache(self, func):
+        """
+        Decorator that caches the return value of an extension method.
+
+        The decorated method must belong to an object with a ``_series``
+        attribute that has a "uuid" entry. The optional keyword argument
+        ``return_copy`` (default ``True``) of a call determines whether a
+        deep copy of the cached object or the cached object itself is
+        returned. It is forwarded to ``func`` but is not part of the key.
+        """
+
+        @wraps(func)
+        def _use_cache(instance, *args, **kwargs):
+            return_copy = kwargs.get("return_copy", True)
+
+            # a size of zero disables the cache
+            if self.size == 0:
+                self.clear_cache()
+                return _return_wrapper(func(instance, *args, **kwargs), return_copy)
+
+            uuid = instance._series["uuid"]
+            # ``return_copy`` does not change the computed value, keeping it
+            # in the key would cache the same value once per setting
+            key_kwargs = {k: v for k, v in kwargs.items() if k != "return_copy"}
+
+            ix = self._find_entry(uuid, func.__name__, args, key_kwargs)
+            if ix is not None:
+                # cache hit, mark the entry as most recently used
+                self.cache.iloc[ix, 5] = time.time()
+                return _return_wrapper(self.cache.iloc[ix, 4], copy_bool=return_copy)
+
+            # cache miss
+            return_val = func(instance, *args, **kwargs)
+
+            # make room by removing the least recently used entries, the new
+            # entry is added afterwards and is therefore always kept
+            while len(self.cache.index) > 0 and self._is_full():
+                self._evict_lru()
+
+            self._add_entry(uuid, func.__name__, args, key_kwargs, return_val)
+            return _return_wrapper(return_val, copy_bool=return_copy)
+
+        return _use_cache
diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py index 0b1a1ff..e29caf6 100644 --- a/mesmerize_core/caiman_extensions/cnmf.py +++ b/mesmerize_core/caiman_extensions/cnmf.py @@ -10,6 +10,9 @@ from caiman.utils.visualization import get_contours as caiman_get_contours from .common import validate +from .cache import Cache + +cache = Cache() @pd.api.extensions.register_series_accessor("cnmf") @@ -21,6 +24,7 @@ class CNMFExtensions: def
__init__(self, s: pd.Series): self._series = s + @validate("cnmf") def get_cnmf_memmap(self) -> np.ndarray: """ Get the CNMF memmap @@ -69,7 +73,8 @@ def get_output_path(self) -> Path: return self._series.paths.resolve(self._series["outputs"]["cnmf-hdf5-path"]) @validate("cnmf") - def get_output(self) -> CNMF: + @cache.use_cache + def get_output(self, return_copy=True) -> CNMF: """ Returns ------- @@ -82,8 +87,9 @@ def get_output(self) -> CNMF: # TODO: Make the ``ixs`` parameter for spatial stuff optional @validate("cnmf") + @cache.use_cache def get_spatial_masks( - self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01 + self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01, return_copy=True ) -> np.ndarray: """ Get binary masks of the spatial components at the given `ixs` @@ -148,8 +154,9 @@ def _get_spatial_contours( return contours @validate("cnmf") + @cache.use_cache def get_spatial_contours( - self, ixs_components: Optional[np.ndarray] = None + self, ixs_components: Optional[np.ndarray] = None, return_copy=True ) -> Tuple[List[np.ndarray], List[np.ndarray]]: """ Get the contour and center of mass for each spatial footprint @@ -181,8 +188,9 @@ def get_spatial_contours( return coordinates, coms @validate("cnmf") + @cache.use_cache def get_temporal_components( - self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False + self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False, return_copy=True ) -> np.ndarray: """ Get the temporal components for this CNMF item @@ -250,8 +258,8 @@ def get_reconstructed_movie( if isinstance(ixs_frames, int): ixs_frames = (ixs_frames, ixs_frames + 1) - dn = cnmf_obj.estimates.A[:,idx_components].dot( - cnmf_obj.estimates.C[idx_components, ixs_frames[0] : ixs_frames[1]] + dn = cnmf_obj.estimates.A[:, idx_components].dot( + cnmf_obj.estimates.C[idx_components, ixs_frames[0]: ixs_frames[1]] ) if add_background: diff --git 
a/mesmerize_core/caiman_extensions/common.py b/mesmerize_core/caiman_extensions/common.py index 7f6e67c..3b3654c 100644 --- a/mesmerize_core/caiman_extensions/common.py +++ b/mesmerize_core/caiman_extensions/common.py @@ -270,10 +270,10 @@ def get_input_movie_path(self) -> Path: def get_input_movie(self) -> Union[np.ndarray, pims.FramesSequence]: extension = self.get_input_movie_path().suffixes[-1] - if extension in ['.tiff', '.tif', '.btf']: + if extension in [".tiff", ".tif", ".btf"]: return pims.open(str(self.get_input_movie_path())) - elif extension in ['.mmap', '.memmap']: + elif extension in [".mmap", ".memmap"]: Yr, dims, T = load_memmap(str(self.get_input_movie_path())) return np.reshape(Yr.T, [T] + list(dims), order="F") diff --git a/mesmerize_core/caiman_extensions/mcorr.py b/mesmerize_core/caiman_extensions/mcorr.py index bbb6e9a..70c5866 100644 --- a/mesmerize_core/caiman_extensions/mcorr.py +++ b/mesmerize_core/caiman_extensions/mcorr.py @@ -6,6 +6,9 @@ from .common import validate from typing import * +from .cache import Cache + +cache = Cache() @pd.api.extensions.register_series_accessor("mcorr") @@ -45,6 +48,7 @@ def get_output(self) -> np.ndarray: return mc_movie @validate("mcorr") + @cache.use_cache def get_shifts( self, pw_rigid: bool = False ) -> Tuple[List[np.ndarray], List[np.ndarray]]: diff --git a/tests/test_core.py b/tests/test_core.py index e54e5ff..e4c5980 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,5 +1,6 @@ import os +import numpy as np from caiman.utils.utils import load_dict_from_hdf5 from caiman.source_extraction.cnmf import cnmf import numpy.testing @@ -24,6 +25,8 @@ import shutil from zipfile import ZipFile from pprint import pprint +from mesmerize_core.caiman_extensions import cnmf +import time tmp_dir = Path(os.path.dirname(os.path.abspath(__file__)), "tmp") vid_dir = Path(os.path.dirname(os.path.abspath(__file__)), "videos") @@ -167,34 +170,34 @@ def test_mcorr(): # test that batch path is propagated to 
pd.Series assert ( - df.attrs["batch_path"] - == df.paths.get_batch_path() - == df.iloc[-1].paths.get_batch_path() - == df.iloc[-1].attrs["batch_path"] + df.attrs["batch_path"] + == df.paths.get_batch_path() + == df.iloc[-1].paths.get_batch_path() + == df.iloc[-1].attrs["batch_path"] ) # test that path resolve works for parent_raw_dir rel_input_movie_path = input_movie_path.relative_to(vid_dir) assert ( - df.paths.resolve(rel_input_movie_path) - == df.iloc[-1].paths.resolve(rel_input_movie_path) - == input_movie_path + df.paths.resolve(rel_input_movie_path) + == df.iloc[-1].paths.resolve(rel_input_movie_path) + == input_movie_path ) # test that path splitting works for parent_raw_dir split = (vid_dir, input_movie_path.relative_to(vid_dir)) assert ( - df.paths.split(input_movie_path) - == df.iloc[-1].paths.split(input_movie_path) - == split + df.paths.split(input_movie_path) + == df.iloc[-1].paths.split(input_movie_path) + == split ) # test that the input_movie_path in the DataFrame rows are relative assert Path(df.iloc[-1]["input_movie_path"]) == split[1] assert ( - get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) - == vid_dir.joinpath(f"{algo}.tif") - == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) + == vid_dir.joinpath(f"{algo}.tif") + == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) process = df.iloc[-1].caiman.run() @@ -217,76 +220,76 @@ def test_mcorr(): ) rel_mcorr_memmap_path = mcorr_memmap_path.relative_to(batch_dir) assert ( - df.paths.resolve(rel_mcorr_memmap_path) - == df.iloc[-1].paths.resolve(rel_mcorr_memmap_path) - == mcorr_memmap_path + df.paths.resolve(rel_mcorr_memmap_path) + == df.iloc[-1].paths.resolve(rel_mcorr_memmap_path) + == mcorr_memmap_path ) # test that path splitting works for batch_dir split = (batch_dir, mcorr_memmap_path.relative_to(batch_dir)) assert 
( - df.paths.split(mcorr_memmap_path) - == df.iloc[-1].paths.split(mcorr_memmap_path) - == split + df.paths.split(mcorr_memmap_path) + == df.iloc[-1].paths.split(mcorr_memmap_path) + == split ) assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) # test to check mmap output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', + ) ) # test to check mean-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' + ) ) # test to check std-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) 
+ == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' + ) ) # test to check max-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' + ) ) # test to check correlation image output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') + batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') ) # test to check mcorr get_output_path() assert ( - df.iloc[-1].mcorr.get_output_path() - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', - ) - == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + df.iloc[-1].mcorr.get_output_path() + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', + ) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) ) # test to check mcorr get_output() @@ -372,12 +375,12 @@ def test_cnmf(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - 
f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', + ) ) algo = "cnmf" @@ -417,59 +420,59 @@ def test_cnmf(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) assert ( - batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) ) # test to check mmap output path assert ( - batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_60_d2_80_d3_1_order_C_frames_2000_.mmap', - ) - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_60_d2_80_d3_1_order_C_frames_2000_.mmap', + ) + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) ) # test to check mean-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), 
f'{df.iloc[-1]["uuid"]}_mean_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' + ) ) # test to check std-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' + ) ) # test to check max-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' + ) ) # test to check correlation image output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') + batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') ) print("testing cnmf.get_cnmf_memmap()") @@ -493,10 +496,10 @@ def test_cnmf(): # test to check cnmf get_output_path() assert ( - df.iloc[-1].cnmf.get_output_path() - == 
batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') + df.iloc[-1].cnmf.get_output_path() + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') ) # test to check cnmf get_output() @@ -524,11 +527,11 @@ def test_cnmf(): allow_pickle=True, ) for contour, actual_contour in zip( - cnmf_spatial_contours_contours, cnmf_spatial_contours_contours_actual + cnmf_spatial_contours_contours, cnmf_spatial_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip( - cnmf_spatial_contours_coms, cnmf_spatial_contours_coms_actual + cnmf_spatial_contours_coms, cnmf_spatial_contours_coms_actual ): numpy.testing.assert_allclose(com, actual_com, rtol=1e-2, atol=1e-10) @@ -552,9 +555,9 @@ def test_cnmf(): # test to check caiman get_input_movie_path(), should be output of previous mcorr assert ( - df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) - == batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) + df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + == batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) ) # test to check caiman get_correlation_img() @@ -604,7 +607,7 @@ def test_cnmf(): allow_pickle=True, ) for contour, actual_contour in zip( - ixs_contours_contours, ixs_contours_contours_actual + ixs_contours_contours, ixs_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip(ixs_contours_coms, ixs_contours_coms_actual): @@ -675,9 +678,9 @@ def test_cnmfe(): pytest.fail("Something wrong with setting UUID for batch items") assert ( - 
batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) - == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path()) - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) + == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path()) + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) process = df.iloc[-1].caiman.run() @@ -696,9 +699,9 @@ def test_cnmfe(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) assert batch_dir.joinpath( @@ -753,9 +756,9 @@ def test_cnmfe(): pytest.fail("Something wrong with setting UUID for batch items") assert ( - batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) - == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path()) - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) + == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path()) + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) process = df.iloc[-1].caiman.run() @@ -774,9 +777,9 @@ def test_cnmfe(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) assert batch_dir.joinpath( @@ -785,53 +788,53 @@ def test_cnmfe(): # test to check mmap output path assert ( - batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_128_d2_128_d3_1_order_C_frames_1000_.mmap', - ) - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + batch_dir.joinpath( + 
str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_128_d2_128_d3_1_order_C_frames_1000_.mmap', + ) + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) ) # test to check mean-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' + ) ) # test to check std-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' + ) ) # test to check max-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' + ) ) # test to check correlation image output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) - == 
df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') + batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') ) # test to check pnr image output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["pnr-image-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["pnr-image-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_pn.npy') + batch_dir.joinpath(df.iloc[-1]["outputs"]["pnr-image-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["pnr-image-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_pn.npy') ) # extension tests - full @@ -855,9 +858,9 @@ def test_cnmfe(): # test to check cnmf get_output_path() assert ( - df.iloc[-1].cnmf.get_output_path() - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == df.iloc[-1].paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + df.iloc[-1].cnmf.get_output_path() + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == df.iloc[-1].paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) ) # test to check cnmf get_output() @@ -885,11 +888,11 @@ def test_cnmfe(): allow_pickle=True, ) for contour, actual_contour in zip( - cnmfe_spatial_contours_contours, cnmfe_spatial_contours_contours_actual + cnmfe_spatial_contours_contours, cnmfe_spatial_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip( - cnmfe_spatial_contours_coms, cnmfe_spatial_contours_coms_actual + cnmfe_spatial_contours_coms, cnmfe_spatial_contours_coms_actual ): numpy.testing.assert_allclose(com, actual_com, rtol=1e-2, atol=1e-10) @@ -944,7 +947,7 @@ def test_cnmfe(): allow_pickle=True, ) for contour, actual_contour in zip( - ixs_contours_contours, 
ixs_contours_contours_actual + ixs_contours_contours, ixs_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip(ixs_contours_coms, ixs_contours_coms_actual): @@ -998,9 +1001,9 @@ def test_remove_item(): pytest.fail("Something wrong with setting UUID for batch items") assert ( - get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) - == vid_dir.joinpath(f"{algo}.tif") - == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) + get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) + == vid_dir.joinpath(f"{algo}.tif") + == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) ) df.caiman.add_item( @@ -1020,9 +1023,9 @@ def test_remove_item(): pytest.fail("Something wrong with setting UUID for batch items") assert ( - get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) - == vid_dir.joinpath(f"{algo}.tif") - == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) + get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) + == vid_dir.joinpath(f"{algo}.tif") + == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) ) # Check removing specific rows works assert df.iloc[0]["name"] == f"test-{algo}" @@ -1036,3 +1039,248 @@ def test_remove_item(): assert df.isin([f"test-{algo}"]).any().any() == False assert df.isin([f"test1-{algo}"]).any().any() == False assert df.empty == True + + +def test_cache(): + print("*** Testing cache ***") + cnmf.cache.clear_cache() + + set_parent_raw_data_path(vid_dir) + algo = "mcorr" + + df, batch_path = _create_tmp_batch() + + batch_path = Path(batch_path) + batch_dir = batch_path.parent + + input_movie_path = get_datafile(algo) + print(input_movie_path) + + df.caiman.add_item( + algo=algo, + name=f"test-{algo}", + input_movie_path=input_movie_path, + params=test_params[algo], + ) + + assert df.iloc[-1]["algo"] == algo + assert df.iloc[-1]["name"] == f"test-{algo}" + assert df.iloc[-1]["params"] == test_params[algo] + assert df.iloc[-1]["outputs"] is None + try: + 
UUID(df.iloc[-1]["uuid"]) + except: + pytest.fail("Something wrong with setting UUID for batch items") + + assert vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) == vid_dir.joinpath( + f"{algo}.tif" + ) + + process = df.iloc[-1].caiman.run() + process.wait() + + df = load_batch(batch_path) + + with pd.option_context("display.max_rows", None, "display.max_columns", None): + print(df) + + pprint(df.iloc[-1]["outputs"], width=-1) + print(df.iloc[-1]["outputs"]["traceback"]) + assert df.iloc[-1]["outputs"]["success"] is True + assert df.iloc[-1]["outputs"]["traceback"] is None + + assert ( + batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', + ) + ) + + algo = "cnmf" + input_movie_path = df.iloc[-1].mcorr.get_output_path() + df.caiman.add_item( + algo=algo, + name=f"test-{algo}", + input_movie_path=input_movie_path, + params=test_params[algo], + ) + + assert df.iloc[-1]["algo"] == algo + assert df.iloc[-1]["name"] == f"test-{algo}" + assert df.iloc[-1]["params"] == test_params[algo] + assert df.iloc[-1]["outputs"] is None + try: + UUID(df.iloc[-1]["uuid"]) + except: + pytest.fail("Something wrong with setting UUID for batch items") + print("cnmf input_movie_path:", df.iloc[-1]["input_movie_path"]) + assert batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) == input_movie_path + + process = df.iloc[-1].caiman.run() + process.wait() + + df = load_batch(batch_path) + + with pd.option_context("display.max_rows", None, "display.max_columns", None): + print(df) + + pprint(df.iloc[-1]["outputs"], width=-1) + print(df.iloc[-1]["outputs"]["traceback"]) + + # test that cache values are returned when calls are made to same function + + # testing that cache size limits work + cnmf.cache.set_maxsize("1M") + cnmf_output = df.iloc[-1].cnmf.get_output() + hex_get_output 
= hex(id(cnmf_output)) + cache = cnmf.cache.get_cache() + hex1 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item())) + # assert(hex(id(df.iloc[-1].cnmf.get_output(copy=False))) == hex1) + # assert(hex_get_output != hex1) + time_stamp1 = cache[cache["function"] == "get_output"]["time_stamp"].item() + df.iloc[-1].cnmf.get_temporal_components() + df.iloc[-1].cnmf.get_spatial_contours() + df.iloc[-1].cnmf.get_spatial_masks() + df.iloc[-1].cnmf.get_temporal_components(np.arange(7)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(8)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(9)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(6)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(5)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(4)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(3)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(8)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(9)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(7)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(6)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(5)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(4)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(3)) + time_stamp2 = cache[cache["function"] == "get_output"]["time_stamp"].item() + hex2 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item())) + assert (cache[cache["function"] == "get_output"].index.size == 1) + # after adding enough items for cache to exceed max size, cache should remove least recently used items until + # size is back under max + assert (len(cnmf.cache.get_cache().index) == 17) + # the time stamp to get_output the second time should be greater than the original time + # stamp because the cached item is being returned and therefore will have been accessed more recently + assert (time_stamp2 > time_stamp1) + # the hex id of the item in the cache when get_output is first called + # should be the same hex id of the item in the cache when get_output is called 
again + assert (hex1 == hex2) + + # test clear_cache() + cnmf.cache.clear_cache() + assert (len(cnmf.cache.get_cache().index) == 0) + + # checking that cache is cleared, checking speed at which item is returned + start = time.time() + df.iloc[-1].cnmf.get_output() + end = time.time() + assert (len(cnmf.cache.get_cache().index) == 1) + + # second call to item now added to cache, time to return item should be much faster than before because item has + # now been cached + start2 = time.time() + df.iloc[-1].cnmf.get_output() + end2 = time.time() + assert(end2-start2 < end-start) + + # testing clear_cache() again, length of dataframe should be zero + cnmf.cache.clear_cache() + assert (len(cnmf.cache.get_cache().index) == 0) + + # test setting maxsize as 0, should effectively disable the cache...additionally, time to return an item called + # twice should roughly be the same because item is not being stored in the cache + # cache length should remain zero throughout calls to extension functions + cnmf.cache.set_maxsize(0) + start = time.time() + df.iloc[-1].cnmf.get_output() + end = time.time() + assert (len(cnmf.cache.get_cache().index) == 0) + + start2 = time.time() + df.iloc[-1].cnmf.get_output() + end2 = time.time() + assert (len(cnmf.cache.get_cache().index) == 0) + assert(abs((end-start)-(end2-start2)) < 0.01) + + # test to check that separate cache items are being returned for different batch items + # must add another item to the batch, running cnmfe + + input_movie_path = get_datafile("cnmfe") + print(input_movie_path) + df.caiman.add_item( + algo="mcorr", + name=f"test-cnmfe-mcorr", + input_movie_path=input_movie_path, + params=test_params["mcorr"], + ) + process = df.iloc[-1].caiman.run() + process.wait() + + df = load_batch(batch_path) + + algo = "cnmfe" + param_name = "cnmfe_full" + input_movie_path = df.iloc[-1].mcorr.get_output_path() + print(input_movie_path) + + df.caiman.add_item( + algo=algo, + name=f"test-{algo}", + input_movie_path=input_movie_path,
+ params=test_params[param_name], + ) + + process = df.iloc[-1].caiman.run() + process.wait() + + df = load_batch(batch_path) + + cnmf.cache.set_maxsize("1M") + + df.iloc[1].cnmf.get_output() # cnmf output + df.iloc[-1].cnmf.get_output() # cnmfe output + + cache = cnmf.cache.get_cache() + + # checking that both outputs from different batch items are added to the cache + assert(len(cache.index) == 2) + + # checking that the uuid of each outputs from the different batch items are not the same + assert(cache.iloc[-1]["uuid"] != cache.iloc[-2]["uuid"]) + + # checking that the uuid of the output in the cache is the correct uuid of the batch item in the df + assert(cache.iloc[-1]["uuid"] == df.iloc[-1]["uuid"]) + + # call get output from cnmf, check that it is the most recent thing called in the cache + df.iloc[1].cnmf.get_output() + cnmf_uuid = df.iloc[1]["uuid"] + most_recently_called = cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1] + cache_uuid = most_recently_called["uuid"] + assert(cnmf_uuid == cache_uuid) + + # check to make sure by certain params that it is cnmf vs cnmfe + output = df.iloc[1].cnmf.get_output() + assert(output.params.patch["low_rank_background"] == True) + output2 = df.iloc[-1].cnmf.get_output() + assert(output2.params.patch["low_rank_background"] == False) + + # test for copy + # if return_copy=True, then hex id of calls to the same function should be false + output = df.iloc[1].cnmf.get_output() + assert(hex(id(output)) != hex(id(cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1]))) + # if return_copy=False, then hex id of calls to the same function should be true + output = df.iloc[1].cnmf.get_output(return_copy=False) + output2 = df.iloc[1].cnmf.get_output(return_copy=False) + assert(hex(id(output)) == hex(id(output2))) + assert(hex(id(cnmf.cache.get_cache().iloc[-1]["return_val"])) == hex(id(output))) + + + + + + +