From ebd2472e5f9b45481808b0c4ba873c2f3fbdf49a Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 28 Jun 2022 12:16:01 -0400 Subject: [PATCH 01/34] creating cache for faster access to results of previous cnmf extension calls --- mesmerize_core/caiman_extensions/cache.py | 43 +++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 mesmerize_core/caiman_extensions/cache.py diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py new file mode 100644 index 0000000..ba30429 --- /dev/null +++ b/mesmerize_core/caiman_extensions/cache.py @@ -0,0 +1,43 @@ +from functools import wraps +import pandas as pd +import time + +class Cache: + def __init__(self, cache_size=3): + self.cache = pd.DataFrame(data=None, columns=['function', 'args', 'kwargs', 'return_val', 'time_stamp']) + self.cache_size = cache_size + + def get_cache(self): + print(self.cache) + + def use_cache(self, func): + @wraps(func) + def _use_cache(*args, **kwargs): + + # if cache is empty, will always be a cache miss + if len(self.cache.index) == 0: + self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()] + + # checking to see if there is a cache hit + for i in range(len(self.cache.index)): + if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[ + i, 2] == kwargs: + self.cache.iloc[i, 4] = time.time() + return self.cache.iloc[i, 3] + + # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry + if len(self.cache.index) == self.cache_size: + self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0, + inplace=True) + self.cache = self.cache.reset_index(drop=True) + self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()] + return self.cache.iloc[len(self.cache.index) - 1, 3] + else: + 
self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()] + + return func(*args, **kwargs) + + return _use_cache + + +cache = Cache() \ No newline at end of file From 22c4452f7f67e83da1e6f7ea353291b820df1055 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 28 Jun 2022 13:47:25 -0400 Subject: [PATCH 02/34] instantiating cache object in extension files, adding directives --- mesmerize_core/caiman_extensions/cache.py | 12 ++++----- mesmerize_core/caiman_extensions/cnmf.py | 30 ++++++++++++++--------- mesmerize_core/caiman_extensions/mcorr.py | 6 +++++ 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index ba30429..ff76b95 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -2,8 +2,9 @@ import pandas as pd import time + class Cache: - def __init__(self, cache_size=3): + def __init__(self, cache_size=10): self.cache = pd.DataFrame(data=None, columns=['function', 'args', 'kwargs', 'return_val', 'time_stamp']) self.cache_size = cache_size @@ -20,12 +21,12 @@ def _use_cache(*args, **kwargs): # checking to see if there is a cache hit for i in range(len(self.cache.index)): - if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[ - i, 2] == kwargs: + if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[i, 2] == kwargs: self.cache.iloc[i, 4] = time.time() return self.cache.iloc[i, 3] - # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry + # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used + # and add new entry if len(self.cache.index) == self.cache_size: self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0, inplace=True) 
@@ -38,6 +39,3 @@ def _use_cache(*args, **kwargs): return func(*args, **kwargs) return _use_cache - - -cache = Cache() \ No newline at end of file diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py index 0b1a1ff..c221774 100644 --- a/mesmerize_core/caiman_extensions/cnmf.py +++ b/mesmerize_core/caiman_extensions/cnmf.py @@ -10,6 +10,9 @@ from caiman.utils.visualization import get_contours as caiman_get_contours from .common import validate +from .cache import Cache + +cache = Cache() @pd.api.extensions.register_series_accessor("cnmf") @@ -59,6 +62,7 @@ def get_input_memmap(self) -> np.ndarray: # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items @validate("cnmf") + @cache.use_cache def get_output_path(self) -> Path: """ Returns @@ -69,6 +73,7 @@ def get_output_path(self) -> Path: return self._series.paths.resolve(self._series["outputs"]["cnmf-hdf5-path"]) @validate("cnmf") + @cache.use_cache def get_output(self) -> CNMF: """ Returns @@ -82,8 +87,9 @@ def get_output(self) -> CNMF: # TODO: Make the ``ixs`` parameter for spatial stuff optional @validate("cnmf") + @cache.use_cache def get_spatial_masks( - self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01 + self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01 ) -> np.ndarray: """ Get binary masks of the spatial components at the given `ixs` @@ -128,7 +134,7 @@ def get_spatial_masks( # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items @staticmethod def _get_spatial_contours( - cnmf_obj: CNMF, ixs_components: Optional[np.ndarray] = None + cnmf_obj: CNMF, ixs_components: Optional[np.ndarray] = None ): if ixs_components is None: ixs_components = cnmf_obj.estimates.idx_components @@ -148,8 +154,9 @@ def _get_spatial_contours( return contours @validate("cnmf") + @cache.use_cache def get_spatial_contours( - self, ixs_components: Optional[np.ndarray] = None + 
self, ixs_components: Optional[np.ndarray] = None ) -> Tuple[List[np.ndarray], List[np.ndarray]]: """ Get the contour and center of mass for each spatial footprint @@ -181,8 +188,9 @@ def get_spatial_contours( return coordinates, coms @validate("cnmf") + @cache.use_cache def get_temporal_components( - self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False + self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False ) -> np.ndarray: """ Get the temporal components for this CNMF item @@ -216,10 +224,10 @@ def get_temporal_components( # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items @validate("cnmf") def get_reconstructed_movie( - self, - ixs_frames: Optional[Union[Tuple[int, int], int]] = None, - idx_components: np.ndarray = None, - add_background: bool = True, + self, + ixs_frames: Optional[Union[Tuple[int, int], int]] = None, + idx_components: np.ndarray = None, + add_background: bool = True, ) -> np.ndarray: """ Return the reconstructed movie, (A * C) + (b * f) @@ -250,12 +258,12 @@ def get_reconstructed_movie( if isinstance(ixs_frames, int): ixs_frames = (ixs_frames, ixs_frames + 1) - dn = cnmf_obj.estimates.A[:,idx_components].dot( - cnmf_obj.estimates.C[idx_components, ixs_frames[0] : ixs_frames[1]] + dn = cnmf_obj.estimates.A[:, idx_components].dot( + cnmf_obj.estimates.C[idx_components, ixs_frames[0]: ixs_frames[1]] ) if add_background: dn += cnmf_obj.estimates.b.dot( - cnmf_obj.estimates.f[:, ixs_frames[0] : ixs_frames[1]] + cnmf_obj.estimates.f[:, ixs_frames[0]: ixs_frames[1]] ) return dn.reshape(cnmf_obj.dims + (-1,), order="F").transpose([2, 0, 1]) diff --git a/mesmerize_core/caiman_extensions/mcorr.py b/mesmerize_core/caiman_extensions/mcorr.py index bbb6e9a..0071eb6 100644 --- a/mesmerize_core/caiman_extensions/mcorr.py +++ b/mesmerize_core/caiman_extensions/mcorr.py @@ -6,6 +6,9 @@ from .common import validate from typing import * +from cache import Cache + +cache 
= Cache() @pd.api.extensions.register_series_accessor("mcorr") @@ -18,6 +21,7 @@ def __init__(self, s: pd.Series): self._series = s @validate("mcorr") + @cache.use_cache def get_output_path(self) -> Path: """ Get the path to the motion corrected output memmap file @@ -30,6 +34,7 @@ def get_output_path(self) -> Path: return self._series.paths.resolve(self._series["outputs"]["mcorr-output-path"]) @validate("mcorr") + @cache.use_cache def get_output(self) -> np.ndarray: """ Get the motion corrected output as a memmaped numpy array, allows fast random-access scrolling. @@ -45,6 +50,7 @@ def get_output(self) -> np.ndarray: return mc_movie @validate("mcorr") + @cache.use_cache def get_shifts( self, pw_rigid: bool = False ) -> Tuple[List[np.ndarray], List[np.ndarray]]: From 12e4ff149a764ca2eac6953357b41ee040f82717 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 28 Jun 2022 13:48:35 -0400 Subject: [PATCH 03/34] reformatting files --- mesmerize_core/caiman_extensions/cache.py | 44 ++++++++++++++++++---- mesmerize_core/caiman_extensions/cnmf.py | 20 +++++----- mesmerize_core/caiman_extensions/common.py | 4 +- 3 files changed, 49 insertions(+), 19 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index ff76b95..e16d496 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -5,7 +5,10 @@ class Cache: def __init__(self, cache_size=10): - self.cache = pd.DataFrame(data=None, columns=['function', 'args', 'kwargs', 'return_val', 'time_stamp']) + self.cache = pd.DataFrame( + data=None, + columns=["function", "args", "kwargs", "return_val", "time_stamp"], + ) self.cache_size = cache_size def get_cache(self): @@ -17,24 +20,51 @@ def _use_cache(*args, **kwargs): # if cache is empty, will always be a cache miss if len(self.cache.index) == 0: - self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()] + 
self.cache.loc[len(self.cache.index)] = [ + func.__name__, + args, + kwargs, + func(args, kwargs), + time.time(), + ] # checking to see if there is a cache hit for i in range(len(self.cache.index)): - if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[i, 2] == kwargs: + if ( + self.cache.iloc[i, 0] == func.__name__ + and self.cache.iloc[i, 1] == args + and self.cache.iloc[i, 2] == kwargs + ): self.cache.iloc[i, 4] = time.time() return self.cache.iloc[i, 3] # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used # and add new entry if len(self.cache.index) == self.cache_size: - self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0, - inplace=True) + self.cache.drop( + index=self.cache.sort_values( + by=["time_stamp"], ascending=False + ).index[-1], + axis=0, + inplace=True, + ) self.cache = self.cache.reset_index(drop=True) - self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()] + self.cache.loc[len(self.cache.index)] = [ + func.__name__, + args, + kwargs, + func(args, kwargs), + time.time(), + ] return self.cache.iloc[len(self.cache.index) - 1, 3] else: - self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()] + self.cache.loc[len(self.cache.index)] = [ + func.__name__, + args, + kwargs, + func(args, kwargs), + time.time(), + ] return func(*args, **kwargs) diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py index c221774..6108f88 100644 --- a/mesmerize_core/caiman_extensions/cnmf.py +++ b/mesmerize_core/caiman_extensions/cnmf.py @@ -89,7 +89,7 @@ def get_output(self) -> CNMF: @validate("cnmf") @cache.use_cache def get_spatial_masks( - self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01 + self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01 ) -> 
np.ndarray: """ Get binary masks of the spatial components at the given `ixs` @@ -134,7 +134,7 @@ def get_spatial_masks( # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items @staticmethod def _get_spatial_contours( - cnmf_obj: CNMF, ixs_components: Optional[np.ndarray] = None + cnmf_obj: CNMF, ixs_components: Optional[np.ndarray] = None ): if ixs_components is None: ixs_components = cnmf_obj.estimates.idx_components @@ -156,7 +156,7 @@ def _get_spatial_contours( @validate("cnmf") @cache.use_cache def get_spatial_contours( - self, ixs_components: Optional[np.ndarray] = None + self, ixs_components: Optional[np.ndarray] = None ) -> Tuple[List[np.ndarray], List[np.ndarray]]: """ Get the contour and center of mass for each spatial footprint @@ -190,7 +190,7 @@ def get_spatial_contours( @validate("cnmf") @cache.use_cache def get_temporal_components( - self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False + self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False ) -> np.ndarray: """ Get the temporal components for this CNMF item @@ -224,10 +224,10 @@ def get_temporal_components( # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items @validate("cnmf") def get_reconstructed_movie( - self, - ixs_frames: Optional[Union[Tuple[int, int], int]] = None, - idx_components: np.ndarray = None, - add_background: bool = True, + self, + ixs_frames: Optional[Union[Tuple[int, int], int]] = None, + idx_components: np.ndarray = None, + add_background: bool = True, ) -> np.ndarray: """ Return the reconstructed movie, (A * C) + (b * f) @@ -259,11 +259,11 @@ def get_reconstructed_movie( ixs_frames = (ixs_frames, ixs_frames + 1) dn = cnmf_obj.estimates.A[:, idx_components].dot( - cnmf_obj.estimates.C[idx_components, ixs_frames[0]: ixs_frames[1]] + cnmf_obj.estimates.C[idx_components, ixs_frames[0] : ixs_frames[1]] ) if add_background: dn += cnmf_obj.estimates.b.dot( - 
cnmf_obj.estimates.f[:, ixs_frames[0]: ixs_frames[1]] + cnmf_obj.estimates.f[:, ixs_frames[0] : ixs_frames[1]] ) return dn.reshape(cnmf_obj.dims + (-1,), order="F").transpose([2, 0, 1]) diff --git a/mesmerize_core/caiman_extensions/common.py b/mesmerize_core/caiman_extensions/common.py index 7f6e67c..3b3654c 100644 --- a/mesmerize_core/caiman_extensions/common.py +++ b/mesmerize_core/caiman_extensions/common.py @@ -270,10 +270,10 @@ def get_input_movie_path(self) -> Path: def get_input_movie(self) -> Union[np.ndarray, pims.FramesSequence]: extension = self.get_input_movie_path().suffixes[-1] - if extension in ['.tiff', '.tif', '.btf']: + if extension in [".tiff", ".tif", ".btf"]: return pims.open(str(self.get_input_movie_path())) - elif extension in ['.mmap', '.memmap']: + elif extension in [".mmap", ".memmap"]: Yr, dims, T = load_memmap(str(self.get_input_movie_path())) return np.reshape(Yr.T, [T] + list(dims), order="F") From 5bf7fd9c10a5113a75efe3bb98211c5437488cd1 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 28 Jun 2022 15:38:04 -0400 Subject: [PATCH 04/34] finishing up cache impl, adding clear_cache() and set_maxcache() capabilities --- mesmerize_core/caiman_extensions/cache.py | 68 +++++++++-------------- mesmerize_core/caiman_extensions/cnmf.py | 3 + mesmerize_core/caiman_extensions/mcorr.py | 2 +- 3 files changed, 30 insertions(+), 43 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index e16d496..27ad28c 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -5,67 +5,51 @@ class Cache: def __init__(self, cache_size=10): - self.cache = pd.DataFrame( - data=None, - columns=["function", "args", "kwargs", "return_val", "time_stamp"], - ) + self.cache = pd.DataFrame(data=None, columns=['function', 'args', 'kwargs', 'return_val', 'time_stamp']) self.cache_size = cache_size def get_cache(self): print(self.cache) + def clear_cache(self): + 
while len(self.cache.index) != 0: + self.cache.drop(index=self.cache.index[-1], axis=0, inplace=True) + + def set_maxsize(self, max_size: int): + self.cache_size = max_size + def use_cache(self, func): @wraps(func) - def _use_cache(*args, **kwargs): + def _use_cache(instance, *args, **kwargs): + + print(instance) + print(args, kwargs) # if cache is empty, will always be a cache miss if len(self.cache.index) == 0: - self.cache.loc[len(self.cache.index)] = [ - func.__name__, - args, - kwargs, - func(args, kwargs), - time.time(), - ] + return_val = func(instance, *args, **kwargs) + self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()] # checking to see if there is a cache hit for i in range(len(self.cache.index)): - if ( - self.cache.iloc[i, 0] == func.__name__ - and self.cache.iloc[i, 1] == args - and self.cache.iloc[i, 2] == kwargs - ): + if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[ + i, 2] == kwargs: self.cache.iloc[i, 4] = time.time() + return_val = self.cache.iloc[i, 3] return self.cache.iloc[i, 3] - # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used - # and add new entry + # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry if len(self.cache.index) == self.cache_size: - self.cache.drop( - index=self.cache.sort_values( - by=["time_stamp"], ascending=False - ).index[-1], - axis=0, - inplace=True, - ) + return_val = func(instance, *args, **kwargs) + self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0, + inplace=True) self.cache = self.cache.reset_index(drop=True) - self.cache.loc[len(self.cache.index)] = [ - func.__name__, - args, - kwargs, - func(args, kwargs), - time.time(), - ] + self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()] return 
self.cache.iloc[len(self.cache.index) - 1, 3] else: - self.cache.loc[len(self.cache.index)] = [ - func.__name__, - args, - kwargs, - func(args, kwargs), - time.time(), - ] + return_val = func(instance, *args, **kwargs) + self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()] - return func(*args, **kwargs) + return return_val - return _use_cache + return _use_cache \ No newline at end of file diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py index 6108f88..b0ed5e1 100644 --- a/mesmerize_core/caiman_extensions/cnmf.py +++ b/mesmerize_core/caiman_extensions/cnmf.py @@ -24,6 +24,8 @@ class CNMFExtensions: def __init__(self, s: pd.Series): self._series = s + @validate("cnmf") + @cache.use_cache def get_cnmf_memmap(self) -> np.ndarray: """ Get the CNMF memmap @@ -39,6 +41,7 @@ def get_cnmf_memmap(self) -> np.ndarray: images = np.reshape(Yr.T, [T] + list(dims), order="F") return images + @cache.use_cache def get_input_memmap(self) -> np.ndarray: """ Return the F-order memmap if the input to the diff --git a/mesmerize_core/caiman_extensions/mcorr.py b/mesmerize_core/caiman_extensions/mcorr.py index 0071eb6..c46e412 100644 --- a/mesmerize_core/caiman_extensions/mcorr.py +++ b/mesmerize_core/caiman_extensions/mcorr.py @@ -6,7 +6,7 @@ from .common import validate from typing import * -from cache import Cache +from .cache import Cache cache = Cache() From eb62b656950522866a773031527d87d3cbf4b18f Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Thu, 30 Jun 2022 20:35:40 -0400 Subject: [PATCH 05/34] adding uuid column to cache so that different batch items can be delineated --- mesmerize_core/caiman_extensions/cache.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 27ad28c..5455cd7 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ 
b/mesmerize_core/caiman_extensions/cache.py @@ -5,7 +5,7 @@ class Cache: def __init__(self, cache_size=10): - self.cache = pd.DataFrame(data=None, columns=['function', 'args', 'kwargs', 'return_val', 'time_stamp']) + self.cache = pd.DataFrame(data=None, columns=['uuid', 'function', 'args', 'kwargs', 'return_val', 'time_stamp']) self.cache_size = cache_size def get_cache(self): @@ -22,21 +22,18 @@ def use_cache(self, func): @wraps(func) def _use_cache(instance, *args, **kwargs): - print(instance) - print(args, kwargs) - # if cache is empty, will always be a cache miss if len(self.cache.index) == 0: return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()] + self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()] # checking to see if there is a cache hit for i in range(len(self.cache.index)): - if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[ - i, 2] == kwargs: - self.cache.iloc[i, 4] = time.time() - return_val = self.cache.iloc[i, 3] - return self.cache.iloc[i, 3] + if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__ and self.cache.iloc[i, 2] == args and self.cache.iloc[ + i, 3] == kwargs: + self.cache.iloc[i, 5] = time.time() + return_val = self.cache.iloc[i, 4] + return self.cache.iloc[i, 4] # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry if len(self.cache.index) == self.cache_size: @@ -44,11 +41,11 @@ def _use_cache(instance, *args, **kwargs): self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0, inplace=True) self.cache = self.cache.reset_index(drop=True) - self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()] - return self.cache.iloc[len(self.cache.index) - 1, 3] + 
self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()] + return self.cache.iloc[len(self.cache.index) - 1, 4] else: return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()] + self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()] return return_val From a40e6c4b634fb01b3e08721e95808a54238be550 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Thu, 30 Jun 2022 21:03:06 -0400 Subject: [PATCH 06/34] updating kwarg comparison to handle numpy arrays --- mesmerize_core/caiman_extensions/cache.py | 26 +++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 5455cd7..ebf149f 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -1,6 +1,17 @@ from functools import wraps import pandas as pd import time +import numpy as np + + +def check_kwarg_equality(kwargs, cache_kwargs): + if not type(kwargs) == type(cache_kwargs): + return False + + if isinstance(cache_kwargs, np.ndarray): + return np.array_equal(cache_kwargs, kwargs) + else: + return cache_kwargs == kwargs class Cache: @@ -25,12 +36,12 @@ def _use_cache(instance, *args, **kwargs): # if cache is empty, will always be a cache miss if len(self.cache.index) == 0: return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()] + self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, + return_val, time.time()] # checking to see if there is a cache hit for i in range(len(self.cache.index)): - if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__ and 
self.cache.iloc[i, 2] == args and self.cache.iloc[ - i, 3] == kwargs: + if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__ and self.cache.iloc[i, 2] == args and check_kwarg_equality(kwargs, self.cache.iloc[i, 3]): self.cache.iloc[i, 5] = time.time() return_val = self.cache.iloc[i, 4] return self.cache.iloc[i, 4] @@ -41,12 +52,15 @@ def _use_cache(instance, *args, **kwargs): self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0, inplace=True) self.cache = self.cache.reset_index(drop=True) - self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()] + self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, + time.time()] return self.cache.iloc[len(self.cache.index) - 1, 4] else: return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()] + self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, + time.time()] return return_val - return _use_cache \ No newline at end of file + return _use_cache + From 3c5bd91f19d1f0dce4d07a76248d65140854ea33 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Thu, 30 Jun 2022 21:43:22 -0400 Subject: [PATCH 07/34] checking if args are equal when numpy array is passed as arg --- mesmerize_core/caiman_extensions/cache.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index ebf149f..3c4db13 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -4,14 +4,14 @@ import numpy as np -def check_kwarg_equality(kwargs, cache_kwargs): - if not type(kwargs) == type(cache_kwargs): +def check_arg_equality(args, cache_args): + if 
not type(args) == type(cache_args): return False - if isinstance(cache_kwargs, np.ndarray): - return np.array_equal(cache_kwargs, kwargs) + if isinstance(cache_args, np.ndarray): + return np.array_equal(cache_args, args) else: - return cache_kwargs == kwargs + return cache_args == args class Cache: @@ -41,7 +41,7 @@ def _use_cache(instance, *args, **kwargs): # checking to see if there is a cache hit for i in range(len(self.cache.index)): - if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__ and self.cache.iloc[i, 2] == args and check_kwarg_equality(kwargs, self.cache.iloc[i, 3]): + if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__ and check_arg_equality(args, self.cache.iloc[i, 2]) and check_arg_equality(kwargs, self.cache.iloc[i, 3]): self.cache.iloc[i, 5] = time.time() return_val = self.cache.iloc[i, 4] return self.cache.iloc[i, 4] From ed2fa186a6abf4a5c9921acdce7b5f3906d006ff Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Fri, 1 Jul 2022 11:02:55 -0400 Subject: [PATCH 08/34] updating check arg function so that arg equality is checked for every entry of array or dict --- mesmerize_core/caiman_extensions/cache.py | 66 ++++++++++++++++++----- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 3c4db13..1761ab0 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -4,19 +4,32 @@ import numpy as np -def check_arg_equality(args, cache_args): +def _check_arg_equality(args, cache_args): if not type(args) == type(cache_args): return False - if isinstance(cache_args, np.ndarray): return np.array_equal(cache_args, args) else: return cache_args == args +def _check_args_equality(args, cache_args): + equality = list() + if isinstance(args, tuple): + for arg, cache_arg in zip(args, cache_args): + 
equality.append(_check_arg_equality(arg, cache_arg)) + else: + for k in args.keys(): + equality.append(_check_arg_equality(args[k], cache_args[k])) + return all(equality) + + class Cache: def __init__(self, cache_size=10): - self.cache = pd.DataFrame(data=None, columns=['uuid', 'function', 'args', 'kwargs', 'return_val', 'time_stamp']) + self.cache = pd.DataFrame( + data=None, + columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"], + ) self.cache_size = cache_size def get_cache(self): @@ -36,12 +49,23 @@ def _use_cache(instance, *args, **kwargs): # if cache is empty, will always be a cache miss if len(self.cache.index) == 0: return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, - return_val, time.time()] + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] # checking to see if there is a cache hit for i in range(len(self.cache.index)): - if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__ and check_arg_equality(args, self.cache.iloc[i, 2]) and check_arg_equality(kwargs, self.cache.iloc[i, 3]): + if ( + self.cache.iloc[i, 0] == instance._series["uuid"] + and self.cache.iloc[i, 1] == func.__name__ + and _check_args_equality(args, self.cache.iloc[i, 2]) + and _check_arg_equality(kwargs, self.cache.iloc[i, 3]) + ): self.cache.iloc[i, 5] = time.time() return_val = self.cache.iloc[i, 4] return self.cache.iloc[i, 4] @@ -49,18 +73,34 @@ def _use_cache(instance, *args, **kwargs): # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry if len(self.cache.index) == self.cache_size: return_val = func(instance, *args, **kwargs) - self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0, - inplace=True) + self.cache.drop( + 
index=self.cache.sort_values( + by=["time_stamp"], ascending=False + ).index[-1], + axis=0, + inplace=True, + ) self.cache = self.cache.reset_index(drop=True) - self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, - time.time()] + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] return self.cache.iloc[len(self.cache.index) - 1, 4] else: return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, - time.time()] + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] return return_val return _use_cache - From 52be0aeb57eef53138b01420537ef1ad23836686 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Sun, 3 Jul 2022 08:14:36 -0400 Subject: [PATCH 09/34] returning func(*args, **kwargs) if len(cache)==0 --- mesmerize_core/caiman_extensions/cache.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 1761ab0..62b0a49 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -57,6 +57,7 @@ def _use_cache(instance, *args, **kwargs): return_val, time.time(), ] + return return_val # checking to see if there is a cache hit for i in range(len(self.cache.index)): From 35897a8af9703aabc10162266c82c18470dba022 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Sun, 3 Jul 2022 09:03:48 -0400 Subject: [PATCH 10/34] adding ability for cache size to be controlled by memory size as opposed to item number --- mesmerize_core/caiman_extensions/cache.py | 111 +++++++++++++++------- 1 file changed, 77 insertions(+), 34 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 
62b0a49..291f011 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -2,6 +2,7 @@ import pandas as pd import time import numpy as np +import sys def _check_arg_equality(args, cache_args): @@ -25,12 +26,16 @@ def _check_args_equality(args, cache_args): class Cache: - def __init__(self, cache_size=10): + def __init__(self, cache_size=10, length_storage=True): self.cache = pd.DataFrame( data=None, columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"], ) self.cache_size = cache_size + if length_storage == True: + self.storage_type = 'ITEMS' + else: + self.storage_type = 'RAM' def get_cache(self): print(self.cache) @@ -42,6 +47,12 @@ def clear_cache(self): def set_maxsize(self, max_size: int): self.cache_size = max_size + def _get_cache_size(self): + cache_size = 0 + for i in range(len(self.cache.index)): + cache_size += sys.getsizeof(self.cache.iloc[i, 4]) + return cache_size + def use_cache(self, func): @wraps(func) def _use_cache(instance, *args, **kwargs): @@ -62,45 +73,77 @@ def _use_cache(instance, *args, **kwargs): # checking to see if there is a cache hit for i in range(len(self.cache.index)): if ( - self.cache.iloc[i, 0] == instance._series["uuid"] - and self.cache.iloc[i, 1] == func.__name__ - and _check_args_equality(args, self.cache.iloc[i, 2]) - and _check_arg_equality(kwargs, self.cache.iloc[i, 3]) + self.cache.iloc[i, 0] == instance._series["uuid"] + and self.cache.iloc[i, 1] == func.__name__ + and _check_args_equality(args, self.cache.iloc[i, 2]) + and _check_arg_equality(kwargs, self.cache.iloc[i, 3]) ): self.cache.iloc[i, 5] = time.time() return_val = self.cache.iloc[i, 4] return self.cache.iloc[i, 4] # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry - if len(self.cache.index) == self.cache_size: - return_val = func(instance, *args, **kwargs) - self.cache.drop( - index=self.cache.sort_values( - 
by=["time_stamp"], ascending=False - ).index[-1], - axis=0, - inplace=True, - ) - self.cache = self.cache.reset_index(drop=True) - self.cache.loc[len(self.cache.index)] = [ - instance._series["uuid"], - func.__name__, - args, - kwargs, - return_val, - time.time(), - ] - return self.cache.iloc[len(self.cache.index) - 1, 4] - else: - return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [ - instance._series["uuid"], - func.__name__, - args, - kwargs, - return_val, - time.time(), - ] + # check which type of memory + if self.storage_type == 'ITEMS': + if len(self.cache.index) == self.cache_size: + return_val = func(instance, *args, **kwargs) + self.cache.drop( + index=self.cache.sort_values( + by=["time_stamp"], ascending=False + ).index[-1], + axis=0, + inplace=True, + ) + self.cache = self.cache.reset_index(drop=True) + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] + return self.cache.iloc[len(self.cache.index) - 1, 4] + else: + return_val = func(instance, *args, **kwargs) + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] + elif self.storage_type == 'RAM': + if self._get_cache_size() >= self.cache_size: + return_val = func(instance, *args, **kwargs) + self.cache.drop( + index=self.cache.sort_values( + by=["time_stamp"], ascending=False + ).index[-1], + axis=0, + inplace=True, + ) + self.cache = self.cache.reset_index(drop=True) + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] + return self.cache.iloc[len(self.cache.index) - 1, 4] + else: + return_val = func(instance, *args, **kwargs) + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] return return_val From 
fa68603fbc42ddc192ffb13f412ffa18d93309f4 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 5 Jul 2022 09:34:46 -0400 Subject: [PATCH 11/34] work in progress --- mesmerize_core/caiman_extensions/cache.py | 33 ++++++++++++++++------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 291f011..5e4d2c4 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -1,4 +1,6 @@ from functools import wraps +from typing import Union, Optional + import pandas as pd import time import numpy as np @@ -26,17 +28,21 @@ def _check_args_equality(args, cache_args): class Cache: - def __init__(self, cache_size=10, length_storage=True): + def __init__(self, cache_size: Optional[Union[int, str]] = None): self.cache = pd.DataFrame( data=None, columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"], ) - self.cache_size = cache_size - if length_storage == True: + self.size = cache_size + if isinstance(cache_size, int): self.storage_type = 'ITEMS' else: self.storage_type = 'RAM' + if cache_size is None: + self.size = '1G' + self.storage_type = 'RAM' + def get_cache(self): print(self.cache) @@ -44,13 +50,23 @@ def clear_cache(self): while len(self.cache.index) != 0: self.cache.drop(index=self.cache.index[-1], axis=0, inplace=True) - def set_maxsize(self, max_size: int): - self.cache_size = max_size + def set_maxsize(self, max_size: Union[int, str]): + if isinstance(max_size, str): + self.storage_type = 'RAM' + else: + self.storage_type = 'ITEMS' + self.size = max_size - def _get_cache_size(self): + def _get_cache_size_bytes(self, return_gig=True): + """Returns in GiB or MB""" cache_size = 0 for i in range(len(self.cache.index)): cache_size += sys.getsizeof(self.cache.iloc[i, 4]) + # need to fix how size of an output is calculated to handle non-built-in types + if return_gig: + cache_size = cache_size / 1024**3 + else: + 
cache_size = cache_size / 1024**2 return cache_size def use_cache(self, func): @@ -68,7 +84,6 @@ def _use_cache(instance, *args, **kwargs): return_val, time.time(), ] - return return_val # checking to see if there is a cache hit for i in range(len(self.cache.index)): @@ -85,7 +100,7 @@ def _use_cache(instance, *args, **kwargs): # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry # check which type of memory if self.storage_type == 'ITEMS': - if len(self.cache.index) == self.cache_size: + if len(self.cache.index) == self.size: return_val = func(instance, *args, **kwargs) self.cache.drop( index=self.cache.sort_values( @@ -115,7 +130,7 @@ def _use_cache(instance, *args, **kwargs): time.time(), ] elif self.storage_type == 'RAM': - if self._get_cache_size() >= self.cache_size: + if self._get_cache_size_bytes() >= self.size: return_val = func(instance, *args, **kwargs) self.cache.drop( index=self.cache.sort_values( From 34ca881de7efc94e9dfc880569e2b09e7f4c65a8 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 5 Jul 2022 10:53:06 -0400 Subject: [PATCH 12/34] fixing how size of item in cache are calculated for non-built in types --- mesmerize_core/caiman_extensions/cache.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 5e4d2c4..4dc9534 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -1,10 +1,12 @@ from functools import wraps -from typing import Union, Optional +from typing import Union, Optional, Tuple import pandas as pd import time import numpy as np import sys +from pathlib import Path +from caiman.source_extraction.cnmf import CNMF def _check_arg_equality(args, cache_args): @@ -61,8 +63,18 @@ def _get_cache_size_bytes(self, return_gig=True): """Returns in GiB or MB""" cache_size = 0 for i in 
range(len(self.cache.index)): - cache_size += sys.getsizeof(self.cache.iloc[i, 4]) - # need to fix how size of an output is calculated to handle non-built-in types + if isinstance(self.cache.iloc[i, 4], np.ndarray): + cache_size += (self.cache.iloc[i, 4].size * self.cache.iloc[i,4].itemsize) + elif isinstance(self.cache.iloc[i, 4], Tuple): + cache_size += (self.cache.iloc[i, 4][0].size * self.cache.iloc[i, 4][0].itemsize) + \ + (self.cache.iloc[i, 4][1].size * self.cache.iloc[i, 4][1].itemsize) + elif isinstance(self.cache.iloc[i, 4], Path): + cache_size += 0 + elif isinstance(self.cache.iloc[i, 4], CNMF): + cache_size += sys.getsizeof(self.cache.iloc[i,4].estimates) + else: + cache_size += sys.getsizeof(self.cache.iloc[i, 4]) + if return_gig: cache_size = cache_size / 1024**3 else: From 98dcd3b2a1c48a4e554ddd48e43938866c43284f Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 5 Jul 2022 11:09:37 -0400 Subject: [PATCH 13/34] trying to make code more elegant for kushal --- mesmerize_core/caiman_extensions/cache.py | 92 ++++++++--------------- 1 file changed, 31 insertions(+), 61 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 4dc9534..942ef75 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -71,7 +71,7 @@ def _get_cache_size_bytes(self, return_gig=True): elif isinstance(self.cache.iloc[i, 4], Path): cache_size += 0 elif isinstance(self.cache.iloc[i, 4], CNMF): - cache_size += sys.getsizeof(self.cache.iloc[i,4].estimates) + cache_size += sys.getsizeof(self.cache.iloc[i, 4].estimates) else: cache_size += sys.getsizeof(self.cache.iloc[i, 4]) @@ -111,66 +111,36 @@ def _use_cache(instance, *args, **kwargs): # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry # check which type of memory - if self.storage_type == 'ITEMS': - if len(self.cache.index) == self.size: - 
return_val = func(instance, *args, **kwargs) - self.cache.drop( - index=self.cache.sort_values( - by=["time_stamp"], ascending=False - ).index[-1], - axis=0, - inplace=True, - ) - self.cache = self.cache.reset_index(drop=True) - self.cache.loc[len(self.cache.index)] = [ - instance._series["uuid"], - func.__name__, - args, - kwargs, - return_val, - time.time(), - ] - return self.cache.iloc[len(self.cache.index) - 1, 4] - else: - return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [ - instance._series["uuid"], - func.__name__, - args, - kwargs, - return_val, - time.time(), - ] - elif self.storage_type == 'RAM': - if self._get_cache_size_bytes() >= self.size: - return_val = func(instance, *args, **kwargs) - self.cache.drop( - index=self.cache.sort_values( - by=["time_stamp"], ascending=False - ).index[-1], - axis=0, - inplace=True, - ) - self.cache = self.cache.reset_index(drop=True) - self.cache.loc[len(self.cache.index)] = [ - instance._series["uuid"], - func.__name__, - args, - kwargs, - return_val, - time.time(), - ] - return self.cache.iloc[len(self.cache.index) - 1, 4] - else: - return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [ - instance._series["uuid"], - func.__name__, - args, - kwargs, - return_val, - time.time(), - ] + if (self.storage_type == 'ITEMS' and len(self.cache.index) == self.size) or (self.storage_type == 'RAM' and self._get_cache_size_bytes() >= self.size): + return_val = func(instance, *args, **kwargs) + self.cache.drop( + index=self.cache.sort_values( + by=["time_stamp"], ascending=False + ).index[-1], + axis=0, + inplace=True, + ) + self.cache = self.cache.reset_index(drop=True) + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] + return self.cache.iloc[len(self.cache.index) - 1, 4] + + else: + return_val = func(instance, *args, **kwargs) + self.cache.loc[len(self.cache.index)] = [ + 
instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] return return_val From 19203fb626f9aef807d15419e693e7da60b8d67b Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 5 Jul 2022 11:55:21 -0400 Subject: [PATCH 14/34] cache should not work with whether size is based on number of items or size in memory --- mesmerize_core/caiman_extensions/cache.py | 32 +++++++++++++++++++---- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 942ef75..74a26c5 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -7,6 +7,7 @@ import sys from pathlib import Path from caiman.source_extraction.cnmf import CNMF +import re def _check_arg_equality(args, cache_args): @@ -42,7 +43,7 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None): self.storage_type = 'RAM' if cache_size is None: - self.size = '1G' + self.size = 1 self.storage_type = 'RAM' def get_cache(self): @@ -55,9 +56,10 @@ def clear_cache(self): def set_maxsize(self, max_size: Union[int, str]): if isinstance(max_size, str): self.storage_type = 'RAM' + self.size = int(re.split('\d+', max_size)[0]) else: self.storage_type = 'ITEMS' - self.size = max_size + self.size = max_size def _get_cache_size_bytes(self, return_gig=True): """Returns in GiB or MB""" @@ -110,8 +112,8 @@ def _use_cache(instance, *args, **kwargs): return self.cache.iloc[i, 4] # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry - # check which type of memory - if (self.storage_type == 'ITEMS' and len(self.cache.index) == self.size) or (self.storage_type == 'RAM' and self._get_cache_size_bytes() >= self.size): + # if memory type is 'ITEMS': drop the least recently used and then add new item + if self.storage_type == 'ITEMS' and len(self.cache.index) == self.size: return_val = 
func(instance, *args, **kwargs) self.cache.drop( index=self.cache.sort_values( @@ -130,7 +132,27 @@ def _use_cache(instance, *args, **kwargs): time.time(), ] return self.cache.iloc[len(self.cache.index) - 1, 4] - + # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again + elif self.storage_type == 'RAM' and self._get_cache_size_bytes() > self.size: + return_val = func(instance, *args, **kwargs) + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] + while self._get_cache_size_bytes() > self.size: + self.cache.drop( + index=self.cache.sort_values( + by=["time_stamp"], ascending=False + ).index[-1], + axis=0, + inplace=True, + ) + self.cache = self.cache.reset_index(drop=True) + # no matter the storage type if size is not going to be exceeded for either, then item can just be added to cache else: return_val = func(instance, *args, **kwargs) self.cache.loc[len(self.cache.index)] = [ From 7fd26307adfcd9fe86393cc731bf4eb6460373af Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 5 Jul 2022 12:20:47 -0400 Subject: [PATCH 15/34] debugging cache --- mesmerize_core/caiman_extensions/cache.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 74a26c5..807eaa7 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -36,15 +36,16 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None): data=None, columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"], ) - self.size = cache_size + if cache_size is None: + self.size = 1 + self.storage_type = 'RAM' + if isinstance(cache_size, int): self.storage_type = 'ITEMS' + self.size = cache_size else: self.storage_type = 'RAM' - - if cache_size is None: - self.size = 1 - 
self.storage_type = 'RAM' + self.size = int(re.split('[a-zA-Z]', cache_size)[0]) def get_cache(self): print(self.cache) @@ -56,7 +57,7 @@ def clear_cache(self): def set_maxsize(self, max_size: Union[int, str]): if isinstance(max_size, str): self.storage_type = 'RAM' - self.size = int(re.split('\d+', max_size)[0]) + self.size = int(re.split('[a-zA-Z]', max_size)[0]) else: self.storage_type = 'ITEMS' self.size = max_size From 0157fc4d5de31118cc21775e0eaa4f5c2568cab1 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 5 Jul 2022 12:23:34 -0400 Subject: [PATCH 16/34] further debugging --- mesmerize_core/caiman_extensions/cache.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 807eaa7..bbdfe29 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -39,8 +39,7 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None): if cache_size is None: self.size = 1 self.storage_type = 'RAM' - - if isinstance(cache_size, int): + elif isinstance(cache_size, int): self.storage_type = 'ITEMS' self.size = cache_size else: From 77d13f7789b7604e9055efccbaf3b4b9545890a9 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Tue, 5 Jul 2022 13:28:17 -0400 Subject: [PATCH 17/34] further changes to cache --- mesmerize_core/caiman_extensions/cache.py | 40 +++++++++++------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index bbdfe29..13678da 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -37,14 +37,14 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None): columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"], ) if cache_size is None: - self.size = 1 + self.size = '1G' self.storage_type = 'RAM' elif isinstance(cache_size, 
int): self.storage_type = 'ITEMS' self.size = cache_size else: self.storage_type = 'RAM' - self.size = int(re.split('[a-zA-Z]', cache_size)[0]) + self.size = cache_size def get_cache(self): print(self.cache) @@ -56,28 +56,28 @@ def clear_cache(self): def set_maxsize(self, max_size: Union[int, str]): if isinstance(max_size, str): self.storage_type = 'RAM' - self.size = int(re.split('[a-zA-Z]', max_size)[0]) + self.size = max_size else: self.storage_type = 'ITEMS' self.size = max_size - def _get_cache_size_bytes(self, return_gig=True): + def _get_cache_size_bytes(self): """Returns in GiB or MB""" cache_size = 0 for i in range(len(self.cache.index)): if isinstance(self.cache.iloc[i, 4], np.ndarray): - cache_size += (self.cache.iloc[i, 4].size * self.cache.iloc[i,4].itemsize) + cache_size += (self.cache.iloc[i, 4].size * self.cache.iloc[i, 4].itemsize) elif isinstance(self.cache.iloc[i, 4], Tuple): - cache_size += (self.cache.iloc[i, 4][0].size * self.cache.iloc[i, 4][0].itemsize) + \ - (self.cache.iloc[i, 4][1].size * self.cache.iloc[i, 4][1].itemsize) + for array in self.cache.iloc[i, 4]: + cache_size += (array.size * array.itemsize) elif isinstance(self.cache.iloc[i, 4], Path): cache_size += 0 elif isinstance(self.cache.iloc[i, 4], CNMF): - cache_size += sys.getsizeof(self.cache.iloc[i, 4].estimates) + cache_size += (self.cache.iloc[i, 4].estimates.A.data.nbytes + self.cache.iloc[i, 4].estimates.C.data.nbytes + self.cache.iloc[i, 4].estimates.b.data.nbytes + self.cache.iloc[i, 4].estimates.f.data.nbytes) else: cache_size += sys.getsizeof(self.cache.iloc[i, 4]) - if return_gig: + if self.size.endswith('G'): cache_size = cache_size / 1024**3 else: cache_size = cache_size / 1024**2 @@ -113,7 +113,7 @@ def _use_cache(instance, *args, **kwargs): # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry # if memory type is 'ITEMS': drop the least recently used and then add new item - if self.storage_type == 
'ITEMS' and len(self.cache.index) == self.size: + if self.storage_type == 'ITEMS' and len(self.cache.index) >= self.size: return_val = func(instance, *args, **kwargs) self.cache.drop( index=self.cache.sort_values( @@ -133,16 +133,7 @@ def _use_cache(instance, *args, **kwargs): ] return self.cache.iloc[len(self.cache.index) - 1, 4] # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again - elif self.storage_type == 'RAM' and self._get_cache_size_bytes() > self.size: - return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [ - instance._series["uuid"], - func.__name__, - args, - kwargs, - return_val, - time.time(), - ] + elif self.storage_type == 'RAM' and self._get_cache_size_bytes() > int(re.split('[a-zA-Z]', self.size)[0]): while self._get_cache_size_bytes() > self.size: self.cache.drop( index=self.cache.sort_values( @@ -152,6 +143,15 @@ def _use_cache(instance, *args, **kwargs): inplace=True, ) self.cache = self.cache.reset_index(drop=True) + return_val = func(instance, *args, **kwargs) + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] # no matter the storage type if size is not going to be exceeded for either, then item can just be added to cache else: return_val = func(instance, *args, **kwargs) From ee9b5d563f556017608282fb4b5b96517162dd0f Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Wed, 6 Jul 2022 09:00:11 -0400 Subject: [PATCH 18/34] updates to cache and extensions which extensions use cache --- mesmerize_core/caiman_extensions/cache.py | 30 +++++++++++++---------- mesmerize_core/caiman_extensions/cnmf.py | 3 --- mesmerize_core/caiman_extensions/mcorr.py | 1 - 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 13678da..67200f0 100644 --- 
a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -1,5 +1,6 @@ from functools import wraps from typing import Union, Optional, Tuple +from builtins import list import pandas as pd import time @@ -65,11 +66,14 @@ def _get_cache_size_bytes(self): """Returns in GiB or MB""" cache_size = 0 for i in range(len(self.cache.index)): + if isinstance(self.cache.iloc[i, 4], list): + for array in self.cache.iloc[i, 4]: + cache_size += array.data.nbytes if isinstance(self.cache.iloc[i, 4], np.ndarray): - cache_size += (self.cache.iloc[i, 4].size * self.cache.iloc[i, 4].itemsize) - elif isinstance(self.cache.iloc[i, 4], Tuple): + cache_size += self.cache.iloc[i, 4].data.nbytes + elif isinstance(self.cache.iloc[i, 4], tuple): for array in self.cache.iloc[i, 4]: - cache_size += (array.size * array.itemsize) + cache_size += array.data.nbytes elif isinstance(self.cache.iloc[i, 4], Path): cache_size += 0 elif isinstance(self.cache.iloc[i, 4], CNMF): @@ -79,7 +83,7 @@ def _get_cache_size_bytes(self): if self.size.endswith('G'): cache_size = cache_size / 1024**3 - else: + elif self.size.endswith('M'): cache_size = cache_size / 1024**2 return cache_size @@ -143,15 +147,15 @@ def _use_cache(instance, *args, **kwargs): inplace=True, ) self.cache = self.cache.reset_index(drop=True) - return_val = func(instance, *args, **kwargs) - self.cache.loc[len(self.cache.index)] = [ - instance._series["uuid"], - func.__name__, - args, - kwargs, - return_val, - time.time(), - ] + return_val = func(instance, *args, **kwargs) + self.cache.loc[len(self.cache.index)] = [ + instance._series["uuid"], + func.__name__, + args, + kwargs, + return_val, + time.time(), + ] # no matter the storage type if size is not going to be exceeded for either, then item can just be added to cache else: return_val = func(instance, *args, **kwargs) diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py index b0ed5e1..cfda76a 100644 --- 
a/mesmerize_core/caiman_extensions/cnmf.py +++ b/mesmerize_core/caiman_extensions/cnmf.py @@ -25,7 +25,6 @@ def __init__(self, s: pd.Series): self._series = s @validate("cnmf") - @cache.use_cache def get_cnmf_memmap(self) -> np.ndarray: """ Get the CNMF memmap @@ -41,7 +40,6 @@ def get_cnmf_memmap(self) -> np.ndarray: images = np.reshape(Yr.T, [T] + list(dims), order="F") return images - @cache.use_cache def get_input_memmap(self) -> np.ndarray: """ Return the F-order memmap if the input to the @@ -65,7 +63,6 @@ def get_input_memmap(self) -> np.ndarray: # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items @validate("cnmf") - @cache.use_cache def get_output_path(self) -> Path: """ Returns diff --git a/mesmerize_core/caiman_extensions/mcorr.py b/mesmerize_core/caiman_extensions/mcorr.py index c46e412..fa14418 100644 --- a/mesmerize_core/caiman_extensions/mcorr.py +++ b/mesmerize_core/caiman_extensions/mcorr.py @@ -21,7 +21,6 @@ def __init__(self, s: pd.Series): self._series = s @validate("mcorr") - @cache.use_cache def get_output_path(self) -> Path: """ Get the path to the motion corrected output memmap file From cc68e2f747b3baa0b5d6224bb0fce2282d9333ab Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Fri, 8 Jul 2022 15:22:14 -0400 Subject: [PATCH 19/34] final changes to cache, tests should pass except for linter --- mesmerize_core/caiman_extensions/cache.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 67200f0..2a6e8e7 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -1,12 +1,10 @@ from functools import wraps -from typing import Union, Optional, Tuple -from builtins import list +from typing import Union, Optional import pandas as pd import time import numpy as np import sys -from pathlib import Path from caiman.source_extraction.cnmf import 
CNMF import re @@ -21,6 +19,8 @@ def _check_arg_equality(args, cache_args): def _check_args_equality(args, cache_args): + if len(args) != len(cache_args): + return False equality = list() if isinstance(args, tuple): for arg, cache_arg in zip(args, cache_args): @@ -66,16 +66,12 @@ def _get_cache_size_bytes(self): """Returns in GiB or MB""" cache_size = 0 for i in range(len(self.cache.index)): - if isinstance(self.cache.iloc[i, 4], list): - for array in self.cache.iloc[i, 4]: - cache_size += array.data.nbytes if isinstance(self.cache.iloc[i, 4], np.ndarray): cache_size += self.cache.iloc[i, 4].data.nbytes elif isinstance(self.cache.iloc[i, 4], tuple): - for array in self.cache.iloc[i, 4]: - cache_size += array.data.nbytes - elif isinstance(self.cache.iloc[i, 4], Path): - cache_size += 0 + for lists in self.cache.iloc[i, 4]: + for array in lists: + cache_size += array.data.nbytes elif isinstance(self.cache.iloc[i, 4], CNMF): cache_size += (self.cache.iloc[i, 4].estimates.A.data.nbytes + self.cache.iloc[i, 4].estimates.C.data.nbytes + self.cache.iloc[i, 4].estimates.b.data.nbytes + self.cache.iloc[i, 4].estimates.f.data.nbytes) else: @@ -102,6 +98,7 @@ def _use_cache(instance, *args, **kwargs): return_val, time.time(), ] + return return_val # checking to see if there is a cache hit for i in range(len(self.cache.index)): From 398d952eecc3eccca907b955662b6a08e985e271 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Fri, 8 Jul 2022 16:15:35 -0400 Subject: [PATCH 20/34] kushal requested changes --- mesmerize_core/caiman_extensions/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 2a6e8e7..6511768 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -68,7 +68,7 @@ def _get_cache_size_bytes(self): for i in range(len(self.cache.index)): if isinstance(self.cache.iloc[i, 4], np.ndarray): cache_size += 
self.cache.iloc[i, 4].data.nbytes - elif isinstance(self.cache.iloc[i, 4], tuple): + elif isinstance(self.cache.iloc[i, 4], (tuple, list)): for lists in self.cache.iloc[i, 4]: for array in lists: cache_size += array.data.nbytes From b4bf027937129dd41805a54170a41733634dc478 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Fri, 8 Jul 2022 19:33:05 -0400 Subject: [PATCH 21/34] returning copies from cache for future downstream analysis, fixing cnmf object size computation, linter --- mesmerize_core/caiman_extensions/cache.py | 51 ++++++++++++++--------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 6511768..e1e1c20 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -7,6 +7,8 @@ import sys from caiman.source_extraction.cnmf import CNMF import re +from sys import getsizeof +import copy def _check_arg_equality(args, cache_args): @@ -31,6 +33,10 @@ def _check_args_equality(args, cache_args): return all(equality) +def _return_wrapper(output): + return copy.deepcopy(output) + + class Cache: def __init__(self, cache_size: Optional[Union[int, str]] = None): self.cache = pd.DataFrame( @@ -38,13 +44,13 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None): columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"], ) if cache_size is None: - self.size = '1G' - self.storage_type = 'RAM' + self.size = "1G" + self.storage_type = "RAM" elif isinstance(cache_size, int): - self.storage_type = 'ITEMS' + self.storage_type = "ITEMS" self.size = cache_size else: - self.storage_type = 'RAM' + self.storage_type = "RAM" self.size = cache_size def get_cache(self): @@ -56,10 +62,10 @@ def clear_cache(self): def set_maxsize(self, max_size: Union[int, str]): if isinstance(max_size, str): - self.storage_type = 'RAM' + self.storage_type = "RAM" self.size = max_size else: - self.storage_type = 'ITEMS' + 
self.storage_type = "ITEMS" self.size = max_size def _get_cache_size_bytes(self): @@ -73,13 +79,18 @@ def _get_cache_size_bytes(self): for array in lists: cache_size += array.data.nbytes elif isinstance(self.cache.iloc[i, 4], CNMF): - cache_size += (self.cache.iloc[i, 4].estimates.A.data.nbytes + self.cache.iloc[i, 4].estimates.C.data.nbytes + self.cache.iloc[i, 4].estimates.b.data.nbytes + self.cache.iloc[i, 4].estimates.f.data.nbytes) + sizes = list() + for attr in self.cache.iloc[i, 4].estimates.__dict__.values(): + if isinstance(attr, np.ndarray): + sizes.append(attr.data.nbytes) + else: + sizes.append(getsizeof(attr)) else: cache_size += sys.getsizeof(self.cache.iloc[i, 4]) - if self.size.endswith('G'): + if self.size.endswith("G"): cache_size = cache_size / 1024**3 - elif self.size.endswith('M'): + elif self.size.endswith("M"): cache_size = cache_size / 1024**2 return cache_size @@ -98,23 +109,23 @@ def _use_cache(instance, *args, **kwargs): return_val, time.time(), ] - return return_val + return _return_wrapper(return_val) # checking to see if there is a cache hit for i in range(len(self.cache.index)): if ( - self.cache.iloc[i, 0] == instance._series["uuid"] - and self.cache.iloc[i, 1] == func.__name__ - and _check_args_equality(args, self.cache.iloc[i, 2]) - and _check_arg_equality(kwargs, self.cache.iloc[i, 3]) + self.cache.iloc[i, 0] == instance._series["uuid"] + and self.cache.iloc[i, 1] == func.__name__ + and _check_args_equality(args, self.cache.iloc[i, 2]) + and _check_arg_equality(kwargs, self.cache.iloc[i, 3]) ): self.cache.iloc[i, 5] = time.time() return_val = self.cache.iloc[i, 4] - return self.cache.iloc[i, 4] + return _return_wrapper(self.cache.iloc[i, 4]) # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry # if memory type is 'ITEMS': drop the least recently used and then add new item - if self.storage_type == 'ITEMS' and len(self.cache.index) >= self.size: + if 
self.storage_type == "ITEMS" and len(self.cache.index) >= self.size: return_val = func(instance, *args, **kwargs) self.cache.drop( index=self.cache.sort_values( @@ -132,9 +143,11 @@ def _use_cache(instance, *args, **kwargs): return_val, time.time(), ] - return self.cache.iloc[len(self.cache.index) - 1, 4] + return _return_wrapper(self.cache.iloc[len(self.cache.index) - 1, 4]) # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again - elif self.storage_type == 'RAM' and self._get_cache_size_bytes() > int(re.split('[a-zA-Z]', self.size)[0]): + elif self.storage_type == "RAM" and self._get_cache_size_bytes() > int( + re.split("[a-zA-Z]", self.size)[0] + ): while self._get_cache_size_bytes() > self.size: self.cache.drop( index=self.cache.sort_values( @@ -165,6 +178,6 @@ def _use_cache(instance, *args, **kwargs): time.time(), ] - return return_val + return _return_wrapper(return_val) return _use_cache From 0da8b552579c28308f0b5725178eca44ee645f54 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Sat, 9 Jul 2022 08:18:56 -0400 Subject: [PATCH 22/34] updates to returning a copy or original of extension outputs --- mesmerize_core/caiman_extensions/cache.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index e1e1c20..5c231b3 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -33,8 +33,11 @@ def _check_args_equality(args, cache_args): return all(equality) -def _return_wrapper(output): - return copy.deepcopy(output) +def _return_wrapper(output, copy_bool): + if copy_bool == True: + return copy.deepcopy(output) + else: + return output class Cache: @@ -97,6 +100,10 @@ def _get_cache_size_bytes(self): def use_cache(self, func): @wraps(func) def _use_cache(instance, *args, **kwargs): + if "copy" in kwargs.keys(): + return_copy = 
kwargs["copy"] + else: + return_copy = True # if cache is empty, will always be a cache miss if len(self.cache.index) == 0: @@ -109,7 +116,7 @@ def _use_cache(instance, *args, **kwargs): return_val, time.time(), ] - return _return_wrapper(return_val) + return _return_wrapper(return_val, copy_bool=return_copy) # checking to see if there is a cache hit for i in range(len(self.cache.index)): @@ -121,7 +128,7 @@ def _use_cache(instance, *args, **kwargs): ): self.cache.iloc[i, 5] = time.time() return_val = self.cache.iloc[i, 4] - return _return_wrapper(self.cache.iloc[i, 4]) + return _return_wrapper(self.cache.iloc[i, 4], copy_bool=return_copy) # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry # if memory type is 'ITEMS': drop the least recently used and then add new item @@ -143,7 +150,7 @@ def _use_cache(instance, *args, **kwargs): return_val, time.time(), ] - return _return_wrapper(self.cache.iloc[len(self.cache.index) - 1, 4]) + return _return_wrapper(self.cache.iloc[len(self.cache.index) - 1, 4], copy_bool=return_copy) # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again elif self.storage_type == "RAM" and self._get_cache_size_bytes() > int( re.split("[a-zA-Z]", self.size)[0] @@ -178,6 +185,6 @@ def _use_cache(instance, *args, **kwargs): time.time(), ] - return _return_wrapper(return_val) + return _return_wrapper(return_val, copy_bool=return_copy) return _use_cache From e0e80defdd9496bb7bfbb124ec71701c96b69d0d Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Sat, 9 Jul 2022 14:41:46 -0400 Subject: [PATCH 23/34] final changes to cache, need to write tests still --- mesmerize_core/caiman_extensions/cache.py | 31 ++++++++++------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 5c231b3..8ba0d93 100644 --- 
a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -46,27 +46,28 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None): data=None, columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"], ) - if cache_size is None: - self.size = "1G" - self.storage_type = "RAM" - elif isinstance(cache_size, int): - self.storage_type = "ITEMS" - self.size = cache_size - else: - self.storage_type = "RAM" - self.size = cache_size + self.set_maxsize(cache_size) def get_cache(self): print(self.cache) + def get_cache2(self): + return self.cache + def clear_cache(self): while len(self.cache.index) != 0: self.cache.drop(index=self.cache.index[-1], axis=0, inplace=True) def set_maxsize(self, max_size: Union[int, str]): - if isinstance(max_size, str): + if max_size is None: self.storage_type = "RAM" - self.size = max_size + self.size = 1024**3 + elif isinstance(max_size, str): + self.storage_type = "RAM" + if max_size.endswith("G"): + self.size = int(max_size[:-1]) * 1024**3 + elif max_size.endswith("M"): + self.size = int(max_size[:-1]) * 1024**2 else: self.storage_type = "ITEMS" self.size = max_size @@ -91,10 +92,6 @@ def _get_cache_size_bytes(self): else: cache_size += sys.getsizeof(self.cache.iloc[i, 4]) - if self.size.endswith("G"): - cache_size = cache_size / 1024**3 - elif self.size.endswith("M"): - cache_size = cache_size / 1024**2 return cache_size def use_cache(self, func): @@ -152,9 +149,7 @@ def _use_cache(instance, *args, **kwargs): ] return _return_wrapper(self.cache.iloc[len(self.cache.index) - 1, 4], copy_bool=return_copy) # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again - elif self.storage_type == "RAM" and self._get_cache_size_bytes() > int( - re.split("[a-zA-Z]", self.size)[0] - ): + elif self.storage_type == "RAM": while self._get_cache_size_bytes() > self.size: self.cache.drop( index=self.cache.sort_values( From 
34d32136eb00e99c685b979ee18014a08f56e2db Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Sat, 9 Jul 2022 15:29:24 -0400 Subject: [PATCH 24/34] removing get_cache2() --- mesmerize_core/caiman_extensions/cache.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index 8ba0d93..adada39 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -49,9 +49,6 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None): self.set_maxsize(cache_size) def get_cache(self): - print(self.cache) - - def get_cache2(self): return self.cache def clear_cache(self): @@ -73,7 +70,7 @@ def set_maxsize(self, max_size: Union[int, str]): self.size = max_size def _get_cache_size_bytes(self): - """Returns in GiB or MB""" + """Returns in bytes""" cache_size = 0 for i in range(len(self.cache.index)): if isinstance(self.cache.iloc[i, 4], np.ndarray): @@ -147,7 +144,9 @@ def _use_cache(instance, *args, **kwargs): return_val, time.time(), ] - return _return_wrapper(self.cache.iloc[len(self.cache.index) - 1, 4], copy_bool=return_copy) + return _return_wrapper( + self.cache.iloc[len(self.cache.index) - 1, 4], copy_bool=return_copy + ) # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again elif self.storage_type == "RAM": while self._get_cache_size_bytes() > self.size: From 8de3b151f9f2c13ea66059e3a8e487772b0226f2 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Sat, 9 Jul 2022 15:30:59 -0400 Subject: [PATCH 25/34] setting default copy = true --- mesmerize_core/caiman_extensions/cnmf.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py index cfda76a..053125b 100644 --- a/mesmerize_core/caiman_extensions/cnmf.py +++ b/mesmerize_core/caiman_extensions/cnmf.py @@ 
-74,7 +74,7 @@ def get_output_path(self) -> Path: @validate("cnmf") @cache.use_cache - def get_output(self) -> CNMF: + def get_output(self, copy=True) -> CNMF: """ Returns ------- @@ -89,7 +89,7 @@ def get_output(self) -> CNMF: @validate("cnmf") @cache.use_cache def get_spatial_masks( - self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01 + self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01, copy=True ) -> np.ndarray: """ Get binary masks of the spatial components at the given `ixs` @@ -156,7 +156,7 @@ def _get_spatial_contours( @validate("cnmf") @cache.use_cache def get_spatial_contours( - self, ixs_components: Optional[np.ndarray] = None + self, ixs_components: Optional[np.ndarray] = None, copy=True ) -> Tuple[List[np.ndarray], List[np.ndarray]]: """ Get the contour and center of mass for each spatial footprint @@ -190,7 +190,7 @@ def get_spatial_contours( @validate("cnmf") @cache.use_cache def get_temporal_components( - self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False + self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False, copy=True ) -> np.ndarray: """ Get the temporal components for this CNMF item From 62755a7183170f4c4d7808b65347f5184423da79 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Sat, 9 Jul 2022 15:31:53 -0400 Subject: [PATCH 26/34] adding tests for cache, still need to debug --- tests/test_core.py | 129 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index e54e5ff..33bfc30 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,5 +1,6 @@ import os +import numpy as np from caiman.utils.utils import load_dict_from_hdf5 from caiman.source_extraction.cnmf import cnmf import numpy.testing @@ -24,6 +25,7 @@ import shutil from zipfile import ZipFile from pprint import pprint +from mesmerize_core.caiman_extensions import mcorr, cnmf tmp_dir = 
Path(os.path.dirname(os.path.abspath(__file__)), "tmp") vid_dir = Path(os.path.dirname(os.path.abspath(__file__)), "videos") @@ -1036,3 +1038,130 @@ def test_remove_item(): assert df.isin([f"test-{algo}"]).any().any() == False assert df.isin([f"test1-{algo}"]).any().any() == False assert df.empty == True + +def test_cache(): + set_parent_raw_data_path(vid_dir) + algo = "mcorr" + + df, batch_path = _create_tmp_batch() + + batch_path = Path(batch_path) + batch_dir = batch_path.parent + + input_movie_path = get_datafile(algo) + print(input_movie_path) + + df.caiman.add_item( + algo=algo, + name=f"test-{algo}", + input_movie_path=input_movie_path, + params=test_params[algo], + ) + + assert df.iloc[-1]["algo"] == algo + assert df.iloc[-1]["name"] == f"test-{algo}" + assert df.iloc[-1]["params"] == test_params[algo] + assert df.iloc[-1]["outputs"] is None + try: + UUID(df.iloc[-1]["uuid"]) + except: + pytest.fail("Something wrong with setting UUID for batch items") + + assert vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) == vid_dir.joinpath( + f"{algo}.tif" + ) + + process = df.iloc[-1].caiman.run() + process.wait() + + df = load_batch(batch_path) + + with pd.option_context("display.max_rows", None, "display.max_columns", None): + print(df) + + pprint(df.iloc[-1]["outputs"], width=-1) + print(df.iloc[-1]["outputs"]["traceback"]) + assert df.iloc[-1]["outputs"]["success"] is True + assert df.iloc[-1]["outputs"]["traceback"] is None + + assert ( + batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', + ) + ) + + algo = "cnmf" + print("Testing cnmf") + input_movie_path = df.iloc[-1].mcorr.get_output_path() + df.caiman.add_item( + algo=algo, + name=f"test-{algo}", + input_movie_path=input_movie_path, + params=test_params[algo], + ) + + assert df.iloc[-1]["algo"] 
== algo + assert df.iloc[-1]["name"] == f"test-{algo}" + assert df.iloc[-1]["params"] == test_params[algo] + assert df.iloc[-1]["outputs"] is None + try: + UUID(df.iloc[-1]["uuid"]) + except: + pytest.fail("Something wrong with setting UUID for batch items") + print("cnmf input_movie_path:", df.iloc[-1]["input_movie_path"]) + assert batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) == input_movie_path + + process = df.iloc[-1].caiman.run() + process.wait() + + df = load_batch(batch_path) + + with pd.option_context("display.max_rows", None, "display.max_columns", None): + print(df) + + pprint(df.iloc[-1]["outputs"], width=-1) + print(df.iloc[-1]["outputs"]["traceback"]) + + # test that cache values are returned when calls are made to same function + + # testing that cache size limits work + cnmf.cache.set_maxsize("1M") + cnmf_output = df.iloc[-1].cnmf.get_output() + hex_get_output = hex(id(cnmf_output)) + cache = cnmf.cache.get_cache() + hex1 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item())) + #assert(hex(id(df.iloc[-1].cnmf.get_output(copy=False))) == hex1) + #assert(hex_get_output != hex1) + time_stamp1 = cache[cache["function"] == "get_output"]["time_stamp"].item() + df.iloc[-1].cnmf.get_temporal_components() + df.iloc[-1].cnmf.get_spatial_contours() + df.iloc[-1].cnmf.get_spatial_masks() + df.iloc[-1].cnmf.get_temporal_components(np.arange(7)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(8)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(9)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(6)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(5)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(4)) + df.iloc[-1].cnmf.get_temporal_components(np.arange(3)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(8)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(9)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(7)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(6)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(5)) + 
df.iloc[-1].cnmf.get_spatial_masks(np.arange(4)) + df.iloc[-1].cnmf.get_spatial_masks(np.arange(3)) + time_stamp2 = cache[cache["function"] == "get_output"]["time_stamp"].item() + hex2 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item())) + assert(cache[cache["function"] == "get_output"].index.size == 1) + assert(len(cnmf.cache.get_cache().index) == 17) + assert(time_stamp2 > time_stamp1) + assert(hex1 == hex2) + + + + + + From 7ee7df8ebd6a4a1931cdb1be28e8966416ec85db Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Wed, 13 Jul 2022 10:17:49 -0400 Subject: [PATCH 27/34] tests for cache, still need to fix issue with maxsize=0 --- tests/test_core.py | 337 ++++++++++++++++++++++++--------------------- 1 file changed, 181 insertions(+), 156 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index 33bfc30..ec07fb6 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -169,34 +169,34 @@ def test_mcorr(): # test that batch path is propagated to pd.Series assert ( - df.attrs["batch_path"] - == df.paths.get_batch_path() - == df.iloc[-1].paths.get_batch_path() - == df.iloc[-1].attrs["batch_path"] + df.attrs["batch_path"] + == df.paths.get_batch_path() + == df.iloc[-1].paths.get_batch_path() + == df.iloc[-1].attrs["batch_path"] ) # test that path resolve works for parent_raw_dir rel_input_movie_path = input_movie_path.relative_to(vid_dir) assert ( - df.paths.resolve(rel_input_movie_path) - == df.iloc[-1].paths.resolve(rel_input_movie_path) - == input_movie_path + df.paths.resolve(rel_input_movie_path) + == df.iloc[-1].paths.resolve(rel_input_movie_path) + == input_movie_path ) # test that path splitting works for parent_raw_dir split = (vid_dir, input_movie_path.relative_to(vid_dir)) assert ( - df.paths.split(input_movie_path) - == df.iloc[-1].paths.split(input_movie_path) - == split + df.paths.split(input_movie_path) + == df.iloc[-1].paths.split(input_movie_path) + == split ) # test that the input_movie_path in the DataFrame rows are 
relative assert Path(df.iloc[-1]["input_movie_path"]) == split[1] assert ( - get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) - == vid_dir.joinpath(f"{algo}.tif") - == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) + == vid_dir.joinpath(f"{algo}.tif") + == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) process = df.iloc[-1].caiman.run() @@ -219,76 +219,76 @@ def test_mcorr(): ) rel_mcorr_memmap_path = mcorr_memmap_path.relative_to(batch_dir) assert ( - df.paths.resolve(rel_mcorr_memmap_path) - == df.iloc[-1].paths.resolve(rel_mcorr_memmap_path) - == mcorr_memmap_path + df.paths.resolve(rel_mcorr_memmap_path) + == df.iloc[-1].paths.resolve(rel_mcorr_memmap_path) + == mcorr_memmap_path ) # test that path splitting works for batch_dir split = (batch_dir, mcorr_memmap_path.relative_to(batch_dir)) assert ( - df.paths.split(mcorr_memmap_path) - == df.iloc[-1].paths.split(mcorr_memmap_path) - == split + df.paths.split(mcorr_memmap_path) + == df.iloc[-1].paths.split(mcorr_memmap_path) + == split ) assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) # test to check mmap output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + 
f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', + ) ) # test to check mean-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' + ) ) # test to check std-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' + ) ) # test to check max-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' + ) ) # test to check correlation image output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') + 
batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') ) # test to check mcorr get_output_path() assert ( - df.iloc[-1].mcorr.get_output_path() - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', - ) - == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + df.iloc[-1].mcorr.get_output_path() + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', + ) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) ) # test to check mcorr get_output() @@ -374,12 +374,12 @@ def test_cnmf(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap', + ) ) algo = "cnmf" @@ -419,59 +419,59 @@ def test_cnmf(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) assert ( - batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == 
batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) ) # test to check mmap output path assert ( - batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_60_d2_80_d3_1_order_C_frames_2000_.mmap', - ) - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_60_d2_80_d3_1_order_C_frames_2000_.mmap', + ) + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) ) # test to check mean-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' + ) ) # test to check std-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' + ) ) # test to check max-projection output path assert ( - 
batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' + ) ) # test to check correlation image output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') + batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') ) print("testing cnmf.get_cnmf_memmap()") @@ -495,10 +495,10 @@ def test_cnmf(): # test to check cnmf get_output_path() assert ( - df.iloc[-1].cnmf.get_output_path() - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') + df.iloc[-1].cnmf.get_output_path() + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5') ) # test to check cnmf get_output() @@ -526,11 +526,11 @@ def test_cnmf(): allow_pickle=True, ) for contour, actual_contour in zip( - cnmf_spatial_contours_contours, cnmf_spatial_contours_contours_actual + cnmf_spatial_contours_contours, cnmf_spatial_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip( - cnmf_spatial_contours_coms, 
cnmf_spatial_contours_coms_actual + cnmf_spatial_contours_coms, cnmf_spatial_contours_coms_actual ): numpy.testing.assert_allclose(com, actual_com, rtol=1e-2, atol=1e-10) @@ -554,9 +554,9 @@ def test_cnmf(): # test to check caiman get_input_movie_path(), should be output of previous mcorr assert ( - df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) - == batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) + df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + == batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) ) # test to check caiman get_correlation_img() @@ -606,7 +606,7 @@ def test_cnmf(): allow_pickle=True, ) for contour, actual_contour in zip( - ixs_contours_contours, ixs_contours_contours_actual + ixs_contours_contours, ixs_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip(ixs_contours_coms, ixs_contours_coms_actual): @@ -677,9 +677,9 @@ def test_cnmfe(): pytest.fail("Something wrong with setting UUID for batch items") assert ( - batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) - == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path()) - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) + == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path()) + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) process = df.iloc[-1].caiman.run() @@ -698,9 +698,9 @@ def test_cnmfe(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) assert batch_dir.joinpath( @@ -755,9 +755,9 @@ def test_cnmfe(): pytest.fail("Something wrong with setting UUID for batch items") assert ( - 
batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) - == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path()) - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) + == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path()) + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) process = df.iloc[-1].caiman.run() @@ -776,9 +776,9 @@ def test_cnmfe(): assert df.iloc[-1]["outputs"]["traceback"] is None assert ( - input_movie_path - == df.iloc[-1].caiman.get_input_movie_path() - == df.paths.resolve(df.iloc[-1]["input_movie_path"]) + input_movie_path + == df.iloc[-1].caiman.get_input_movie_path() + == df.paths.resolve(df.iloc[-1]["input_movie_path"]) ) assert batch_dir.joinpath( @@ -787,53 +787,53 @@ def test_cnmfe(): # test to check mmap output path assert ( - batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), - f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_128_d2_128_d3_1_order_C_frames_1000_.mmap', - ) - == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), + f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_128_d2_128_d3_1_order_C_frames_1000_.mmap', + ) + == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"]) ) # test to check mean-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy' + ) ) # test to check std-projection output path assert ( - 
batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy' + ) ) # test to check max-projection output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) - == batch_dir.joinpath( - str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' - ) + batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"]) + == batch_dir.joinpath( + str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy' + ) ) # test to check correlation image output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') + batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy') ) # test to check pnr image output path assert ( - batch_dir.joinpath(df.iloc[-1]["outputs"]["pnr-image-path"]) - == df.paths.resolve(df.iloc[-1]["outputs"]["pnr-image-path"]) - == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_pn.npy') + batch_dir.joinpath(df.iloc[-1]["outputs"]["pnr-image-path"]) + == df.paths.resolve(df.iloc[-1]["outputs"]["pnr-image-path"]) + == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_pn.npy') ) # extension tests - full @@ -857,9 +857,9 @@ def 
test_cnmfe(): # test to check cnmf get_output_path() assert ( - df.iloc[-1].cnmf.get_output_path() - == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) - == df.iloc[-1].paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + df.iloc[-1].cnmf.get_output_path() + == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) + == df.iloc[-1].paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"]) ) # test to check cnmf get_output() @@ -887,11 +887,11 @@ def test_cnmfe(): allow_pickle=True, ) for contour, actual_contour in zip( - cnmfe_spatial_contours_contours, cnmfe_spatial_contours_contours_actual + cnmfe_spatial_contours_contours, cnmfe_spatial_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip( - cnmfe_spatial_contours_coms, cnmfe_spatial_contours_coms_actual + cnmfe_spatial_contours_coms, cnmfe_spatial_contours_coms_actual ): numpy.testing.assert_allclose(com, actual_com, rtol=1e-2, atol=1e-10) @@ -946,7 +946,7 @@ def test_cnmfe(): allow_pickle=True, ) for contour, actual_contour in zip( - ixs_contours_contours, ixs_contours_contours_actual + ixs_contours_contours, ixs_contours_contours_actual ): numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10) for com, actual_com in zip(ixs_contours_coms, ixs_contours_coms_actual): @@ -1000,9 +1000,9 @@ def test_remove_item(): pytest.fail("Something wrong with setting UUID for batch items") assert ( - get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) - == vid_dir.joinpath(f"{algo}.tif") - == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) + get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) + == vid_dir.joinpath(f"{algo}.tif") + == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) ) df.caiman.add_item( @@ -1022,9 +1022,9 @@ def test_remove_item(): pytest.fail("Something wrong with setting UUID for batch items") assert ( - get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) - == 
vid_dir.joinpath(f"{algo}.tif") - == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) + get_full_raw_data_path(df.iloc[-1]["input_movie_path"]) + == vid_dir.joinpath(f"{algo}.tif") + == vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) ) # Check removing specific rows works assert df.iloc[0]["name"] == f"test-{algo}" @@ -1039,6 +1039,7 @@ def test_remove_item(): assert df.isin([f"test1-{algo}"]).any().any() == False assert df.empty == True + def test_cache(): set_parent_raw_data_path(vid_dir) algo = "mcorr" @@ -1133,8 +1134,8 @@ def test_cache(): hex_get_output = hex(id(cnmf_output)) cache = cnmf.cache.get_cache() hex1 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item())) - #assert(hex(id(df.iloc[-1].cnmf.get_output(copy=False))) == hex1) - #assert(hex_get_output != hex1) + # assert(hex(id(df.iloc[-1].cnmf.get_output(copy=False))) == hex1) + # assert(hex_get_output != hex1) time_stamp1 = cache[cache["function"] == "get_output"]["time_stamp"].item() df.iloc[-1].cnmf.get_temporal_components() df.iloc[-1].cnmf.get_spatial_contours() @@ -1155,12 +1156,36 @@ def test_cache(): df.iloc[-1].cnmf.get_spatial_masks(np.arange(3)) time_stamp2 = cache[cache["function"] == "get_output"]["time_stamp"].item() hex2 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item())) - assert(cache[cache["function"] == "get_output"].index.size == 1) - assert(len(cnmf.cache.get_cache().index) == 17) - assert(time_stamp2 > time_stamp1) - assert(hex1 == hex2) + assert (cache[cache["function"] == "get_output"].index.size == 1) + assert (len(cnmf.cache.get_cache().index) == 17) + assert (time_stamp2 > time_stamp1) + assert (hex1 == hex2) + + # test clear_cache() + cnmf.cache.clear_cache() + assert (len(cnmf.cache.get_cache().index) == 0) + + import time + + start = time.time() + df.iloc[-1].cnmf.get_output() + end = time.time() + assert (len(cnmf.cache.get_cache().index) == 1) + + start2 = time.time() + df.iloc[-1].cnmf.get_output() + end2 = time.time() + + 
assert(end2-start2 < end-start) + # test setting maxsize as 0 + cnmf.cache.clear_cache() + assert (len(cnmf.cache.get_cache().index) == 0) + cnmf.cache.set_maxsize(0) + df.iloc[-1].cnmf.get_output() + print(cnmf.cache.get_cache()) + # assert (len(cnmf.cache.get_cache().index) == 0) From 2d53a51923ac90fbcc604d0112f6402571a2b699 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Wed, 13 Jul 2022 14:25:53 -0400 Subject: [PATCH 28/34] updating tests and cache to handle when cache is set to size 0 --- mesmerize_core/caiman_extensions/cache.py | 4 ++++ tests/test_core.py | 12 ++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index adada39..b01d69a 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -99,6 +99,10 @@ def _use_cache(instance, *args, **kwargs): else: return_copy = True + if self.size == 0: + self.clear_cache() + return _return_wrapper(func(instance, *args, **kwargs), return_copy) + # if cache is empty, will always be a cache miss if len(self.cache.index) == 0: return_val = func(instance, *args, **kwargs) diff --git a/tests/test_core.py b/tests/test_core.py index ec07fb6..e09d754 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1183,9 +1183,17 @@ def test_cache(): assert (len(cnmf.cache.get_cache().index) == 0) cnmf.cache.set_maxsize(0) + start = time.time() df.iloc[-1].cnmf.get_output() - print(cnmf.cache.get_cache()) - # assert (len(cnmf.cache.get_cache().index) == 0) + end = time.time() + assert (len(cnmf.cache.get_cache().index) == 0) + + start2 = time.time() + df.iloc[-1].cnmf.get_output() + end2 = time.time() + assert (len(cnmf.cache.get_cache().index) == 0) + assert(abs((end-start)-(end2-start2)) < 0.01) + From 66292165c91c0976cd5e1f9f186918eb3a80f12f Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Fri, 15 Jul 2022 18:29:06 -0400 Subject: [PATCH 29/34] updates to cache tests 
--- tests/test_core.py | 70 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index e09d754..eb6ed50 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -25,7 +25,8 @@ import shutil from zipfile import ZipFile from pprint import pprint -from mesmerize_core.caiman_extensions import mcorr, cnmf +from mesmerize_core.caiman_extensions import cnmf +import time tmp_dir = Path(os.path.dirname(os.path.abspath(__file__)), "tmp") vid_dir = Path(os.path.dirname(os.path.abspath(__file__)), "videos") @@ -1157,31 +1158,40 @@ def test_cache(): time_stamp2 = cache[cache["function"] == "get_output"]["time_stamp"].item() hex2 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item())) assert (cache[cache["function"] == "get_output"].index.size == 1) + # after adding enough items for cache to exceed max size, cache should remove least recently used items until + # size is back under max assert (len(cnmf.cache.get_cache().index) == 17) + # the time stamp to get_output the second time should be greater than the original time + # stamp because the cached item is being returned and therefore will have been accessed more recently assert (time_stamp2 > time_stamp1) + # the hex id of the item in the cache when get_output is first called + # should be the same hex id of the item in the cache when get_output is called again assert (hex1 == hex2) # test clear_cache() cnmf.cache.clear_cache() assert (len(cnmf.cache.get_cache().index) == 0) - import time - + # checking that cache is cleared, checking speed at which item is returned start = time.time() df.iloc[-1].cnmf.get_output() end = time.time() assert (len(cnmf.cache.get_cache().index) == 1) + # second call to item now added to cache, time to return item should be much faster than before because item has + # now been cached start2 = time.time() df.iloc[-1].cnmf.get_output() end2 = time.time() - assert(end2-start2 < 
end-start) - # test setting maxsize as 0 + # testing clear_cache() again, length of dataframe should be zero cnmf.cache.clear_cache() assert (len(cnmf.cache.get_cache().index) == 0) + # test setting maxsize as 0, should effectively disable the cache...additionally, time to return an item called + # twice should roughly be the same because item is not being stored in the cache + # cache length should remain zero throughout calls to extension functions cnmf.cache.set_maxsize(0) start = time.time() df.iloc[-1].cnmf.get_output() @@ -1194,6 +1204,56 @@ def test_cache(): assert (len(cnmf.cache.get_cache().index) == 0) assert(abs((end-start)-(end2-start2)) < 0.01) + # test to check that separate cache items are being returned for different batch items + # must add another item to the batch, running cnmfe + + input_movie_path = get_datafile("cnmfe") + print(input_movie_path) + df.caiman.add_item( + algo="mcorr", + name=f"test-cnmfe-mcorr", + input_movie_path=input_movie_path, + params=test_params["mcorr"], + ) + process = df.iloc[-1].caiman.run() + process.wait() + + df = load_batch(batch_path) + + algo = "cnmfe" + param_name = "cnmfe_full" + input_movie_path = df.iloc[-1].mcorr.get_output_path() + print(input_movie_path) + + df.caiman.add_item( + algo=algo, + name=f"test-{algo}", + input_movie_path=input_movie_path, + params=test_params[param_name], + ) + + process = df.iloc[-1].caiman.run() + process.wait() + + df = load_batch(batch_path) + + cnmf.cache.set_maxsize("1M") + + df.iloc[1].cnmf.get_output() # cnmf output + df.iloc[-1].cnmf.get_output() # cnmfe output + + cache = cnmf.cache.get_cache() + + # checking that both outputs from different batch items are added to the cache + assert(len(cache.index) == 2) + + # checking that the uuids of the outputs from the different batch items are not the same + assert(cache.iloc[-1]["uuid"] != cache.iloc[-2]["uuid"]) + + # checking that the uuid of the output in the cache is the correct uuid of the batch item in the df + 
assert(cache.iloc[-1]["uuid"] == df.iloc[-1]["uuid"]) + + From a1d30da78a14ec8b162b392a2662711b0d358a54 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Sun, 17 Jul 2022 22:05:21 -0400 Subject: [PATCH 30/34] updates to cache tests --- mesmerize_core/caiman_extensions/cache.py | 4 ++-- mesmerize_core/caiman_extensions/cnmf.py | 8 ++++---- tests/test_core.py | 24 +++++++++++++++++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py index b01d69a..c7814f3 100644 --- a/mesmerize_core/caiman_extensions/cache.py +++ b/mesmerize_core/caiman_extensions/cache.py @@ -94,8 +94,8 @@ def _get_cache_size_bytes(self): def use_cache(self, func): @wraps(func) def _use_cache(instance, *args, **kwargs): - if "copy" in kwargs.keys(): - return_copy = kwargs["copy"] + if "return_copy" in kwargs.keys(): + return_copy = kwargs["return_copy"] else: return_copy = True diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py index 053125b..3c853c3 100644 --- a/mesmerize_core/caiman_extensions/cnmf.py +++ b/mesmerize_core/caiman_extensions/cnmf.py @@ -74,7 +74,7 @@ def get_output_path(self) -> Path: @validate("cnmf") @cache.use_cache - def get_output(self, copy=True) -> CNMF: + def get_output(self, return_copy=True) -> CNMF: """ Returns ------- @@ -89,7 +89,7 @@ def get_output(self, copy=True) -> CNMF: @validate("cnmf") @cache.use_cache def get_spatial_masks( - self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01, copy=True + self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01, return_copy=True ) -> np.ndarray: """ Get binary masks of the spatial components at the given `ixs` @@ -156,7 +156,7 @@ def _get_spatial_contours( @validate("cnmf") @cache.use_cache def get_spatial_contours( - self, ixs_components: Optional[np.ndarray] = None, copy=True + self, ixs_components: Optional[np.ndarray] = None, 
return_copy=True ) -> Tuple[List[np.ndarray], List[np.ndarray]]: """ Get the contour and center of mass for each spatial footprint @@ -190,7 +190,7 @@ def get_spatial_contours( @validate("cnmf") @cache.use_cache def get_temporal_components( - self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False, copy=True + self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False, return_copy=True ) -> np.ndarray: """ Get the temporal components for this CNMF item diff --git a/tests/test_core.py b/tests/test_core.py index eb6ed50..c756217 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1253,6 +1253,30 @@ def test_cache(): # checking that the uuid of the output in the cache is the correct uuid of the batch item in the df assert(cache.iloc[-1]["uuid"] == df.iloc[-1]["uuid"]) + # call get output from cnmf, check that it is the most recent thing called in the cache + df.iloc[1].cnmf.get_output() + cnmf_uuid = df.iloc[1]["uuid"] + most_recently_called = cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1] + cache_uuid = most_recently_called["uuid"] + assert(cnmf_uuid == cache_uuid) + + # check to make sure by certain params that it is cnmf vs cnmfe + output = df.iloc[1].cnmf.get_output() + assert(output.params.patch["low_rank_background"] == True) + output2 = df.iloc[-1].cnmf.get_output() + assert(output2.params.patch["low_rank_background"] == False) + + # test for copy + # if return_copy=True, then hex id of calls to the same function should be false + assert(hex(id(output)) != hex(id(cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1]))) + # if return_copy=False, then hex id of calls to the same function should be true + df.iloc[1].cnmf.get_output(return_copy=False) + df.iloc[1].cnmf.get_output(return_copy=False) + output = df.iloc[1].cnmf.get_output(return_copy=False) + output2 = df.iloc[1].cnmf.get_output(return_copy=False) + assert(hex(id(output)) == hex(id(output2))) + + From 
6e3437329b067fc1c5c5c41f1238db3a55aa03a2 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Sun, 17 Jul 2022 22:21:27 -0400 Subject: [PATCH 31/34] dumb kushal tests --- tests/test_core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_core.py b/tests/test_core.py index c756217..9aa95d7 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1268,13 +1268,13 @@ def test_cache(): # test for copy # if return_copy=True, then hex id of calls to the same function should be false + output = df.iloc[1].cnmf.get_output() assert(hex(id(output)) != hex(id(cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1]))) # if return_copy=False, then hex id of calls to the same function should be true - df.iloc[1].cnmf.get_output(return_copy=False) - df.iloc[1].cnmf.get_output(return_copy=False) output = df.iloc[1].cnmf.get_output(return_copy=False) output2 = df.iloc[1].cnmf.get_output(return_copy=False) assert(hex(id(output)) == hex(id(output2))) + assert(hex(id(cnmf.cache.get_cache().iloc[-1]["return_val"])) == hex(id(output))) From 0928f24c044d44f974cfb1b615532751250e4d5d Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Sun, 17 Jul 2022 22:26:44 -0400 Subject: [PATCH 32/34] insignificant merge conflict --- mesmerize_core/caiman_extensions/cnmf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py index 3c853c3..e29caf6 100644 --- a/mesmerize_core/caiman_extensions/cnmf.py +++ b/mesmerize_core/caiman_extensions/cnmf.py @@ -259,7 +259,7 @@ def get_reconstructed_movie( ixs_frames = (ixs_frames, ixs_frames + 1) dn = cnmf_obj.estimates.A[:, idx_components].dot( - cnmf_obj.estimates.C[idx_components, ixs_frames[0] : ixs_frames[1]] + cnmf_obj.estimates.C[idx_components, ixs_frames[0]: ixs_frames[1]] ) if add_background: From c13888c7e2b98b53d52d99f9fcf4273b1b013b30 Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Thu, 21 Jul 2022 10:26:46 
-0400 Subject: [PATCH 33/34] fixing cache tests --- tests/test_core.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_core.py b/tests/test_core.py index 9aa95d7..058d7ce 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -633,6 +633,8 @@ def test_cnmf(): ixs_reconstructed_movie, ixs_reconstructed_movie_actual, rtol=1e2, atol=1e-10 ) + cnmf.cache.clear_cache() + def test_cnmfe(): set_parent_raw_data_path(vid_dir) @@ -977,6 +979,8 @@ def test_cnmfe(): ixs_reconstructed_movie, ixs_reconstructed_movie_actual, rtol=1e2, atol=1e-10 ) + cnmf.cache.clear_cache() + def test_remove_item(): set_parent_raw_data_path(vid_dir) From 98b78d3d506339b211d5378b495011a290c0698e Mon Sep 17 00:00:00 2001 From: Caitlin Lewis Date: Thu, 21 Jul 2022 17:18:42 -0400 Subject: [PATCH 34/34] hopefully the last changes to cache as of now --- mesmerize_core/caiman_extensions/mcorr.py | 1 - tests/test_core.py | 8 +++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/mesmerize_core/caiman_extensions/mcorr.py b/mesmerize_core/caiman_extensions/mcorr.py index fa14418..70c5866 100644 --- a/mesmerize_core/caiman_extensions/mcorr.py +++ b/mesmerize_core/caiman_extensions/mcorr.py @@ -33,7 +33,6 @@ def get_output_path(self) -> Path: return self._series.paths.resolve(self._series["outputs"]["mcorr-output-path"]) @validate("mcorr") - @cache.use_cache def get_output(self) -> np.ndarray: """ Get the motion corrected output as a memmaped numpy array, allows fast random-access scrolling. 
diff --git a/tests/test_core.py b/tests/test_core.py index 058d7ce..e4c5980 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -633,8 +633,6 @@ def test_cnmf(): ixs_reconstructed_movie, ixs_reconstructed_movie_actual, rtol=1e2, atol=1e-10 ) - cnmf.cache.clear_cache() - def test_cnmfe(): set_parent_raw_data_path(vid_dir) @@ -979,8 +977,6 @@ def test_cnmfe(): ixs_reconstructed_movie, ixs_reconstructed_movie_actual, rtol=1e2, atol=1e-10 ) - cnmf.cache.clear_cache() - def test_remove_item(): set_parent_raw_data_path(vid_dir) @@ -1046,6 +1042,9 @@ def test_remove_item(): def test_cache(): + print("*** Testing cache ***") + cnmf.cache.clear_cache() + set_parent_raw_data_path(vid_dir) algo = "mcorr" @@ -1100,7 +1099,6 @@ def test_cache(): ) algo = "cnmf" - print("Testing cnmf") input_movie_path = df.iloc[-1].mcorr.get_output_path() df.caiman.add_item( algo=algo,