From ebd2472e5f9b45481808b0c4ba873c2f3fbdf49a Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 28 Jun 2022 12:16:01 -0400
Subject: [PATCH 01/34] creating cache for faster access to results of previous
 cnmf extension calls

---
 mesmerize_core/caiman_extensions/cache.py | 43 +++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 mesmerize_core/caiman_extensions/cache.py

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
new file mode 100644
index 0000000..ba30429
--- /dev/null
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -0,0 +1,43 @@
+from functools import wraps
+import pandas as pd
+import time
+
+class Cache:
+    def __init__(self, cache_size=3):
+        self.cache = pd.DataFrame(data=None, columns=['function', 'args', 'kwargs', 'return_val', 'time_stamp'])
+        self.cache_size = cache_size
+
+    def get_cache(self):
+        print(self.cache)
+
+    def use_cache(self, func):
+        @wraps(func)
+        def _use_cache(*args, **kwargs):
+
+            # if cache is empty, will always be a cache miss
+            if len(self.cache.index) == 0:
+                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()]
+
+            # checking to see if there is a cache hit
+            for i in range(len(self.cache.index)):
+                if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[
+                    i, 2] == kwargs:
+                    self.cache.iloc[i, 4] = time.time()
+                    return self.cache.iloc[i, 3]
+
+            # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
+            if len(self.cache.index) == self.cache_size:
+                self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0,
+                                inplace=True)
+                self.cache = self.cache.reset_index(drop=True)
+                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()]
+                return self.cache.iloc[len(self.cache.index) - 1, 3]
+            else:
+                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()]
+
+            return func(*args, **kwargs)
+
+        return _use_cache
+
+
+cache = Cache()
\ No newline at end of file

From 22c4452f7f67e83da1e6f7ea353291b820df1055 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 28 Jun 2022 13:47:25 -0400
Subject: [PATCH 02/34] instantiating cache object in extension files, adding
 decorators

---
 mesmerize_core/caiman_extensions/cache.py | 12 ++++-----
 mesmerize_core/caiman_extensions/cnmf.py  | 30 ++++++++++++++---------
 mesmerize_core/caiman_extensions/mcorr.py |  6 +++++
 3 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index ba30429..ff76b95 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -2,8 +2,9 @@
 import pandas as pd
 import time
 
+
 class Cache:
-    def __init__(self, cache_size=3):
+    def __init__(self, cache_size=10):
         self.cache = pd.DataFrame(data=None, columns=['function', 'args', 'kwargs', 'return_val', 'time_stamp'])
         self.cache_size = cache_size
 
@@ -20,12 +21,12 @@ def _use_cache(*args, **kwargs):
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
-                if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[
-                    i, 2] == kwargs:
+                if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[i, 2] == kwargs:
                     self.cache.iloc[i, 4] = time.time()
                     return self.cache.iloc[i, 3]
 
-            # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
+            # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used
+            # and add new entry
             if len(self.cache.index) == self.cache_size:
                 self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0,
                                 inplace=True)
@@ -38,6 +39,3 @@ def _use_cache(*args, **kwargs):
             return func(*args, **kwargs)
 
         return _use_cache
-
-
-cache = Cache()
\ No newline at end of file
diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py
index 0b1a1ff..c221774 100644
--- a/mesmerize_core/caiman_extensions/cnmf.py
+++ b/mesmerize_core/caiman_extensions/cnmf.py
@@ -10,6 +10,9 @@
 from caiman.utils.visualization import get_contours as caiman_get_contours
 
 from .common import validate
+from .cache import Cache
+
+cache = Cache()
 
 
 @pd.api.extensions.register_series_accessor("cnmf")
@@ -59,6 +62,7 @@ def get_input_memmap(self) -> np.ndarray:
 
     # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items
     @validate("cnmf")
+    @cache.use_cache
     def get_output_path(self) -> Path:
         """
         Returns
@@ -69,6 +73,7 @@ def get_output_path(self) -> Path:
         return self._series.paths.resolve(self._series["outputs"]["cnmf-hdf5-path"])
 
     @validate("cnmf")
+    @cache.use_cache
     def get_output(self) -> CNMF:
         """
         Returns
@@ -82,8 +87,9 @@ def get_output(self) -> CNMF:
 
     # TODO: Make the ``ixs`` parameter for spatial stuff optional
     @validate("cnmf")
+    @cache.use_cache
     def get_spatial_masks(
-        self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01
+            self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01
     ) -> np.ndarray:
         """
         Get binary masks of the spatial components at the given `ixs`
@@ -128,7 +134,7 @@ def get_spatial_masks(
     # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items
     @staticmethod
     def _get_spatial_contours(
-        cnmf_obj: CNMF, ixs_components: Optional[np.ndarray] = None
+            cnmf_obj: CNMF, ixs_components: Optional[np.ndarray] = None
     ):
         if ixs_components is None:
             ixs_components = cnmf_obj.estimates.idx_components
@@ -148,8 +154,9 @@ def _get_spatial_contours(
         return contours
 
     @validate("cnmf")
+    @cache.use_cache
     def get_spatial_contours(
-        self, ixs_components: Optional[np.ndarray] = None
+            self, ixs_components: Optional[np.ndarray] = None
     ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
         """
         Get the contour and center of mass for each spatial footprint
@@ -181,8 +188,9 @@ def get_spatial_contours(
         return coordinates, coms
 
     @validate("cnmf")
+    @cache.use_cache
     def get_temporal_components(
-        self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False
+            self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False
     ) -> np.ndarray:
         """
         Get the temporal components for this CNMF item
@@ -216,10 +224,10 @@ def get_temporal_components(
     # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items
     @validate("cnmf")
     def get_reconstructed_movie(
-        self,
-        ixs_frames: Optional[Union[Tuple[int, int], int]] = None,
-        idx_components: np.ndarray = None,
-        add_background: bool = True,
+            self,
+            ixs_frames: Optional[Union[Tuple[int, int], int]] = None,
+            idx_components: np.ndarray = None,
+            add_background: bool = True,
     ) -> np.ndarray:
         """
         Return the reconstructed movie, (A * C) + (b * f)
@@ -250,12 +258,12 @@ def get_reconstructed_movie(
         if isinstance(ixs_frames, int):
             ixs_frames = (ixs_frames, ixs_frames + 1)
 
-        dn = cnmf_obj.estimates.A[:,idx_components].dot(
-            cnmf_obj.estimates.C[idx_components, ixs_frames[0] : ixs_frames[1]]
+        dn = cnmf_obj.estimates.A[:, idx_components].dot(
+            cnmf_obj.estimates.C[idx_components, ixs_frames[0]: ixs_frames[1]]
         )
 
         if add_background:
             dn += cnmf_obj.estimates.b.dot(
-                cnmf_obj.estimates.f[:, ixs_frames[0] : ixs_frames[1]]
+                cnmf_obj.estimates.f[:, ixs_frames[0]: ixs_frames[1]]
             )
         return dn.reshape(cnmf_obj.dims + (-1,), order="F").transpose([2, 0, 1])
diff --git a/mesmerize_core/caiman_extensions/mcorr.py b/mesmerize_core/caiman_extensions/mcorr.py
index bbb6e9a..0071eb6 100644
--- a/mesmerize_core/caiman_extensions/mcorr.py
+++ b/mesmerize_core/caiman_extensions/mcorr.py
@@ -6,6 +6,9 @@
 
 from .common import validate
 from typing import *
+from cache import Cache
+
+cache = Cache()
 
 
 @pd.api.extensions.register_series_accessor("mcorr")
@@ -18,6 +21,7 @@ def __init__(self, s: pd.Series):
         self._series = s
 
     @validate("mcorr")
+    @cache.use_cache
     def get_output_path(self) -> Path:
         """
         Get the path to the motion corrected output memmap file
@@ -30,6 +34,7 @@ def get_output_path(self) -> Path:
         return self._series.paths.resolve(self._series["outputs"]["mcorr-output-path"])
 
     @validate("mcorr")
+    @cache.use_cache
     def get_output(self) -> np.ndarray:
         """
         Get the motion corrected output as a memmaped numpy array, allows fast random-access scrolling.
@@ -45,6 +50,7 @@ def get_output(self) -> np.ndarray:
         return mc_movie
 
     @validate("mcorr")
+    @cache.use_cache
     def get_shifts(
         self, pw_rigid: bool = False
     ) -> Tuple[List[np.ndarray], List[np.ndarray]]:

From 12e4ff149a764ca2eac6953357b41ee040f82717 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 28 Jun 2022 13:48:35 -0400
Subject: [PATCH 03/34] reformatting files

---
 mesmerize_core/caiman_extensions/cache.py  | 44 ++++++++++++++++++----
 mesmerize_core/caiman_extensions/cnmf.py   | 20 +++++-----
 mesmerize_core/caiman_extensions/common.py |  4 +-
 3 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index ff76b95..e16d496 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -5,7 +5,10 @@
 
 class Cache:
     def __init__(self, cache_size=10):
-        self.cache = pd.DataFrame(data=None, columns=['function', 'args', 'kwargs', 'return_val', 'time_stamp'])
+        self.cache = pd.DataFrame(
+            data=None,
+            columns=["function", "args", "kwargs", "return_val", "time_stamp"],
+        )
         self.cache_size = cache_size
 
     def get_cache(self):
@@ -17,24 +20,51 @@ def _use_cache(*args, **kwargs):
 
             # if cache is empty, will always be a cache miss
             if len(self.cache.index) == 0:
-                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()]
+                self.cache.loc[len(self.cache.index)] = [
+                    func.__name__,
+                    args,
+                    kwargs,
+                    func(args, kwargs),
+                    time.time(),
+                ]
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
-                if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[i, 2] == kwargs:
+                if (
+                    self.cache.iloc[i, 0] == func.__name__
+                    and self.cache.iloc[i, 1] == args
+                    and self.cache.iloc[i, 2] == kwargs
+                ):
                     self.cache.iloc[i, 4] = time.time()
                     return self.cache.iloc[i, 3]
 
             # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used
             # and add new entry
             if len(self.cache.index) == self.cache_size:
-                self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0,
-                                inplace=True)
+                self.cache.drop(
+                    index=self.cache.sort_values(
+                        by=["time_stamp"], ascending=False
+                    ).index[-1],
+                    axis=0,
+                    inplace=True,
+                )
                 self.cache = self.cache.reset_index(drop=True)
-                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()]
+                self.cache.loc[len(self.cache.index)] = [
+                    func.__name__,
+                    args,
+                    kwargs,
+                    func(args, kwargs),
+                    time.time(),
+                ]
                 return self.cache.iloc[len(self.cache.index) - 1, 3]
             else:
-                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, func(args, kwargs), time.time()]
+                self.cache.loc[len(self.cache.index)] = [
+                    func.__name__,
+                    args,
+                    kwargs,
+                    func(args, kwargs),
+                    time.time(),
+                ]
 
             return func(*args, **kwargs)
 
diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py
index c221774..6108f88 100644
--- a/mesmerize_core/caiman_extensions/cnmf.py
+++ b/mesmerize_core/caiman_extensions/cnmf.py
@@ -89,7 +89,7 @@ def get_output(self) -> CNMF:
     @validate("cnmf")
     @cache.use_cache
     def get_spatial_masks(
-            self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01
+        self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01
     ) -> np.ndarray:
         """
         Get binary masks of the spatial components at the given `ixs`
@@ -134,7 +134,7 @@ def get_spatial_masks(
     # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items
     @staticmethod
     def _get_spatial_contours(
-            cnmf_obj: CNMF, ixs_components: Optional[np.ndarray] = None
+        cnmf_obj: CNMF, ixs_components: Optional[np.ndarray] = None
     ):
         if ixs_components is None:
             ixs_components = cnmf_obj.estimates.idx_components
@@ -156,7 +156,7 @@ def _get_spatial_contours(
     @validate("cnmf")
     @cache.use_cache
     def get_spatial_contours(
-            self, ixs_components: Optional[np.ndarray] = None
+        self, ixs_components: Optional[np.ndarray] = None
     ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
         """
         Get the contour and center of mass for each spatial footprint
@@ -190,7 +190,7 @@ def get_spatial_contours(
     @validate("cnmf")
     @cache.use_cache
     def get_temporal_components(
-            self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False
+        self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False
     ) -> np.ndarray:
         """
         Get the temporal components for this CNMF item
@@ -224,10 +224,10 @@ def get_temporal_components(
     # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items
     @validate("cnmf")
     def get_reconstructed_movie(
-            self,
-            ixs_frames: Optional[Union[Tuple[int, int], int]] = None,
-            idx_components: np.ndarray = None,
-            add_background: bool = True,
+        self,
+        ixs_frames: Optional[Union[Tuple[int, int], int]] = None,
+        idx_components: np.ndarray = None,
+        add_background: bool = True,
     ) -> np.ndarray:
         """
         Return the reconstructed movie, (A * C) + (b * f)
@@ -259,11 +259,11 @@ def get_reconstructed_movie(
             ixs_frames = (ixs_frames, ixs_frames + 1)
 
         dn = cnmf_obj.estimates.A[:, idx_components].dot(
-            cnmf_obj.estimates.C[idx_components, ixs_frames[0]: ixs_frames[1]]
+            cnmf_obj.estimates.C[idx_components, ixs_frames[0] : ixs_frames[1]]
         )
 
         if add_background:
             dn += cnmf_obj.estimates.b.dot(
-                cnmf_obj.estimates.f[:, ixs_frames[0]: ixs_frames[1]]
+                cnmf_obj.estimates.f[:, ixs_frames[0] : ixs_frames[1]]
             )
         return dn.reshape(cnmf_obj.dims + (-1,), order="F").transpose([2, 0, 1])
diff --git a/mesmerize_core/caiman_extensions/common.py b/mesmerize_core/caiman_extensions/common.py
index 7f6e67c..3b3654c 100644
--- a/mesmerize_core/caiman_extensions/common.py
+++ b/mesmerize_core/caiman_extensions/common.py
@@ -270,10 +270,10 @@ def get_input_movie_path(self) -> Path:
     def get_input_movie(self) -> Union[np.ndarray, pims.FramesSequence]:
         extension = self.get_input_movie_path().suffixes[-1]
 
-        if extension in ['.tiff', '.tif', '.btf']:
+        if extension in [".tiff", ".tif", ".btf"]:
             return pims.open(str(self.get_input_movie_path()))
 
-        elif extension in ['.mmap', '.memmap']:
+        elif extension in [".mmap", ".memmap"]:
             Yr, dims, T = load_memmap(str(self.get_input_movie_path()))
             return np.reshape(Yr.T, [T] + list(dims), order="F")
 

From 5bf7fd9c10a5113a75efe3bb98211c5437488cd1 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 28 Jun 2022 15:38:04 -0400
Subject: [PATCH 04/34] finishing up cache impl, adding clear_cache() and
 set_maxsize() capabilities

---
 mesmerize_core/caiman_extensions/cache.py | 68 +++++++++--------------
 mesmerize_core/caiman_extensions/cnmf.py  |  3 +
 mesmerize_core/caiman_extensions/mcorr.py |  2 +-
 3 files changed, 30 insertions(+), 43 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index e16d496..27ad28c 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -5,67 +5,51 @@
 
 class Cache:
     def __init__(self, cache_size=10):
-        self.cache = pd.DataFrame(
-            data=None,
-            columns=["function", "args", "kwargs", "return_val", "time_stamp"],
-        )
+        self.cache = pd.DataFrame(data=None, columns=['function', 'args', 'kwargs', 'return_val', 'time_stamp'])
         self.cache_size = cache_size
 
     def get_cache(self):
         print(self.cache)
 
+    def clear_cache(self):
+        while len(self.cache.index) != 0:
+            self.cache.drop(index=self.cache.index[-1], axis=0, inplace=True)
+
+    def set_maxsize(self, max_size: int):
+        self.cache_size = max_size
+
     def use_cache(self, func):
         @wraps(func)
-        def _use_cache(*args, **kwargs):
+        def _use_cache(instance, *args, **kwargs):
+
+            print(instance)
+            print(args, kwargs)
 
             # if cache is empty, will always be a cache miss
             if len(self.cache.index) == 0:
-                self.cache.loc[len(self.cache.index)] = [
-                    func.__name__,
-                    args,
-                    kwargs,
-                    func(args, kwargs),
-                    time.time(),
-                ]
+                return_val = func(instance, *args, **kwargs)
+                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()]
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
-                if (
-                    self.cache.iloc[i, 0] == func.__name__
-                    and self.cache.iloc[i, 1] == args
-                    and self.cache.iloc[i, 2] == kwargs
-                ):
+                if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[
+                    i, 2] == kwargs:
                     self.cache.iloc[i, 4] = time.time()
+                    return_val = self.cache.iloc[i, 3]
                     return self.cache.iloc[i, 3]
 
-            # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used
-            # and add new entry
+            # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
             if len(self.cache.index) == self.cache_size:
-                self.cache.drop(
-                    index=self.cache.sort_values(
-                        by=["time_stamp"], ascending=False
-                    ).index[-1],
-                    axis=0,
-                    inplace=True,
-                )
+                return_val = func(instance, *args, **kwargs)
+                self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0,
+                                inplace=True)
                 self.cache = self.cache.reset_index(drop=True)
-                self.cache.loc[len(self.cache.index)] = [
-                    func.__name__,
-                    args,
-                    kwargs,
-                    func(args, kwargs),
-                    time.time(),
-                ]
+                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()]
                 return self.cache.iloc[len(self.cache.index) - 1, 3]
             else:
-                self.cache.loc[len(self.cache.index)] = [
-                    func.__name__,
-                    args,
-                    kwargs,
-                    func(args, kwargs),
-                    time.time(),
-                ]
+                return_val = func(instance, *args, **kwargs)
+                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()]
 
-            return func(*args, **kwargs)
+            return return_val
 
-        return _use_cache
+        return _use_cache
\ No newline at end of file
diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py
index 6108f88..b0ed5e1 100644
--- a/mesmerize_core/caiman_extensions/cnmf.py
+++ b/mesmerize_core/caiman_extensions/cnmf.py
@@ -24,6 +24,8 @@ class CNMFExtensions:
     def __init__(self, s: pd.Series):
         self._series = s
 
+    @validate("cnmf")
+    @cache.use_cache
     def get_cnmf_memmap(self) -> np.ndarray:
         """
         Get the CNMF memmap
@@ -39,6 +41,7 @@ def get_cnmf_memmap(self) -> np.ndarray:
         images = np.reshape(Yr.T, [T] + list(dims), order="F")
         return images
 
+    @cache.use_cache
     def get_input_memmap(self) -> np.ndarray:
         """
         Return the F-order memmap if the input to the
diff --git a/mesmerize_core/caiman_extensions/mcorr.py b/mesmerize_core/caiman_extensions/mcorr.py
index 0071eb6..c46e412 100644
--- a/mesmerize_core/caiman_extensions/mcorr.py
+++ b/mesmerize_core/caiman_extensions/mcorr.py
@@ -6,7 +6,7 @@
 
 from .common import validate
 from typing import *
-from cache import Cache
+from .cache import Cache
 
 cache = Cache()
 

From eb62b656950522866a773031527d87d3cbf4b18f Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Thu, 30 Jun 2022 20:35:40 -0400
Subject: [PATCH 05/34] adding uuid column to cache so that different batch
 items can be delineated

---
 mesmerize_core/caiman_extensions/cache.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 27ad28c..5455cd7 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -5,7 +5,7 @@
 
 class Cache:
     def __init__(self, cache_size=10):
-        self.cache = pd.DataFrame(data=None, columns=['function', 'args', 'kwargs', 'return_val', 'time_stamp'])
+        self.cache = pd.DataFrame(data=None, columns=['uuid', 'function', 'args', 'kwargs', 'return_val', 'time_stamp'])
         self.cache_size = cache_size
 
     def get_cache(self):
@@ -22,21 +22,18 @@ def use_cache(self, func):
         @wraps(func)
         def _use_cache(instance, *args, **kwargs):
 
-            print(instance)
-            print(args, kwargs)
-
             # if cache is empty, will always be a cache miss
             if len(self.cache.index) == 0:
                 return_val = func(instance, *args, **kwargs)
-                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()]
+                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()]
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
-                if self.cache.iloc[i, 0] == func.__name__ and self.cache.iloc[i, 1] == args and self.cache.iloc[
-                    i, 2] == kwargs:
-                    self.cache.iloc[i, 4] = time.time()
-                    return_val = self.cache.iloc[i, 3]
-                    return self.cache.iloc[i, 3]
+                if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__ and self.cache.iloc[i, 2] == args and self.cache.iloc[
+                    i, 3] == kwargs:
+                    self.cache.iloc[i, 5] = time.time()
+                    return_val = self.cache.iloc[i, 4]
+                    return self.cache.iloc[i, 4]
 
             # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
             if len(self.cache.index) == self.cache_size:
@@ -44,11 +41,11 @@ def _use_cache(instance, *args, **kwargs):
                 self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0,
                                 inplace=True)
                 self.cache = self.cache.reset_index(drop=True)
-                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()]
-                return self.cache.iloc[len(self.cache.index) - 1, 3]
+                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()]
+                return self.cache.iloc[len(self.cache.index) - 1, 4]
             else:
                 return_val = func(instance, *args, **kwargs)
-                self.cache.loc[len(self.cache.index)] = [func.__name__, args, kwargs, return_val, time.time()]
+                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()]
 
             return return_val
 

From a40e6c4b634fb01b3e08721e95808a54238be550 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Thu, 30 Jun 2022 21:03:06 -0400
Subject: [PATCH 06/34] updating kwarg comparison to handle numpy arrays

---
 mesmerize_core/caiman_extensions/cache.py | 26 +++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 5455cd7..ebf149f 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -1,6 +1,17 @@
 from functools import wraps
 import pandas as pd
 import time
+import numpy as np
+
+
+def check_kwarg_equality(kwargs, cache_kwargs):
+    if not type(kwargs) == type(cache_kwargs):
+        return False
+
+    if isinstance(cache_kwargs, np.ndarray):
+        return np.array_equal(cache_kwargs, kwargs)
+    else:
+        return cache_kwargs == kwargs
 
 
 class Cache:
@@ -25,12 +36,12 @@ def _use_cache(instance, *args, **kwargs):
             # if cache is empty, will always be a cache miss
             if len(self.cache.index) == 0:
                 return_val = func(instance, *args, **kwargs)
-                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()]
+                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs,
+                                                         return_val, time.time()]
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
-                if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__ and self.cache.iloc[i, 2] == args and self.cache.iloc[
-                    i, 3] == kwargs:
+                if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__  and self.cache.iloc[i, 2] == args and check_kwarg_equality(kwargs, self.cache.iloc[i, 3]):
                     self.cache.iloc[i, 5] = time.time()
                     return_val = self.cache.iloc[i, 4]
                     return self.cache.iloc[i, 4]
@@ -41,12 +52,15 @@ def _use_cache(instance, *args, **kwargs):
                 self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0,
                                 inplace=True)
                 self.cache = self.cache.reset_index(drop=True)
-                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()]
+                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val,
+                                                         time.time()]
                 return self.cache.iloc[len(self.cache.index) - 1, 4]
             else:
                 return_val = func(instance, *args, **kwargs)
-                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val, time.time()]
+                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val,
+                                                         time.time()]
 
             return return_val
 
-        return _use_cache
\ No newline at end of file
+        return _use_cache
+

From 3c5bd91f19d1f0dce4d07a76248d65140854ea33 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Thu, 30 Jun 2022 21:43:22 -0400
Subject: [PATCH 07/34] checking if args are equal when numpy array is passed
 as arg

---
 mesmerize_core/caiman_extensions/cache.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index ebf149f..3c4db13 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -4,14 +4,14 @@
 import numpy as np
 
 
-def check_kwarg_equality(kwargs, cache_kwargs):
-    if not type(kwargs) == type(cache_kwargs):
+def check_arg_equality(args, cache_args):
+    if not type(args) == type(cache_args):
         return False
 
-    if isinstance(cache_kwargs, np.ndarray):
-        return np.array_equal(cache_kwargs, kwargs)
+    if isinstance(cache_args, np.ndarray):
+        return np.array_equal(cache_args, args)
     else:
-        return cache_kwargs == kwargs
+        return cache_args == args
 
 
 class Cache:
@@ -41,7 +41,7 @@ def _use_cache(instance, *args, **kwargs):
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
-                if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__  and self.cache.iloc[i, 2] == args and check_kwarg_equality(kwargs, self.cache.iloc[i, 3]):
+                if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__ and check_arg_equality(args, self.cache.iloc[i, 2]) and check_arg_equality(kwargs, self.cache.iloc[i, 3]):
                     self.cache.iloc[i, 5] = time.time()
                     return_val = self.cache.iloc[i, 4]
                     return self.cache.iloc[i, 4]

From ed2fa186a6abf4a5c9921acdce7b5f3906d006ff Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Fri, 1 Jul 2022 11:02:55 -0400
Subject: [PATCH 08/34] updating check arg function so that arg equality is
 checked for every entry of array or dict

---
 mesmerize_core/caiman_extensions/cache.py | 66 ++++++++++++++++++-----
 1 file changed, 53 insertions(+), 13 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 3c4db13..1761ab0 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -4,19 +4,32 @@
 import numpy as np
 
 
-def check_arg_equality(args, cache_args):
+def _check_arg_equality(args, cache_args):
     if not type(args) == type(cache_args):
         return False
-
     if isinstance(cache_args, np.ndarray):
         return np.array_equal(cache_args, args)
     else:
         return cache_args == args
 
 
+def _check_args_equality(args, cache_args):
+    equality = list()
+    if isinstance(args, tuple):
+        for arg, cache_arg in zip(args, cache_args):
+            equality.append(_check_arg_equality(arg, cache_arg))
+    else:
+        for k in args.keys():
+            equality.append(_check_arg_equality(args[k], cache_args[k]))
+    return all(equality)
+
+
 class Cache:
     def __init__(self, cache_size=10):
-        self.cache = pd.DataFrame(data=None, columns=['uuid', 'function', 'args', 'kwargs', 'return_val', 'time_stamp'])
+        self.cache = pd.DataFrame(
+            data=None,
+            columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"],
+        )
         self.cache_size = cache_size
 
     def get_cache(self):
@@ -36,12 +49,23 @@ def _use_cache(instance, *args, **kwargs):
             # if cache is empty, will always be a cache miss
             if len(self.cache.index) == 0:
                 return_val = func(instance, *args, **kwargs)
-                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs,
-                                                         return_val, time.time()]
+                self.cache.loc[len(self.cache.index)] = [
+                    instance._series["uuid"],
+                    func.__name__,
+                    args,
+                    kwargs,
+                    return_val,
+                    time.time(),
+                ]
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
-                if self.cache.iloc[i, 0] == instance._series['uuid'] and self.cache.iloc[i, 1] == func.__name__ and check_arg_equality(args, self.cache.iloc[i, 2]) and check_arg_equality(kwargs, self.cache.iloc[i, 3]):
+                if (
+                    self.cache.iloc[i, 0] == instance._series["uuid"]
+                    and self.cache.iloc[i, 1] == func.__name__
+                    and _check_args_equality(args, self.cache.iloc[i, 2])
+                    and _check_arg_equality(kwargs, self.cache.iloc[i, 3])
+                ):
                     self.cache.iloc[i, 5] = time.time()
                     return_val = self.cache.iloc[i, 4]
                     return self.cache.iloc[i, 4]
@@ -49,18 +73,34 @@ def _use_cache(instance, *args, **kwargs):
             # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
             if len(self.cache.index) == self.cache_size:
                 return_val = func(instance, *args, **kwargs)
-                self.cache.drop(index=self.cache.sort_values(by=['time_stamp'], ascending=False).index[-1], axis=0,
-                                inplace=True)
+                self.cache.drop(
+                    index=self.cache.sort_values(
+                        by=["time_stamp"], ascending=False
+                    ).index[-1],
+                    axis=0,
+                    inplace=True,
+                )
                 self.cache = self.cache.reset_index(drop=True)
-                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val,
-                                                         time.time()]
+                self.cache.loc[len(self.cache.index)] = [
+                    instance._series["uuid"],
+                    func.__name__,
+                    args,
+                    kwargs,
+                    return_val,
+                    time.time(),
+                ]
                 return self.cache.iloc[len(self.cache.index) - 1, 4]
             else:
                 return_val = func(instance, *args, **kwargs)
-                self.cache.loc[len(self.cache.index)] = [instance._series['uuid'], func.__name__, args, kwargs, return_val,
-                                                         time.time()]
+                self.cache.loc[len(self.cache.index)] = [
+                    instance._series["uuid"],
+                    func.__name__,
+                    args,
+                    kwargs,
+                    return_val,
+                    time.time(),
+                ]
 
             return return_val
 
         return _use_cache
-

From 52be0aeb57eef53138b01420537ef1ad23836686 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Sun, 3 Jul 2022 08:14:36 -0400
Subject: [PATCH 09/34] returning func(*args, **kwargs) if len(cache)==0

---
 mesmerize_core/caiman_extensions/cache.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 1761ab0..62b0a49 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -57,6 +57,7 @@ def _use_cache(instance, *args, **kwargs):
                     return_val,
                     time.time(),
                 ]
+                return return_val
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):

From 35897a8af9703aabc10162266c82c18470dba022 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Sun, 3 Jul 2022 09:03:48 -0400
Subject: [PATCH 10/34] adding ability for cache size to be controlled by
 memory size as opposed to item number

---
 mesmerize_core/caiman_extensions/cache.py | 111 +++++++++++++++-------
 1 file changed, 77 insertions(+), 34 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 62b0a49..291f011 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -2,6 +2,7 @@
 import pandas as pd
 import time
 import numpy as np
+import sys
 
 
 def _check_arg_equality(args, cache_args):
@@ -25,12 +26,16 @@ def _check_args_equality(args, cache_args):
 
 
 class Cache:
-    def __init__(self, cache_size=10):
+    def __init__(self, cache_size=10, length_storage=True):
         self.cache = pd.DataFrame(
             data=None,
             columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"],
         )
         self.cache_size = cache_size
+        if length_storage == True:
+            self.storage_type = 'ITEMS'
+        else:
+            self.storage_type = 'RAM'
 
     def get_cache(self):
         print(self.cache)
@@ -42,6 +47,12 @@ def clear_cache(self):
     def set_maxsize(self, max_size: int):
         self.cache_size = max_size
 
+    def _get_cache_size(self):
+        cache_size = 0
+        for i in range(len(self.cache.index)):
+            cache_size += sys.getsizeof(self.cache.iloc[i, 4])
+        return cache_size
+
     def use_cache(self, func):
         @wraps(func)
         def _use_cache(instance, *args, **kwargs):
@@ -62,45 +73,77 @@ def _use_cache(instance, *args, **kwargs):
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
                 if (
-                    self.cache.iloc[i, 0] == instance._series["uuid"]
-                    and self.cache.iloc[i, 1] == func.__name__
-                    and _check_args_equality(args, self.cache.iloc[i, 2])
-                    and _check_arg_equality(kwargs, self.cache.iloc[i, 3])
+                        self.cache.iloc[i, 0] == instance._series["uuid"]
+                        and self.cache.iloc[i, 1] == func.__name__
+                        and _check_args_equality(args, self.cache.iloc[i, 2])
+                        and _check_arg_equality(kwargs, self.cache.iloc[i, 3])
                 ):
                     self.cache.iloc[i, 5] = time.time()
                     return_val = self.cache.iloc[i, 4]
                     return self.cache.iloc[i, 4]
 
             # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
-            if len(self.cache.index) == self.cache_size:
-                return_val = func(instance, *args, **kwargs)
-                self.cache.drop(
-                    index=self.cache.sort_values(
-                        by=["time_stamp"], ascending=False
-                    ).index[-1],
-                    axis=0,
-                    inplace=True,
-                )
-                self.cache = self.cache.reset_index(drop=True)
-                self.cache.loc[len(self.cache.index)] = [
-                    instance._series["uuid"],
-                    func.__name__,
-                    args,
-                    kwargs,
-                    return_val,
-                    time.time(),
-                ]
-                return self.cache.iloc[len(self.cache.index) - 1, 4]
-            else:
-                return_val = func(instance, *args, **kwargs)
-                self.cache.loc[len(self.cache.index)] = [
-                    instance._series["uuid"],
-                    func.__name__,
-                    args,
-                    kwargs,
-                    return_val,
-                    time.time(),
-                ]
+            # check which type of memory
+            if self.storage_type == 'ITEMS':
+                if len(self.cache.index) == self.cache_size:
+                    return_val = func(instance, *args, **kwargs)
+                    self.cache.drop(
+                        index=self.cache.sort_values(
+                            by=["time_stamp"], ascending=False
+                        ).index[-1],
+                        axis=0,
+                        inplace=True,
+                    )
+                    self.cache = self.cache.reset_index(drop=True)
+                    self.cache.loc[len(self.cache.index)] = [
+                        instance._series["uuid"],
+                        func.__name__,
+                        args,
+                        kwargs,
+                        return_val,
+                        time.time(),
+                    ]
+                    return self.cache.iloc[len(self.cache.index) - 1, 4]
+                else:
+                    return_val = func(instance, *args, **kwargs)
+                    self.cache.loc[len(self.cache.index)] = [
+                        instance._series["uuid"],
+                        func.__name__,
+                        args,
+                        kwargs,
+                        return_val,
+                        time.time(),
+                    ]
+            elif self.storage_type == 'RAM':
+                if self._get_cache_size() >= self.cache_size:
+                    return_val = func(instance, *args, **kwargs)
+                    self.cache.drop(
+                        index=self.cache.sort_values(
+                            by=["time_stamp"], ascending=False
+                        ).index[-1],
+                        axis=0,
+                        inplace=True,
+                    )
+                    self.cache = self.cache.reset_index(drop=True)
+                    self.cache.loc[len(self.cache.index)] = [
+                        instance._series["uuid"],
+                        func.__name__,
+                        args,
+                        kwargs,
+                        return_val,
+                        time.time(),
+                    ]
+                    return self.cache.iloc[len(self.cache.index) - 1, 4]
+                else:
+                    return_val = func(instance, *args, **kwargs)
+                    self.cache.loc[len(self.cache.index)] = [
+                        instance._series["uuid"],
+                        func.__name__,
+                        args,
+                        kwargs,
+                        return_val,
+                        time.time(),
+                    ]
 
             return return_val
 

From fa68603fbc42ddc192ffb13f412ffa18d93309f4 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 5 Jul 2022 09:34:46 -0400
Subject: [PATCH 11/34] work in progress

---
 mesmerize_core/caiman_extensions/cache.py | 33 ++++++++++++++++-------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 291f011..5e4d2c4 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -1,4 +1,6 @@
 from functools import wraps
+from typing import Union, Optional
+
 import pandas as pd
 import time
 import numpy as np
@@ -26,17 +28,21 @@ def _check_args_equality(args, cache_args):
 
 
 class Cache:
-    def __init__(self, cache_size=10, length_storage=True):
+    def __init__(self, cache_size: Optional[Union[int, str]] = None):
         self.cache = pd.DataFrame(
             data=None,
             columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"],
         )
-        self.cache_size = cache_size
-        if length_storage == True:
+        self.size = cache_size
+        if isinstance(cache_size, int):
             self.storage_type = 'ITEMS'
         else:
             self.storage_type = 'RAM'
 
+        if cache_size is None:
+            self.size = '1G'
+            self.storage_type = 'RAM'
+
     def get_cache(self):
         print(self.cache)
 
@@ -44,13 +50,23 @@ def clear_cache(self):
         while len(self.cache.index) != 0:
             self.cache.drop(index=self.cache.index[-1], axis=0, inplace=True)
 
-    def set_maxsize(self, max_size: int):
-        self.cache_size = max_size
+    def set_maxsize(self, max_size: Union[int, str]):
+        if isinstance(max_size, str):
+            self.storage_type = 'RAM'
+        else:
+            self.storage_type = 'ITEMS'
+        self.size = max_size
 
-    def _get_cache_size(self):
+    def _get_cache_size_bytes(self, return_gig=True):
+        """Returns in GiB or MB"""
         cache_size = 0
         for i in range(len(self.cache.index)):
             cache_size += sys.getsizeof(self.cache.iloc[i, 4])
+        # need to fix how size of an output is calculated to handle non-built-in types
+        if return_gig:
+            cache_size = cache_size / 1024**3
+        else:
+            cache_size = cache_size / 1024**2
         return cache_size
 
     def use_cache(self, func):
@@ -68,7 +84,6 @@ def _use_cache(instance, *args, **kwargs):
                     return_val,
                     time.time(),
                 ]
-                return return_val
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
@@ -85,7 +100,7 @@ def _use_cache(instance, *args, **kwargs):
             # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
             # check which type of memory
             if self.storage_type == 'ITEMS':
-                if len(self.cache.index) == self.cache_size:
+                if len(self.cache.index) == self.size:
                     return_val = func(instance, *args, **kwargs)
                     self.cache.drop(
                         index=self.cache.sort_values(
@@ -115,7 +130,7 @@ def _use_cache(instance, *args, **kwargs):
                         time.time(),
                     ]
             elif self.storage_type == 'RAM':
-                if self._get_cache_size() >= self.cache_size:
+                if self._get_cache_size_bytes() >= self.size:
                     return_val = func(instance, *args, **kwargs)
                     self.cache.drop(
                         index=self.cache.sort_values(

From 34ca881de7efc94e9dfc880569e2b09e7f4c65a8 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 5 Jul 2022 10:53:06 -0400
Subject: [PATCH 12/34] fixing how size of items in cache is calculated for
 non-built-in types

---
 mesmerize_core/caiman_extensions/cache.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 5e4d2c4..4dc9534 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -1,10 +1,12 @@
 from functools import wraps
-from typing import Union, Optional
+from typing import Union, Optional, Tuple
 
 import pandas as pd
 import time
 import numpy as np
 import sys
+from pathlib import Path
+from caiman.source_extraction.cnmf import CNMF
 
 
 def _check_arg_equality(args, cache_args):
@@ -61,8 +63,18 @@ def _get_cache_size_bytes(self, return_gig=True):
         """Returns in GiB or MB"""
         cache_size = 0
         for i in range(len(self.cache.index)):
-            cache_size += sys.getsizeof(self.cache.iloc[i, 4])
-        # need to fix how size of an output is calculated to handle non-built-in types
+            if isinstance(self.cache.iloc[i, 4], np.ndarray):
+                cache_size += (self.cache.iloc[i, 4].size * self.cache.iloc[i,4].itemsize)
+            elif isinstance(self.cache.iloc[i, 4], Tuple):
+                cache_size += (self.cache.iloc[i, 4][0].size * self.cache.iloc[i, 4][0].itemsize) + \
+                              (self.cache.iloc[i, 4][1].size * self.cache.iloc[i, 4][1].itemsize)
+            elif isinstance(self.cache.iloc[i, 4], Path):
+                cache_size += 0
+            elif isinstance(self.cache.iloc[i, 4], CNMF):
+                cache_size += sys.getsizeof(self.cache.iloc[i,4].estimates)
+            else:
+                cache_size += sys.getsizeof(self.cache.iloc[i, 4])
+
         if return_gig:
             cache_size = cache_size / 1024**3
         else:

From 98dcd3b2a1c48a4e554ddd48e43938866c43284f Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 5 Jul 2022 11:09:37 -0400
Subject: [PATCH 13/34] trying to make code more elegant for kushal

---
 mesmerize_core/caiman_extensions/cache.py | 92 ++++++++---------------
 1 file changed, 31 insertions(+), 61 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 4dc9534..942ef75 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -71,7 +71,7 @@ def _get_cache_size_bytes(self, return_gig=True):
             elif isinstance(self.cache.iloc[i, 4], Path):
                 cache_size += 0
             elif isinstance(self.cache.iloc[i, 4], CNMF):
-                cache_size += sys.getsizeof(self.cache.iloc[i,4].estimates)
+                cache_size += sys.getsizeof(self.cache.iloc[i, 4].estimates)
             else:
                 cache_size += sys.getsizeof(self.cache.iloc[i, 4])
 
@@ -111,66 +111,36 @@ def _use_cache(instance, *args, **kwargs):
 
             # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
             # check which type of memory
-            if self.storage_type == 'ITEMS':
-                if len(self.cache.index) == self.size:
-                    return_val = func(instance, *args, **kwargs)
-                    self.cache.drop(
-                        index=self.cache.sort_values(
-                            by=["time_stamp"], ascending=False
-                        ).index[-1],
-                        axis=0,
-                        inplace=True,
-                    )
-                    self.cache = self.cache.reset_index(drop=True)
-                    self.cache.loc[len(self.cache.index)] = [
-                        instance._series["uuid"],
-                        func.__name__,
-                        args,
-                        kwargs,
-                        return_val,
-                        time.time(),
-                    ]
-                    return self.cache.iloc[len(self.cache.index) - 1, 4]
-                else:
-                    return_val = func(instance, *args, **kwargs)
-                    self.cache.loc[len(self.cache.index)] = [
-                        instance._series["uuid"],
-                        func.__name__,
-                        args,
-                        kwargs,
-                        return_val,
-                        time.time(),
-                    ]
-            elif self.storage_type == 'RAM':
-                if self._get_cache_size_bytes() >= self.size:
-                    return_val = func(instance, *args, **kwargs)
-                    self.cache.drop(
-                        index=self.cache.sort_values(
-                            by=["time_stamp"], ascending=False
-                        ).index[-1],
-                        axis=0,
-                        inplace=True,
-                    )
-                    self.cache = self.cache.reset_index(drop=True)
-                    self.cache.loc[len(self.cache.index)] = [
-                        instance._series["uuid"],
-                        func.__name__,
-                        args,
-                        kwargs,
-                        return_val,
-                        time.time(),
-                    ]
-                    return self.cache.iloc[len(self.cache.index) - 1, 4]
-                else:
-                    return_val = func(instance, *args, **kwargs)
-                    self.cache.loc[len(self.cache.index)] = [
-                        instance._series["uuid"],
-                        func.__name__,
-                        args,
-                        kwargs,
-                        return_val,
-                        time.time(),
-                    ]
+            if (self.storage_type == 'ITEMS' and len(self.cache.index) == self.size) or (self.storage_type == 'RAM' and self._get_cache_size_bytes() >= self.size):
+                return_val = func(instance, *args, **kwargs)
+                self.cache.drop(
+                    index=self.cache.sort_values(
+                        by=["time_stamp"], ascending=False
+                    ).index[-1],
+                    axis=0,
+                    inplace=True,
+                )
+                self.cache = self.cache.reset_index(drop=True)
+                self.cache.loc[len(self.cache.index)] = [
+                    instance._series["uuid"],
+                    func.__name__,
+                    args,
+                    kwargs,
+                    return_val,
+                    time.time(),
+                ]
+                return self.cache.iloc[len(self.cache.index) - 1, 4]
+
+            else:
+                return_val = func(instance, *args, **kwargs)
+                self.cache.loc[len(self.cache.index)] = [
+                    instance._series["uuid"],
+                    func.__name__,
+                    args,
+                    kwargs,
+                    return_val,
+                    time.time(),
+                ]
 
             return return_val
 

From 19203fb626f9aef807d15419e693e7da60b8d67b Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 5 Jul 2022 11:55:21 -0400
Subject: [PATCH 14/34] cache should now work whether size is based on
 number of items or size in memory

---
 mesmerize_core/caiman_extensions/cache.py | 32 +++++++++++++++++++----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 942ef75..74a26c5 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -7,6 +7,7 @@
 import sys
 from pathlib import Path
 from caiman.source_extraction.cnmf import CNMF
+import re
 
 
 def _check_arg_equality(args, cache_args):
@@ -42,7 +43,7 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None):
             self.storage_type = 'RAM'
 
         if cache_size is None:
-            self.size = '1G'
+            self.size = 1
             self.storage_type = 'RAM'
 
     def get_cache(self):
@@ -55,9 +56,10 @@ def clear_cache(self):
     def set_maxsize(self, max_size: Union[int, str]):
         if isinstance(max_size, str):
             self.storage_type = 'RAM'
+            self.size = int(re.split('\d+', max_size)[0])
         else:
             self.storage_type = 'ITEMS'
-        self.size = max_size
+            self.size = max_size
 
     def _get_cache_size_bytes(self, return_gig=True):
         """Returns in GiB or MB"""
@@ -110,8 +112,8 @@ def _use_cache(instance, *args, **kwargs):
                     return self.cache.iloc[i, 4]
 
             # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
-            # check which type of memory
-            if (self.storage_type == 'ITEMS' and len(self.cache.index) == self.size) or (self.storage_type == 'RAM' and self._get_cache_size_bytes() >= self.size):
+            # if memory type is 'ITEMS': drop the least recently used and then add new item
+            if self.storage_type == 'ITEMS' and len(self.cache.index) == self.size:
                 return_val = func(instance, *args, **kwargs)
                 self.cache.drop(
                     index=self.cache.sort_values(
@@ -130,7 +132,27 @@ def _use_cache(instance, *args, **kwargs):
                     time.time(),
                 ]
                 return self.cache.iloc[len(self.cache.index) - 1, 4]
-
+            # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again
+            elif self.storage_type == 'RAM' and self._get_cache_size_bytes() > self.size:
+                return_val = func(instance, *args, **kwargs)
+                self.cache.loc[len(self.cache.index)] = [
+                    instance._series["uuid"],
+                    func.__name__,
+                    args,
+                    kwargs,
+                    return_val,
+                    time.time(),
+                ]
+                while self._get_cache_size_bytes() > self.size:
+                    self.cache.drop(
+                        index=self.cache.sort_values(
+                            by=["time_stamp"], ascending=False
+                        ).index[-1],
+                        axis=0,
+                        inplace=True,
+                    )
+                    self.cache = self.cache.reset_index(drop=True)
+            # no matter the storage type if size is not going to be exceeded for either, then item can just be added to cache
             else:
                 return_val = func(instance, *args, **kwargs)
                 self.cache.loc[len(self.cache.index)] = [

From 7fd26307adfcd9fe86393cc731bf4eb6460373af Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 5 Jul 2022 12:20:47 -0400
Subject: [PATCH 15/34] debugging cache

---
 mesmerize_core/caiman_extensions/cache.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 74a26c5..807eaa7 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -36,15 +36,16 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None):
             data=None,
             columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"],
         )
-        self.size = cache_size
+        if cache_size is None:
+            self.size = 1
+            self.storage_type = 'RAM'
+
         if isinstance(cache_size, int):
             self.storage_type = 'ITEMS'
+            self.size = cache_size
         else:
             self.storage_type = 'RAM'
-
-        if cache_size is None:
-            self.size = 1
-            self.storage_type = 'RAM'
+            self.size = int(re.split('[a-zA-Z]', cache_size)[0])
 
     def get_cache(self):
         print(self.cache)
@@ -56,7 +57,7 @@ def clear_cache(self):
     def set_maxsize(self, max_size: Union[int, str]):
         if isinstance(max_size, str):
             self.storage_type = 'RAM'
-            self.size = int(re.split('\d+', max_size)[0])
+            self.size = int(re.split('[a-zA-Z]', max_size)[0])
         else:
             self.storage_type = 'ITEMS'
             self.size = max_size

From 0157fc4d5de31118cc21775e0eaa4f5c2568cab1 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 5 Jul 2022 12:23:34 -0400
Subject: [PATCH 16/34] further debugging

---
 mesmerize_core/caiman_extensions/cache.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 807eaa7..bbdfe29 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -39,8 +39,7 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None):
         if cache_size is None:
             self.size = 1
             self.storage_type = 'RAM'
-
-        if isinstance(cache_size, int):
+        elif isinstance(cache_size, int):
             self.storage_type = 'ITEMS'
             self.size = cache_size
         else:

From 77d13f7789b7604e9055efccbaf3b4b9545890a9 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Tue, 5 Jul 2022 13:28:17 -0400
Subject: [PATCH 17/34] further changes to cache

---
 mesmerize_core/caiman_extensions/cache.py | 40 +++++++++++------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index bbdfe29..13678da 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -37,14 +37,14 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None):
             columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"],
         )
         if cache_size is None:
-            self.size = 1
+            self.size = '1G'
             self.storage_type = 'RAM'
         elif isinstance(cache_size, int):
             self.storage_type = 'ITEMS'
             self.size = cache_size
         else:
             self.storage_type = 'RAM'
-            self.size = int(re.split('[a-zA-Z]', cache_size)[0])
+            self.size = cache_size
 
     def get_cache(self):
         print(self.cache)
@@ -56,28 +56,28 @@ def clear_cache(self):
     def set_maxsize(self, max_size: Union[int, str]):
         if isinstance(max_size, str):
             self.storage_type = 'RAM'
-            self.size = int(re.split('[a-zA-Z]', max_size)[0])
+            self.size = max_size
         else:
             self.storage_type = 'ITEMS'
             self.size = max_size
 
-    def _get_cache_size_bytes(self, return_gig=True):
+    def _get_cache_size_bytes(self):
         """Returns in GiB or MB"""
         cache_size = 0
         for i in range(len(self.cache.index)):
             if isinstance(self.cache.iloc[i, 4], np.ndarray):
-                cache_size += (self.cache.iloc[i, 4].size * self.cache.iloc[i,4].itemsize)
+                cache_size += (self.cache.iloc[i, 4].size * self.cache.iloc[i, 4].itemsize)
             elif isinstance(self.cache.iloc[i, 4], Tuple):
-                cache_size += (self.cache.iloc[i, 4][0].size * self.cache.iloc[i, 4][0].itemsize) + \
-                              (self.cache.iloc[i, 4][1].size * self.cache.iloc[i, 4][1].itemsize)
+                for array in self.cache.iloc[i, 4]:
+                    cache_size += (array.size * array.itemsize)
             elif isinstance(self.cache.iloc[i, 4], Path):
                 cache_size += 0
             elif isinstance(self.cache.iloc[i, 4], CNMF):
-                cache_size += sys.getsizeof(self.cache.iloc[i, 4].estimates)
+                cache_size += (self.cache.iloc[i, 4].estimates.A.data.nbytes + self.cache.iloc[i, 4].estimates.C.data.nbytes + self.cache.iloc[i, 4].estimates.b.data.nbytes + self.cache.iloc[i, 4].estimates.f.data.nbytes)
             else:
                 cache_size += sys.getsizeof(self.cache.iloc[i, 4])
 
-        if return_gig:
+        if self.size.endswith('G'):
             cache_size = cache_size / 1024**3
         else:
             cache_size = cache_size / 1024**2
@@ -113,7 +113,7 @@ def _use_cache(instance, *args, **kwargs):
 
             # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
             # if memory type is 'ITEMS': drop the least recently used and then add new item
-            if self.storage_type == 'ITEMS' and len(self.cache.index) == self.size:
+            if self.storage_type == 'ITEMS' and len(self.cache.index) >= self.size:
                 return_val = func(instance, *args, **kwargs)
                 self.cache.drop(
                     index=self.cache.sort_values(
@@ -133,16 +133,7 @@ def _use_cache(instance, *args, **kwargs):
                 ]
                 return self.cache.iloc[len(self.cache.index) - 1, 4]
             # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again
-            elif self.storage_type == 'RAM' and self._get_cache_size_bytes() > self.size:
-                return_val = func(instance, *args, **kwargs)
-                self.cache.loc[len(self.cache.index)] = [
-                    instance._series["uuid"],
-                    func.__name__,
-                    args,
-                    kwargs,
-                    return_val,
-                    time.time(),
-                ]
+            elif self.storage_type == 'RAM' and self._get_cache_size_bytes() > int(re.split('[a-zA-Z]', self.size)[0]):
                 while self._get_cache_size_bytes() > self.size:
                     self.cache.drop(
                         index=self.cache.sort_values(
@@ -152,6 +143,15 @@ def _use_cache(instance, *args, **kwargs):
                         inplace=True,
                     )
                     self.cache = self.cache.reset_index(drop=True)
+                    return_val = func(instance, *args, **kwargs)
+                    self.cache.loc[len(self.cache.index)] = [
+                        instance._series["uuid"],
+                        func.__name__,
+                        args,
+                        kwargs,
+                        return_val,
+                        time.time(),
+                    ]
             # no matter the storage type if size is not going to be exceeded for either, then item can just be added to cache
             else:
                 return_val = func(instance, *args, **kwargs)

From ee9b5d563f556017608282fb4b5b96517162dd0f Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Wed, 6 Jul 2022 09:00:11 -0400
Subject: [PATCH 18/34] updates to cache and to which extension methods use
 the cache

---
 mesmerize_core/caiman_extensions/cache.py | 30 +++++++++++++----------
 mesmerize_core/caiman_extensions/cnmf.py  |  3 ---
 mesmerize_core/caiman_extensions/mcorr.py |  1 -
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 13678da..67200f0 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -1,5 +1,6 @@
 from functools import wraps
 from typing import Union, Optional, Tuple
+from builtins import list
 
 import pandas as pd
 import time
@@ -65,11 +66,14 @@ def _get_cache_size_bytes(self):
         """Returns in GiB or MB"""
         cache_size = 0
         for i in range(len(self.cache.index)):
+            if isinstance(self.cache.iloc[i, 4], list):
+                for array in self.cache.iloc[i, 4]:
+                    cache_size += array.data.nbytes
             if isinstance(self.cache.iloc[i, 4], np.ndarray):
-                cache_size += (self.cache.iloc[i, 4].size * self.cache.iloc[i, 4].itemsize)
-            elif isinstance(self.cache.iloc[i, 4], Tuple):
+                cache_size += self.cache.iloc[i, 4].data.nbytes
+            elif isinstance(self.cache.iloc[i, 4], tuple):
                 for array in self.cache.iloc[i, 4]:
-                    cache_size += (array.size * array.itemsize)
+                    cache_size += array.data.nbytes
             elif isinstance(self.cache.iloc[i, 4], Path):
                 cache_size += 0
             elif isinstance(self.cache.iloc[i, 4], CNMF):
@@ -79,7 +83,7 @@ def _get_cache_size_bytes(self):
 
         if self.size.endswith('G'):
             cache_size = cache_size / 1024**3
-        else:
+        elif self.size.endswith('M'):
             cache_size = cache_size / 1024**2
         return cache_size
 
@@ -143,15 +147,15 @@ def _use_cache(instance, *args, **kwargs):
                         inplace=True,
                     )
                     self.cache = self.cache.reset_index(drop=True)
-                    return_val = func(instance, *args, **kwargs)
-                    self.cache.loc[len(self.cache.index)] = [
-                        instance._series["uuid"],
-                        func.__name__,
-                        args,
-                        kwargs,
-                        return_val,
-                        time.time(),
-                    ]
+                return_val = func(instance, *args, **kwargs)
+                self.cache.loc[len(self.cache.index)] = [
+                    instance._series["uuid"],
+                    func.__name__,
+                    args,
+                    kwargs,
+                    return_val,
+                    time.time(),
+                ]
             # no matter the storage type if size is not going to be exceeded for either, then item can just be added to cache
             else:
                 return_val = func(instance, *args, **kwargs)
diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py
index b0ed5e1..cfda76a 100644
--- a/mesmerize_core/caiman_extensions/cnmf.py
+++ b/mesmerize_core/caiman_extensions/cnmf.py
@@ -25,7 +25,6 @@ def __init__(self, s: pd.Series):
         self._series = s
 
     @validate("cnmf")
-    @cache.use_cache
     def get_cnmf_memmap(self) -> np.ndarray:
         """
         Get the CNMF memmap
@@ -41,7 +40,6 @@ def get_cnmf_memmap(self) -> np.ndarray:
         images = np.reshape(Yr.T, [T] + list(dims), order="F")
         return images
 
-    @cache.use_cache
     def get_input_memmap(self) -> np.ndarray:
         """
         Return the F-order memmap if the input to the
@@ -65,7 +63,6 @@ def get_input_memmap(self) -> np.ndarray:
 
     # TODO: Cache this globally so that a common upper cache limit is valid for ALL batch items
     @validate("cnmf")
-    @cache.use_cache
     def get_output_path(self) -> Path:
         """
         Returns
diff --git a/mesmerize_core/caiman_extensions/mcorr.py b/mesmerize_core/caiman_extensions/mcorr.py
index c46e412..fa14418 100644
--- a/mesmerize_core/caiman_extensions/mcorr.py
+++ b/mesmerize_core/caiman_extensions/mcorr.py
@@ -21,7 +21,6 @@ def __init__(self, s: pd.Series):
         self._series = s
 
     @validate("mcorr")
-    @cache.use_cache
     def get_output_path(self) -> Path:
         """
         Get the path to the motion corrected output memmap file

From cc68e2f747b3baa0b5d6224bb0fce2282d9333ab Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Fri, 8 Jul 2022 15:22:14 -0400
Subject: [PATCH 19/34] final changes to cache, tests should pass except for
 linter

---
 mesmerize_core/caiman_extensions/cache.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 67200f0..2a6e8e7 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -1,12 +1,10 @@
 from functools import wraps
-from typing import Union, Optional, Tuple
-from builtins import list
+from typing import Union, Optional
 
 import pandas as pd
 import time
 import numpy as np
 import sys
-from pathlib import Path
 from caiman.source_extraction.cnmf import CNMF
 import re
 
@@ -21,6 +19,8 @@ def _check_arg_equality(args, cache_args):
 
 
 def _check_args_equality(args, cache_args):
+    if len(args) != len(cache_args):
+        return False
     equality = list()
     if isinstance(args, tuple):
         for arg, cache_arg in zip(args, cache_args):
@@ -66,16 +66,12 @@ def _get_cache_size_bytes(self):
         """Returns in GiB or MB"""
         cache_size = 0
         for i in range(len(self.cache.index)):
-            if isinstance(self.cache.iloc[i, 4], list):
-                for array in self.cache.iloc[i, 4]:
-                    cache_size += array.data.nbytes
             if isinstance(self.cache.iloc[i, 4], np.ndarray):
                 cache_size += self.cache.iloc[i, 4].data.nbytes
             elif isinstance(self.cache.iloc[i, 4], tuple):
-                for array in self.cache.iloc[i, 4]:
-                    cache_size += array.data.nbytes
-            elif isinstance(self.cache.iloc[i, 4], Path):
-                cache_size += 0
+                for lists in self.cache.iloc[i, 4]:
+                    for array in lists:
+                        cache_size += array.data.nbytes
             elif isinstance(self.cache.iloc[i, 4], CNMF):
                 cache_size += (self.cache.iloc[i, 4].estimates.A.data.nbytes + self.cache.iloc[i, 4].estimates.C.data.nbytes + self.cache.iloc[i, 4].estimates.b.data.nbytes + self.cache.iloc[i, 4].estimates.f.data.nbytes)
             else:
@@ -102,6 +98,7 @@ def _use_cache(instance, *args, **kwargs):
                     return_val,
                     time.time(),
                 ]
+                return return_val
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):

From 398d952eecc3eccca907b955662b6a08e985e271 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Fri, 8 Jul 2022 16:15:35 -0400
Subject: [PATCH 20/34] kushal requested changes

---
 mesmerize_core/caiman_extensions/cache.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 2a6e8e7..6511768 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -68,7 +68,7 @@ def _get_cache_size_bytes(self):
         for i in range(len(self.cache.index)):
             if isinstance(self.cache.iloc[i, 4], np.ndarray):
                 cache_size += self.cache.iloc[i, 4].data.nbytes
-            elif isinstance(self.cache.iloc[i, 4], tuple):
+            elif isinstance(self.cache.iloc[i, 4], (tuple, list)):
                 for lists in self.cache.iloc[i, 4]:
                     for array in lists:
                         cache_size += array.data.nbytes

From b4bf027937129dd41805a54170a41733634dc478 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Fri, 8 Jul 2022 19:33:05 -0400
Subject: [PATCH 21/34] returning copies from cache for future downstream
 analysis, fixing cnmf object size computation, linter

---
 mesmerize_core/caiman_extensions/cache.py | 51 ++++++++++++++---------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 6511768..e1e1c20 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -7,6 +7,8 @@
 import sys
 from caiman.source_extraction.cnmf import CNMF
 import re
+from sys import getsizeof
+import copy
 
 
 def _check_arg_equality(args, cache_args):
@@ -31,6 +33,10 @@ def _check_args_equality(args, cache_args):
     return all(equality)
 
 
+def _return_wrapper(output):
+    return copy.deepcopy(output)
+
+
 class Cache:
     def __init__(self, cache_size: Optional[Union[int, str]] = None):
         self.cache = pd.DataFrame(
@@ -38,13 +44,13 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None):
             columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"],
         )
         if cache_size is None:
-            self.size = '1G'
-            self.storage_type = 'RAM'
+            self.size = "1G"
+            self.storage_type = "RAM"
         elif isinstance(cache_size, int):
-            self.storage_type = 'ITEMS'
+            self.storage_type = "ITEMS"
             self.size = cache_size
         else:
-            self.storage_type = 'RAM'
+            self.storage_type = "RAM"
             self.size = cache_size
 
     def get_cache(self):
@@ -56,10 +62,10 @@ def clear_cache(self):
 
     def set_maxsize(self, max_size: Union[int, str]):
         if isinstance(max_size, str):
-            self.storage_type = 'RAM'
+            self.storage_type = "RAM"
             self.size = max_size
         else:
-            self.storage_type = 'ITEMS'
+            self.storage_type = "ITEMS"
             self.size = max_size
 
     def _get_cache_size_bytes(self):
@@ -73,13 +79,18 @@ def _get_cache_size_bytes(self):
                     for array in lists:
                         cache_size += array.data.nbytes
             elif isinstance(self.cache.iloc[i, 4], CNMF):
-                cache_size += (self.cache.iloc[i, 4].estimates.A.data.nbytes + self.cache.iloc[i, 4].estimates.C.data.nbytes + self.cache.iloc[i, 4].estimates.b.data.nbytes + self.cache.iloc[i, 4].estimates.f.data.nbytes)
+                sizes = list()
+                for attr in self.cache.iloc[i, 4].estimates.__dict__.values():
+                    if isinstance(attr, np.ndarray):
+                        sizes.append(attr.data.nbytes)
+                    else:
+                        sizes.append(getsizeof(attr))
             else:
                 cache_size += sys.getsizeof(self.cache.iloc[i, 4])
 
-        if self.size.endswith('G'):
+        if self.size.endswith("G"):
             cache_size = cache_size / 1024**3
-        elif self.size.endswith('M'):
+        elif self.size.endswith("M"):
             cache_size = cache_size / 1024**2
         return cache_size
 
@@ -98,23 +109,23 @@ def _use_cache(instance, *args, **kwargs):
                     return_val,
                     time.time(),
                 ]
-                return return_val
+                return _return_wrapper(return_val)
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
                 if (
-                        self.cache.iloc[i, 0] == instance._series["uuid"]
-                        and self.cache.iloc[i, 1] == func.__name__
-                        and _check_args_equality(args, self.cache.iloc[i, 2])
-                        and _check_arg_equality(kwargs, self.cache.iloc[i, 3])
+                    self.cache.iloc[i, 0] == instance._series["uuid"]
+                    and self.cache.iloc[i, 1] == func.__name__
+                    and _check_args_equality(args, self.cache.iloc[i, 2])
+                    and _check_arg_equality(kwargs, self.cache.iloc[i, 3])
                 ):
                     self.cache.iloc[i, 5] = time.time()
                     return_val = self.cache.iloc[i, 4]
-                    return self.cache.iloc[i, 4]
+                    return _return_wrapper(self.cache.iloc[i, 4])
 
             # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
             # if memory type is 'ITEMS': drop the least recently used and then add new item
-            if self.storage_type == 'ITEMS' and len(self.cache.index) >= self.size:
+            if self.storage_type == "ITEMS" and len(self.cache.index) >= self.size:
                 return_val = func(instance, *args, **kwargs)
                 self.cache.drop(
                     index=self.cache.sort_values(
@@ -132,9 +143,11 @@ def _use_cache(instance, *args, **kwargs):
                     return_val,
                     time.time(),
                 ]
-                return self.cache.iloc[len(self.cache.index) - 1, 4]
+                return _return_wrapper(self.cache.iloc[len(self.cache.index) - 1, 4])
             # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again
-            elif self.storage_type == 'RAM' and self._get_cache_size_bytes() > int(re.split('[a-zA-Z]', self.size)[0]):
+            elif self.storage_type == "RAM" and self._get_cache_size_bytes() > int(
+                re.split("[a-zA-Z]", self.size)[0]
+            ):
                 while self._get_cache_size_bytes() > self.size:
                     self.cache.drop(
                         index=self.cache.sort_values(
@@ -165,6 +178,6 @@ def _use_cache(instance, *args, **kwargs):
                     time.time(),
                 ]
 
-            return return_val
+            return _return_wrapper(return_val)
 
         return _use_cache

From 0da8b552579c28308f0b5725178eca44ee645f54 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Sat, 9 Jul 2022 08:18:56 -0400
Subject: [PATCH 22/34] allow returning either a copy or the original of
 extension outputs

---
 mesmerize_core/caiman_extensions/cache.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index e1e1c20..5c231b3 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -33,8 +33,11 @@ def _check_args_equality(args, cache_args):
     return all(equality)
 
 
-def _return_wrapper(output):
-    return copy.deepcopy(output)
+def _return_wrapper(output, copy_bool):
+    if copy_bool == True:
+        return copy.deepcopy(output)
+    else:
+        return output
 
 
 class Cache:
@@ -97,6 +100,10 @@ def _get_cache_size_bytes(self):
     def use_cache(self, func):
         @wraps(func)
         def _use_cache(instance, *args, **kwargs):
+            if "copy" in kwargs.keys():
+                return_copy = kwargs["copy"]
+            else:
+                return_copy = True
 
             # if cache is empty, will always be a cache miss
             if len(self.cache.index) == 0:
@@ -109,7 +116,7 @@ def _use_cache(instance, *args, **kwargs):
                     return_val,
                     time.time(),
                 ]
-                return _return_wrapper(return_val)
+                return _return_wrapper(return_val, copy_bool=return_copy)
 
             # checking to see if there is a cache hit
             for i in range(len(self.cache.index)):
@@ -121,7 +128,7 @@ def _use_cache(instance, *args, **kwargs):
                 ):
                     self.cache.iloc[i, 5] = time.time()
                     return_val = self.cache.iloc[i, 4]
-                    return _return_wrapper(self.cache.iloc[i, 4])
+                    return _return_wrapper(self.cache.iloc[i, 4], copy_bool=return_copy)
 
             # no cache hit, must check cache limit, and if limit is going to be exceeded...remove least recently used and add new entry
             # if memory type is 'ITEMS': drop the least recently used and then add new item
@@ -143,7 +150,7 @@ def _use_cache(instance, *args, **kwargs):
                     return_val,
                     time.time(),
                 ]
-                return _return_wrapper(self.cache.iloc[len(self.cache.index) - 1, 4])
+                return _return_wrapper(self.cache.iloc[len(self.cache.index) - 1, 4], copy_bool=return_copy)
             # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again
             elif self.storage_type == "RAM" and self._get_cache_size_bytes() > int(
                 re.split("[a-zA-Z]", self.size)[0]
@@ -178,6 +185,6 @@ def _use_cache(instance, *args, **kwargs):
                     time.time(),
                 ]
 
-            return _return_wrapper(return_val)
+            return _return_wrapper(return_val, copy_bool=return_copy)
 
         return _use_cache

From e0e80defdd9496bb7bfbb124ec71701c96b69d0d Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Sat, 9 Jul 2022 14:41:46 -0400
Subject: [PATCH 23/34] final changes to cache, need to write tests still

---
 mesmerize_core/caiman_extensions/cache.py | 31 ++++++++++-------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 5c231b3..8ba0d93 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -46,27 +46,28 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None):
             data=None,
             columns=["uuid", "function", "args", "kwargs", "return_val", "time_stamp"],
         )
-        if cache_size is None:
-            self.size = "1G"
-            self.storage_type = "RAM"
-        elif isinstance(cache_size, int):
-            self.storage_type = "ITEMS"
-            self.size = cache_size
-        else:
-            self.storage_type = "RAM"
-            self.size = cache_size
+        self.set_maxsize(cache_size)
 
     def get_cache(self):
         print(self.cache)
 
+    def get_cache2(self):
+        return self.cache
+
     def clear_cache(self):
         while len(self.cache.index) != 0:
             self.cache.drop(index=self.cache.index[-1], axis=0, inplace=True)
 
     def set_maxsize(self, max_size: Union[int, str]):
-        if isinstance(max_size, str):
+        if max_size is None:
             self.storage_type = "RAM"
-            self.size = max_size
+            self.size = 1024**3
+        elif isinstance(max_size, str):
+            self.storage_type = "RAM"
+            if max_size.endswith("G"):
+                self.size = int(max_size[:-1]) * 1024**3
+            elif max_size.endswith("M"):
+                self.size = int(max_size[:-1]) * 1024**2
         else:
             self.storage_type = "ITEMS"
             self.size = max_size
@@ -91,10 +92,6 @@ def _get_cache_size_bytes(self):
             else:
                 cache_size += sys.getsizeof(self.cache.iloc[i, 4])
 
-        if self.size.endswith("G"):
-            cache_size = cache_size / 1024**3
-        elif self.size.endswith("M"):
-            cache_size = cache_size / 1024**2
         return cache_size
 
     def use_cache(self, func):
@@ -152,9 +149,7 @@ def _use_cache(instance, *args, **kwargs):
                 ]
                 return _return_wrapper(self.cache.iloc[len(self.cache.index) - 1, 4], copy_bool=return_copy)
             # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again
-            elif self.storage_type == "RAM" and self._get_cache_size_bytes() > int(
-                re.split("[a-zA-Z]", self.size)[0]
-            ):
+            elif self.storage_type == "RAM":
                 while self._get_cache_size_bytes() > self.size:
                     self.cache.drop(
                         index=self.cache.sort_values(

From 34d32136eb00e99c685b979ee18014a08f56e2db Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Sat, 9 Jul 2022 15:29:24 -0400
Subject: [PATCH 24/34] removing get_cache2()

---
 mesmerize_core/caiman_extensions/cache.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index 8ba0d93..adada39 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -49,9 +49,6 @@ def __init__(self, cache_size: Optional[Union[int, str]] = None):
         self.set_maxsize(cache_size)
 
     def get_cache(self):
-        print(self.cache)
-
-    def get_cache2(self):
         return self.cache
 
     def clear_cache(self):
@@ -73,7 +70,7 @@ def set_maxsize(self, max_size: Union[int, str]):
             self.size = max_size
 
     def _get_cache_size_bytes(self):
-        """Returns in GiB or MB"""
+        """Returns in bytes"""
         cache_size = 0
         for i in range(len(self.cache.index)):
             if isinstance(self.cache.iloc[i, 4], np.ndarray):
@@ -147,7 +144,9 @@ def _use_cache(instance, *args, **kwargs):
                     return_val,
                     time.time(),
                 ]
-                return _return_wrapper(self.cache.iloc[len(self.cache.index) - 1, 4], copy_bool=return_copy)
+                return _return_wrapper(
+                    self.cache.iloc[len(self.cache.index) - 1, 4], copy_bool=return_copy
+                )
             # if memory type is 'RAM': add new item and then remove least recently used items until cache is under correct size again
             elif self.storage_type == "RAM":
                 while self._get_cache_size_bytes() > self.size:

From 8de3b151f9f2c13ea66059e3a8e487772b0226f2 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Sat, 9 Jul 2022 15:30:59 -0400
Subject: [PATCH 25/34] setting default copy = true

---
 mesmerize_core/caiman_extensions/cnmf.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py
index cfda76a..053125b 100644
--- a/mesmerize_core/caiman_extensions/cnmf.py
+++ b/mesmerize_core/caiman_extensions/cnmf.py
@@ -74,7 +74,7 @@ def get_output_path(self) -> Path:
 
     @validate("cnmf")
     @cache.use_cache
-    def get_output(self) -> CNMF:
+    def get_output(self, copy=True) -> CNMF:
         """
         Returns
         -------
@@ -89,7 +89,7 @@ def get_output(self) -> CNMF:
     @validate("cnmf")
     @cache.use_cache
     def get_spatial_masks(
-        self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01
+        self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01, copy=True
     ) -> np.ndarray:
         """
         Get binary masks of the spatial components at the given `ixs`
@@ -156,7 +156,7 @@ def _get_spatial_contours(
     @validate("cnmf")
     @cache.use_cache
     def get_spatial_contours(
-        self, ixs_components: Optional[np.ndarray] = None
+        self, ixs_components: Optional[np.ndarray] = None, copy=True
     ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
         """
         Get the contour and center of mass for each spatial footprint
@@ -190,7 +190,7 @@ def get_spatial_contours(
     @validate("cnmf")
     @cache.use_cache
     def get_temporal_components(
-        self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False
+        self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False, copy=True
     ) -> np.ndarray:
         """
         Get the temporal components for this CNMF item

From 62755a7183170f4c4d7808b65347f5184423da79 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Sat, 9 Jul 2022 15:31:53 -0400
Subject: [PATCH 26/34] adding tests for cache, still need to debug

---
 tests/test_core.py | 129 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/tests/test_core.py b/tests/test_core.py
index e54e5ff..33bfc30 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1,5 +1,6 @@
 import os
 
+import numpy as np
 from caiman.utils.utils import load_dict_from_hdf5
 from caiman.source_extraction.cnmf import cnmf
 import numpy.testing
@@ -24,6 +25,7 @@
 import shutil
 from zipfile import ZipFile
 from pprint import pprint
+from mesmerize_core.caiman_extensions import mcorr, cnmf
 
 tmp_dir = Path(os.path.dirname(os.path.abspath(__file__)), "tmp")
 vid_dir = Path(os.path.dirname(os.path.abspath(__file__)), "videos")
@@ -1036,3 +1038,130 @@ def test_remove_item():
     assert df.isin([f"test-{algo}"]).any().any() == False
     assert df.isin([f"test1-{algo}"]).any().any() == False
     assert df.empty == True
+
+def test_cache():
+    set_parent_raw_data_path(vid_dir)
+    algo = "mcorr"
+
+    df, batch_path = _create_tmp_batch()
+
+    batch_path = Path(batch_path)
+    batch_dir = batch_path.parent
+
+    input_movie_path = get_datafile(algo)
+    print(input_movie_path)
+
+    df.caiman.add_item(
+        algo=algo,
+        name=f"test-{algo}",
+        input_movie_path=input_movie_path,
+        params=test_params[algo],
+    )
+
+    assert df.iloc[-1]["algo"] == algo
+    assert df.iloc[-1]["name"] == f"test-{algo}"
+    assert df.iloc[-1]["params"] == test_params[algo]
+    assert df.iloc[-1]["outputs"] is None
+    try:
+        UUID(df.iloc[-1]["uuid"])
+    except:
+        pytest.fail("Something wrong with setting UUID for batch items")
+
+    assert vid_dir.joinpath(df.iloc[-1]["input_movie_path"]) == vid_dir.joinpath(
+        f"{algo}.tif"
+    )
+
+    process = df.iloc[-1].caiman.run()
+    process.wait()
+
+    df = load_batch(batch_path)
+
+    with pd.option_context("display.max_rows", None, "display.max_columns", None):
+        print(df)
+
+    pprint(df.iloc[-1]["outputs"], width=-1)
+    print(df.iloc[-1]["outputs"]["traceback"])
+    assert df.iloc[-1]["outputs"]["success"] is True
+    assert df.iloc[-1]["outputs"]["traceback"] is None
+
+    assert (
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]),
+        f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap',
+    )
+    )
+
+    algo = "cnmf"
+    print("Testing cnmf")
+    input_movie_path = df.iloc[-1].mcorr.get_output_path()
+    df.caiman.add_item(
+        algo=algo,
+        name=f"test-{algo}",
+        input_movie_path=input_movie_path,
+        params=test_params[algo],
+    )
+
+    assert df.iloc[-1]["algo"] == algo
+    assert df.iloc[-1]["name"] == f"test-{algo}"
+    assert df.iloc[-1]["params"] == test_params[algo]
+    assert df.iloc[-1]["outputs"] is None
+    try:
+        UUID(df.iloc[-1]["uuid"])
+    except:
+        pytest.fail("Something wrong with setting UUID for batch items")
+    print("cnmf input_movie_path:", df.iloc[-1]["input_movie_path"])
+    assert batch_dir.joinpath(df.iloc[-1]["input_movie_path"]) == input_movie_path
+
+    process = df.iloc[-1].caiman.run()
+    process.wait()
+
+    df = load_batch(batch_path)
+
+    with pd.option_context("display.max_rows", None, "display.max_columns", None):
+        print(df)
+
+    pprint(df.iloc[-1]["outputs"], width=-1)
+    print(df.iloc[-1]["outputs"]["traceback"])
+
+    # test that cache values are returned when calls are made to same function
+
+    # testing that cache size limits work
+    cnmf.cache.set_maxsize("1M")
+    cnmf_output = df.iloc[-1].cnmf.get_output()
+    hex_get_output = hex(id(cnmf_output))
+    cache = cnmf.cache.get_cache()
+    hex1 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item()))
+    #assert(hex(id(df.iloc[-1].cnmf.get_output(copy=False))) == hex1)
+    #assert(hex_get_output != hex1)
+    time_stamp1 = cache[cache["function"] == "get_output"]["time_stamp"].item()
+    df.iloc[-1].cnmf.get_temporal_components()
+    df.iloc[-1].cnmf.get_spatial_contours()
+    df.iloc[-1].cnmf.get_spatial_masks()
+    df.iloc[-1].cnmf.get_temporal_components(np.arange(7))
+    df.iloc[-1].cnmf.get_temporal_components(np.arange(8))
+    df.iloc[-1].cnmf.get_temporal_components(np.arange(9))
+    df.iloc[-1].cnmf.get_temporal_components(np.arange(6))
+    df.iloc[-1].cnmf.get_temporal_components(np.arange(5))
+    df.iloc[-1].cnmf.get_temporal_components(np.arange(4))
+    df.iloc[-1].cnmf.get_temporal_components(np.arange(3))
+    df.iloc[-1].cnmf.get_spatial_masks(np.arange(8))
+    df.iloc[-1].cnmf.get_spatial_masks(np.arange(9))
+    df.iloc[-1].cnmf.get_spatial_masks(np.arange(7))
+    df.iloc[-1].cnmf.get_spatial_masks(np.arange(6))
+    df.iloc[-1].cnmf.get_spatial_masks(np.arange(5))
+    df.iloc[-1].cnmf.get_spatial_masks(np.arange(4))
+    df.iloc[-1].cnmf.get_spatial_masks(np.arange(3))
+    time_stamp2 = cache[cache["function"] == "get_output"]["time_stamp"].item()
+    hex2 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item()))
+    assert(cache[cache["function"] == "get_output"].index.size == 1)
+    assert(len(cnmf.cache.get_cache().index) == 17)
+    assert(time_stamp2 > time_stamp1)
+    assert(hex1 == hex2)
+
+
+
+
+
+

From 7ee7df8ebd6a4a1931cdb1be28e8966416ec85db Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Wed, 13 Jul 2022 10:17:49 -0400
Subject: [PATCH 27/34] tests for cache, still need to fix issue with maxsize=0

---
 tests/test_core.py | 337 ++++++++++++++++++++++++---------------------
 1 file changed, 181 insertions(+), 156 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index 33bfc30..ec07fb6 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -169,34 +169,34 @@ def test_mcorr():
 
     # test that batch path is propagated to pd.Series
     assert (
-        df.attrs["batch_path"]
-        == df.paths.get_batch_path()
-        == df.iloc[-1].paths.get_batch_path()
-        == df.iloc[-1].attrs["batch_path"]
+            df.attrs["batch_path"]
+            == df.paths.get_batch_path()
+            == df.iloc[-1].paths.get_batch_path()
+            == df.iloc[-1].attrs["batch_path"]
     )
 
     # test that path resolve works for parent_raw_dir
     rel_input_movie_path = input_movie_path.relative_to(vid_dir)
     assert (
-        df.paths.resolve(rel_input_movie_path)
-        == df.iloc[-1].paths.resolve(rel_input_movie_path)
-        == input_movie_path
+            df.paths.resolve(rel_input_movie_path)
+            == df.iloc[-1].paths.resolve(rel_input_movie_path)
+            == input_movie_path
     )
     # test that path splitting works for parent_raw_dir
     split = (vid_dir, input_movie_path.relative_to(vid_dir))
     assert (
-        df.paths.split(input_movie_path)
-        == df.iloc[-1].paths.split(input_movie_path)
-        == split
+            df.paths.split(input_movie_path)
+            == df.iloc[-1].paths.split(input_movie_path)
+            == split
     )
     # test that the input_movie_path in the DataFrame rows are relative
     assert Path(df.iloc[-1]["input_movie_path"]) == split[1]
 
     assert (
-        get_full_raw_data_path(df.iloc[-1]["input_movie_path"])
-        == vid_dir.joinpath(f"{algo}.tif")
-        == vid_dir.joinpath(df.iloc[-1]["input_movie_path"])
-        == df.paths.resolve(df.iloc[-1]["input_movie_path"])
+            get_full_raw_data_path(df.iloc[-1]["input_movie_path"])
+            == vid_dir.joinpath(f"{algo}.tif")
+            == vid_dir.joinpath(df.iloc[-1]["input_movie_path"])
+            == df.paths.resolve(df.iloc[-1]["input_movie_path"])
     )
 
     process = df.iloc[-1].caiman.run()
@@ -219,76 +219,76 @@ def test_mcorr():
     )
     rel_mcorr_memmap_path = mcorr_memmap_path.relative_to(batch_dir)
     assert (
-        df.paths.resolve(rel_mcorr_memmap_path)
-        == df.iloc[-1].paths.resolve(rel_mcorr_memmap_path)
-        == mcorr_memmap_path
+            df.paths.resolve(rel_mcorr_memmap_path)
+            == df.iloc[-1].paths.resolve(rel_mcorr_memmap_path)
+            == mcorr_memmap_path
     )
     # test that path splitting works for batch_dir
     split = (batch_dir, mcorr_memmap_path.relative_to(batch_dir))
     assert (
-        df.paths.split(mcorr_memmap_path)
-        == df.iloc[-1].paths.split(mcorr_memmap_path)
-        == split
+            df.paths.split(mcorr_memmap_path)
+            == df.iloc[-1].paths.split(mcorr_memmap_path)
+            == split
     )
 
     assert (
-        input_movie_path
-        == df.iloc[-1].caiman.get_input_movie_path()
-        == df.paths.resolve(df.iloc[-1]["input_movie_path"])
+            input_movie_path
+            == df.iloc[-1].caiman.get_input_movie_path()
+            == df.paths.resolve(df.iloc[-1]["input_movie_path"])
     )
 
     # test to check mmap output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]),
-            f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap',
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]),
+        f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap',
+    )
     )
 
     # test to check mean-projection output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy'
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy'
+    )
     )
 
     # test to check std-projection output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy'
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy'
+    )
     )
 
     # test to check max-projection output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy'
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy'
+    )
     )
 
     # test to check correlation image output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"])
-        == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy')
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"])
+            == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy')
     )
 
     # test to check mcorr get_output_path()
     assert (
-        df.iloc[-1].mcorr.get_output_path()
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]),
-            f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap',
-        )
-        == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"])
+            df.iloc[-1].mcorr.get_output_path()
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]),
+        f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap',
+    )
+            == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"])
     )
 
     # test to check mcorr get_output()
@@ -374,12 +374,12 @@ def test_cnmf():
     assert df.iloc[-1]["outputs"]["traceback"] is None
 
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]),
-            f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap',
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["mcorr-output-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["mcorr-output-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]),
+        f'{df.iloc[-1]["uuid"]}-mcorr_els__d1_60_d2_80_d3_1_order_F_frames_2000_.mmap',
+    )
     )
 
     algo = "cnmf"
@@ -419,59 +419,59 @@ def test_cnmf():
     assert df.iloc[-1]["outputs"]["traceback"] is None
 
     assert (
-        input_movie_path
-        == df.iloc[-1].caiman.get_input_movie_path()
-        == df.paths.resolve(df.iloc[-1]["input_movie_path"])
+            input_movie_path
+            == df.iloc[-1].caiman.get_input_movie_path()
+            == df.paths.resolve(df.iloc[-1]["input_movie_path"])
     )
 
     assert (
-        batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5')
-        == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
-        == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
+            batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5')
+            == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
+            == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
     )
 
     # test to check mmap output path
     assert (
-        batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]),
-            f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_60_d2_80_d3_1_order_C_frames_2000_.mmap',
-        )
-        == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"])
-        == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"])
+            batch_dir.joinpath(
+                str(df.iloc[-1]["uuid"]),
+                f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_60_d2_80_d3_1_order_C_frames_2000_.mmap',
+            )
+            == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"])
+            == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"])
     )
 
     # test to check mean-projection output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy'
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy'
+    )
     )
 
     # test to check std-projection output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy'
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy'
+    )
     )
 
     # test to check max-projection output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy'
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy'
+    )
     )
 
     # test to check correlation image output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"])
-        == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy')
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"])
+            == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy')
     )
 
     print("testing cnmf.get_cnmf_memmap()")
@@ -495,10 +495,10 @@ def test_cnmf():
 
     # test to check cnmf get_output_path()
     assert (
-        df.iloc[-1].cnmf.get_output_path()
-        == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
-        == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5')
+            df.iloc[-1].cnmf.get_output_path()
+            == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
+            == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}.hdf5')
     )
 
     # test to check cnmf get_output()
@@ -526,11 +526,11 @@ def test_cnmf():
         allow_pickle=True,
     )
     for contour, actual_contour in zip(
-        cnmf_spatial_contours_contours, cnmf_spatial_contours_contours_actual
+            cnmf_spatial_contours_contours, cnmf_spatial_contours_contours_actual
     ):
         numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10)
     for com, actual_com in zip(
-        cnmf_spatial_contours_coms, cnmf_spatial_contours_coms_actual
+            cnmf_spatial_contours_coms, cnmf_spatial_contours_coms_actual
     ):
         numpy.testing.assert_allclose(com, actual_com, rtol=1e-2, atol=1e-10)
 
@@ -554,9 +554,9 @@ def test_cnmf():
 
     # test to check caiman get_input_movie_path(), should be output of previous mcorr
     assert (
-        df.iloc[-1].caiman.get_input_movie_path()
-        == df.paths.resolve(df.iloc[-1]["input_movie_path"])
-        == batch_dir.joinpath(df.iloc[-1]["input_movie_path"])
+            df.iloc[-1].caiman.get_input_movie_path()
+            == df.paths.resolve(df.iloc[-1]["input_movie_path"])
+            == batch_dir.joinpath(df.iloc[-1]["input_movie_path"])
     )
 
     # test to check caiman get_correlation_img()
@@ -606,7 +606,7 @@ def test_cnmf():
         allow_pickle=True,
     )
     for contour, actual_contour in zip(
-        ixs_contours_contours, ixs_contours_contours_actual
+            ixs_contours_contours, ixs_contours_contours_actual
     ):
         numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10)
     for com, actual_com in zip(ixs_contours_coms, ixs_contours_coms_actual):
@@ -677,9 +677,9 @@ def test_cnmfe():
         pytest.fail("Something wrong with setting UUID for batch items")
 
     assert (
-        batch_dir.joinpath(df.iloc[-1]["input_movie_path"])
-        == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path())
-        == df.paths.resolve(df.iloc[-1]["input_movie_path"])
+            batch_dir.joinpath(df.iloc[-1]["input_movie_path"])
+            == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path())
+            == df.paths.resolve(df.iloc[-1]["input_movie_path"])
     )
 
     process = df.iloc[-1].caiman.run()
@@ -698,9 +698,9 @@ def test_cnmfe():
     assert df.iloc[-1]["outputs"]["traceback"] is None
 
     assert (
-        input_movie_path
-        == df.iloc[-1].caiman.get_input_movie_path()
-        == df.paths.resolve(df.iloc[-1]["input_movie_path"])
+            input_movie_path
+            == df.iloc[-1].caiman.get_input_movie_path()
+            == df.paths.resolve(df.iloc[-1]["input_movie_path"])
     )
 
     assert batch_dir.joinpath(
@@ -755,9 +755,9 @@ def test_cnmfe():
         pytest.fail("Something wrong with setting UUID for batch items")
 
     assert (
-        batch_dir.joinpath(df.iloc[-1]["input_movie_path"])
-        == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path())
-        == df.paths.resolve(df.iloc[-1]["input_movie_path"])
+            batch_dir.joinpath(df.iloc[-1]["input_movie_path"])
+            == batch_dir.joinpath(df.iloc[0].mcorr.get_output_path())
+            == df.paths.resolve(df.iloc[-1]["input_movie_path"])
     )
 
     process = df.iloc[-1].caiman.run()
@@ -776,9 +776,9 @@ def test_cnmfe():
     assert df.iloc[-1]["outputs"]["traceback"] is None
 
     assert (
-        input_movie_path
-        == df.iloc[-1].caiman.get_input_movie_path()
-        == df.paths.resolve(df.iloc[-1]["input_movie_path"])
+            input_movie_path
+            == df.iloc[-1].caiman.get_input_movie_path()
+            == df.paths.resolve(df.iloc[-1]["input_movie_path"])
     )
 
     assert batch_dir.joinpath(
@@ -787,53 +787,53 @@ def test_cnmfe():
 
     # test to check mmap output path
     assert (
-        batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]),
-            f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_128_d2_128_d3_1_order_C_frames_1000_.mmap',
-        )
-        == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"])
-        == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"])
+            batch_dir.joinpath(
+                str(df.iloc[-1]["uuid"]),
+                f'{df.iloc[-1]["uuid"]}_cnmf-memmap__d1_128_d2_128_d3_1_order_C_frames_1000_.mmap',
+            )
+            == df.paths.resolve(df.iloc[-1]["outputs"]["cnmf-memmap-path"])
+            == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-memmap-path"])
     )
 
     # test to check mean-projection output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy'
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["mean-projection-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["mean-projection-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_mean_projection.npy'
+    )
     )
 
     # test to check std-projection output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy'
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["std-projection-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["std-projection-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_std_projection.npy'
+    )
     )
 
     # test to check max-projection output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"])
-        == batch_dir.joinpath(
-            str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy'
-        )
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["max-projection-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["max-projection-path"])
+            == batch_dir.joinpath(
+        str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_max_projection.npy'
+    )
     )
 
     # test to check correlation image output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"])
-        == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy')
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["corr-img-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["corr-img-path"])
+            == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_cn.npy')
     )
 
     # test to check pnr image output path
     assert (
-        batch_dir.joinpath(df.iloc[-1]["outputs"]["pnr-image-path"])
-        == df.paths.resolve(df.iloc[-1]["outputs"]["pnr-image-path"])
-        == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_pn.npy')
+            batch_dir.joinpath(df.iloc[-1]["outputs"]["pnr-image-path"])
+            == df.paths.resolve(df.iloc[-1]["outputs"]["pnr-image-path"])
+            == batch_dir.joinpath(str(df.iloc[-1]["uuid"]), f'{df.iloc[-1]["uuid"]}_pn.npy')
     )
 
     # extension tests - full
@@ -857,9 +857,9 @@ def test_cnmfe():
 
     # test to check cnmf get_output_path()
     assert (
-        df.iloc[-1].cnmf.get_output_path()
-        == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
-        == df.iloc[-1].paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
+            df.iloc[-1].cnmf.get_output_path()
+            == batch_dir.joinpath(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
+            == df.iloc[-1].paths.resolve(df.iloc[-1]["outputs"]["cnmf-hdf5-path"])
     )
 
     # test to check cnmf get_output()
@@ -887,11 +887,11 @@ def test_cnmfe():
         allow_pickle=True,
     )
     for contour, actual_contour in zip(
-        cnmfe_spatial_contours_contours, cnmfe_spatial_contours_contours_actual
+            cnmfe_spatial_contours_contours, cnmfe_spatial_contours_contours_actual
     ):
         numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10)
     for com, actual_com in zip(
-        cnmfe_spatial_contours_coms, cnmfe_spatial_contours_coms_actual
+            cnmfe_spatial_contours_coms, cnmfe_spatial_contours_coms_actual
     ):
         numpy.testing.assert_allclose(com, actual_com, rtol=1e-2, atol=1e-10)
 
@@ -946,7 +946,7 @@ def test_cnmfe():
         allow_pickle=True,
     )
     for contour, actual_contour in zip(
-        ixs_contours_contours, ixs_contours_contours_actual
+            ixs_contours_contours, ixs_contours_contours_actual
     ):
         numpy.testing.assert_allclose(contour, actual_contour, rtol=1e-2, atol=1e-10)
     for com, actual_com in zip(ixs_contours_coms, ixs_contours_coms_actual):
@@ -1000,9 +1000,9 @@ def test_remove_item():
         pytest.fail("Something wrong with setting UUID for batch items")
 
     assert (
-        get_full_raw_data_path(df.iloc[-1]["input_movie_path"])
-        == vid_dir.joinpath(f"{algo}.tif")
-        == vid_dir.joinpath(df.iloc[-1]["input_movie_path"])
+            get_full_raw_data_path(df.iloc[-1]["input_movie_path"])
+            == vid_dir.joinpath(f"{algo}.tif")
+            == vid_dir.joinpath(df.iloc[-1]["input_movie_path"])
     )
 
     df.caiman.add_item(
@@ -1022,9 +1022,9 @@ def test_remove_item():
         pytest.fail("Something wrong with setting UUID for batch items")
 
     assert (
-        get_full_raw_data_path(df.iloc[-1]["input_movie_path"])
-        == vid_dir.joinpath(f"{algo}.tif")
-        == vid_dir.joinpath(df.iloc[-1]["input_movie_path"])
+            get_full_raw_data_path(df.iloc[-1]["input_movie_path"])
+            == vid_dir.joinpath(f"{algo}.tif")
+            == vid_dir.joinpath(df.iloc[-1]["input_movie_path"])
     )
     # Check removing specific rows works
     assert df.iloc[0]["name"] == f"test-{algo}"
@@ -1039,6 +1039,7 @@ def test_remove_item():
     assert df.isin([f"test1-{algo}"]).any().any() == False
     assert df.empty == True
 
+
 def test_cache():
     set_parent_raw_data_path(vid_dir)
     algo = "mcorr"
@@ -1133,8 +1134,8 @@ def test_cache():
     hex_get_output = hex(id(cnmf_output))
     cache = cnmf.cache.get_cache()
     hex1 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item()))
-    #assert(hex(id(df.iloc[-1].cnmf.get_output(copy=False))) == hex1)
-    #assert(hex_get_output != hex1)
+    # assert(hex(id(df.iloc[-1].cnmf.get_output(copy=False))) == hex1)
+    # assert(hex_get_output != hex1)
     time_stamp1 = cache[cache["function"] == "get_output"]["time_stamp"].item()
     df.iloc[-1].cnmf.get_temporal_components()
     df.iloc[-1].cnmf.get_spatial_contours()
@@ -1155,12 +1156,36 @@ def test_cache():
     df.iloc[-1].cnmf.get_spatial_masks(np.arange(3))
     time_stamp2 = cache[cache["function"] == "get_output"]["time_stamp"].item()
     hex2 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item()))
-    assert(cache[cache["function"] == "get_output"].index.size == 1)
-    assert(len(cnmf.cache.get_cache().index) == 17)
-    assert(time_stamp2 > time_stamp1)
-    assert(hex1 == hex2)
+    assert (cache[cache["function"] == "get_output"].index.size == 1)
+    assert (len(cnmf.cache.get_cache().index) == 17)
+    assert (time_stamp2 > time_stamp1)
+    assert (hex1 == hex2)
+
+    # test clear_cache()
+    cnmf.cache.clear_cache()
+    assert (len(cnmf.cache.get_cache().index) == 0)
+
+    import time
+
+    start = time.time()
+    df.iloc[-1].cnmf.get_output()
+    end = time.time()
+    assert (len(cnmf.cache.get_cache().index) == 1)
+
+    start2 = time.time()
+    df.iloc[-1].cnmf.get_output()
+    end2 = time.time()
+
+    assert(end2-start2 < end-start)
 
+    # test setting maxsize as 0
+    cnmf.cache.clear_cache()
+    assert (len(cnmf.cache.get_cache().index) == 0)
 
+    cnmf.cache.set_maxsize(0)
+    df.iloc[-1].cnmf.get_output()
+    print(cnmf.cache.get_cache())
+    # assert (len(cnmf.cache.get_cache().index) == 0)
 
 
 

From 2d53a51923ac90fbcc604d0112f6402571a2b699 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Wed, 13 Jul 2022 14:25:53 -0400
Subject: [PATCH 28/34] updating tests and cache to handle when cache is set to
 size 0

---
 mesmerize_core/caiman_extensions/cache.py |  4 ++++
 tests/test_core.py                        | 12 ++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index adada39..b01d69a 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -99,6 +99,10 @@ def _use_cache(instance, *args, **kwargs):
             else:
                 return_copy = True
 
+            if self.size == 0:
+                self.clear_cache()
+                return _return_wrapper(func(instance, *args, **kwargs), return_copy)
+
             # if cache is empty, will always be a cache miss
             if len(self.cache.index) == 0:
                 return_val = func(instance, *args, **kwargs)
diff --git a/tests/test_core.py b/tests/test_core.py
index ec07fb6..e09d754 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1183,9 +1183,17 @@ def test_cache():
     assert (len(cnmf.cache.get_cache().index) == 0)
 
     cnmf.cache.set_maxsize(0)
+    start = time.time()
     df.iloc[-1].cnmf.get_output()
-    print(cnmf.cache.get_cache())
-    # assert (len(cnmf.cache.get_cache().index) == 0)
+    end = time.time()
+    assert (len(cnmf.cache.get_cache().index) == 0)
+
+    start2 = time.time()
+    df.iloc[-1].cnmf.get_output()
+    end2 = time.time()
+    assert (len(cnmf.cache.get_cache().index) == 0)
+    assert(abs((end-start)-(end2-start2)) < 0.01)
+
 
 
 

From 66292165c91c0976cd5e1f9f186918eb3a80f12f Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Fri, 15 Jul 2022 18:29:06 -0400
Subject: [PATCH 29/34] updates to cache tests

---
 tests/test_core.py | 70 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 65 insertions(+), 5 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index e09d754..eb6ed50 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -25,7 +25,8 @@
 import shutil
 from zipfile import ZipFile
 from pprint import pprint
-from mesmerize_core.caiman_extensions import mcorr, cnmf
+from mesmerize_core.caiman_extensions import cnmf
+import time
 
 tmp_dir = Path(os.path.dirname(os.path.abspath(__file__)), "tmp")
 vid_dir = Path(os.path.dirname(os.path.abspath(__file__)), "videos")
@@ -1157,31 +1158,40 @@ def test_cache():
     time_stamp2 = cache[cache["function"] == "get_output"]["time_stamp"].item()
     hex2 = hex(id(cache[cache["function"] == "get_output"]["return_val"].item()))
     assert (cache[cache["function"] == "get_output"].index.size == 1)
+    # after adding enough items for cache to exceed max size, cache should remove least recently used items until
+    # size is back under max
     assert (len(cnmf.cache.get_cache().index) == 17)
+    # the time stamp of the get_output entry should now be greater than the original time
+    # stamp because the cached item has been returned again and therefore accessed more recently
     assert (time_stamp2 > time_stamp1)
+    # the hex id of the item in the cache when get_output is first called
+    # should be the same hex id of the item in the cache when get_output is called again
     assert (hex1 == hex2)
 
     # test clear_cache()
     cnmf.cache.clear_cache()
     assert (len(cnmf.cache.get_cache().index) == 0)
 
-    import time
-
+    # checking that cache is cleared, checking speed at which item is returned
     start = time.time()
     df.iloc[-1].cnmf.get_output()
     end = time.time()
     assert (len(cnmf.cache.get_cache().index) == 1)
 
+    # second call to item now added to cache, time to return item should be much faster than before because item has
+    # now been cached
     start2 = time.time()
     df.iloc[-1].cnmf.get_output()
     end2 = time.time()
-
     assert(end2-start2 < end-start)
 
-    # test setting maxsize as 0
+    # testing clear_cache() again, length of dataframe should be zero
     cnmf.cache.clear_cache()
     assert (len(cnmf.cache.get_cache().index) == 0)
 
+    # test setting maxsize as 0, should effectively disable the cache...additionally, time to return an item called
+    # twice should roughly be the same because item is not being stored in the cache
+    # cache length should remain zero throughout calls to extension functions
     cnmf.cache.set_maxsize(0)
     start = time.time()
     df.iloc[-1].cnmf.get_output()
@@ -1194,6 +1204,56 @@ def test_cache():
     assert (len(cnmf.cache.get_cache().index) == 0)
     assert(abs((end-start)-(end2-start2)) < 0.01)
 
+    # test to check that separate cache items are being returned for different batch items
+    # must add another item to the batch, running cnmfe
+
+    input_movie_path = get_datafile("cnmfe")
+    print(input_movie_path)
+    df.caiman.add_item(
+        algo="mcorr",
+        name=f"test-cnmfe-mcorr",
+        input_movie_path=input_movie_path,
+        params=test_params["mcorr"],
+    )
+    process = df.iloc[-1].caiman.run()
+    process.wait()
+
+    df = load_batch(batch_path)
+
+    algo = "cnmfe"
+    param_name = "cnmfe_full"
+    input_movie_path = df.iloc[-1].mcorr.get_output_path()
+    print(input_movie_path)
+
+    df.caiman.add_item(
+        algo=algo,
+        name=f"test-{algo}",
+        input_movie_path=input_movie_path,
+        params=test_params[param_name],
+    )
+
+    process = df.iloc[-1].caiman.run()
+    process.wait()
+
+    df = load_batch(batch_path)
+
+    cnmf.cache.set_maxsize("1M")
+
+    df.iloc[1].cnmf.get_output() # cnmf output
+    df.iloc[-1].cnmf.get_output() # cnmfe output
+
+    cache = cnmf.cache.get_cache()
+
+    # checking that both outputs from different batch items are added to the cache
+    assert(len(cache.index) == 2)
+
+    # checking that the uuids of the outputs from the different batch items are not the same
+    assert(cache.iloc[-1]["uuid"] != cache.iloc[-2]["uuid"])
+
+    # checking that the uuid of the output in the cache is the correct uuid of the batch item in the df
+    assert(cache.iloc[-1]["uuid"] == df.iloc[-1]["uuid"])
+
+
 
 
 

From a1d30da78a14ec8b162b392a2662711b0d358a54 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Sun, 17 Jul 2022 22:05:21 -0400
Subject: [PATCH 30/34] updates to cache tests

---
 mesmerize_core/caiman_extensions/cache.py |  4 ++--
 mesmerize_core/caiman_extensions/cnmf.py  |  8 ++++----
 tests/test_core.py                        | 24 +++++++++++++++++++++++
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/cache.py b/mesmerize_core/caiman_extensions/cache.py
index b01d69a..c7814f3 100644
--- a/mesmerize_core/caiman_extensions/cache.py
+++ b/mesmerize_core/caiman_extensions/cache.py
@@ -94,8 +94,8 @@ def _get_cache_size_bytes(self):
     def use_cache(self, func):
         @wraps(func)
         def _use_cache(instance, *args, **kwargs):
-            if "copy" in kwargs.keys():
-                return_copy = kwargs["copy"]
+            if "return_copy" in kwargs.keys():
+                return_copy = kwargs["return_copy"]
             else:
                 return_copy = True
 
diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py
index 053125b..3c853c3 100644
--- a/mesmerize_core/caiman_extensions/cnmf.py
+++ b/mesmerize_core/caiman_extensions/cnmf.py
@@ -74,7 +74,7 @@ def get_output_path(self) -> Path:
 
     @validate("cnmf")
     @cache.use_cache
-    def get_output(self, copy=True) -> CNMF:
+    def get_output(self, return_copy=True) -> CNMF:
         """
         Returns
         -------
@@ -89,7 +89,7 @@ def get_output(self, copy=True) -> CNMF:
     @validate("cnmf")
     @cache.use_cache
     def get_spatial_masks(
-        self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01, copy=True
+        self, ixs_components: Optional[np.ndarray] = None, threshold: float = 0.01, return_copy=True
     ) -> np.ndarray:
         """
         Get binary masks of the spatial components at the given `ixs`
@@ -156,7 +156,7 @@ def _get_spatial_contours(
     @validate("cnmf")
     @cache.use_cache
     def get_spatial_contours(
-        self, ixs_components: Optional[np.ndarray] = None, copy=True
+        self, ixs_components: Optional[np.ndarray] = None, return_copy=True
     ) -> Tuple[List[np.ndarray], List[np.ndarray]]:
         """
         Get the contour and center of mass for each spatial footprint
@@ -190,7 +190,7 @@ def get_spatial_contours(
     @validate("cnmf")
     @cache.use_cache
     def get_temporal_components(
-        self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False, copy=True
+        self, ixs_components: Optional[np.ndarray] = None, add_background: bool = False, return_copy=True
     ) -> np.ndarray:
         """
         Get the temporal components for this CNMF item
diff --git a/tests/test_core.py b/tests/test_core.py
index eb6ed50..c756217 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1253,6 +1253,30 @@ def test_cache():
     # checking that the uuid of the output in the cache is the correct uuid of the batch item in the df
     assert(cache.iloc[-1]["uuid"] == df.iloc[-1]["uuid"])
 
+    # call get output from cnmf, check that it is the most recent thing called in the cache
+    df.iloc[1].cnmf.get_output()
+    cnmf_uuid = df.iloc[1]["uuid"]
+    most_recently_called = cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1]
+    cache_uuid = most_recently_called["uuid"]
+    assert(cnmf_uuid == cache_uuid)
+
+    # use the low_rank_background param to check that each output is the right one (cnmf vs cnmfe)
+    output = df.iloc[1].cnmf.get_output()
+    assert(output.params.patch["low_rank_background"] == True)
+    output2 = df.iloc[-1].cnmf.get_output()
+    assert(output2.params.patch["low_rank_background"] == False)
+    
+    # test for copy
+    # if return_copy=True, then hex id of calls to the same function should be false
+    assert(hex(id(output)) != hex(id(cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1])))
+    # if return_copy=False, then hex id of calls to the same function should be true
+    df.iloc[1].cnmf.get_output(return_copy=False)
+    df.iloc[1].cnmf.get_output(return_copy=False)
+    output = df.iloc[1].cnmf.get_output(return_copy=False)
+    output2 = df.iloc[1].cnmf.get_output(return_copy=False)
+    assert(hex(id(output)) == hex(id(output2)))
+
+
 
 
 

From 6e3437329b067fc1c5c5c41f1238db3a55aa03a2 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Sun, 17 Jul 2022 22:21:27 -0400
Subject: [PATCH 31/34] fix return_copy tests: fetch fresh output, compare with cached return_val

---
 tests/test_core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_core.py b/tests/test_core.py
index c756217..9aa95d7 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -1268,13 +1268,13 @@ def test_cache():
     
     # test for copy
     # if return_copy=True, then hex id of calls to the same function should be false
+    output = df.iloc[1].cnmf.get_output()
     assert(hex(id(output)) != hex(id(cache.sort_values(by=["time_stamp"], ascending=True).iloc[-1])))
     # if return_copy=False, then hex id of calls to the same function should be true
-    df.iloc[1].cnmf.get_output(return_copy=False)
-    df.iloc[1].cnmf.get_output(return_copy=False)
     output = df.iloc[1].cnmf.get_output(return_copy=False)
     output2 = df.iloc[1].cnmf.get_output(return_copy=False)
     assert(hex(id(output)) == hex(id(output2)))
+    assert(hex(id(cnmf.cache.get_cache().iloc[-1]["return_val"])) == hex(id(output)))
 
 
 

From 0928f24c044d44f974cfb1b615532751250e4d5d Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Sun, 17 Jul 2022 22:26:44 -0400
Subject: [PATCH 32/34] insignificant merge conflict

---
 mesmerize_core/caiman_extensions/cnmf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mesmerize_core/caiman_extensions/cnmf.py b/mesmerize_core/caiman_extensions/cnmf.py
index 3c853c3..e29caf6 100644
--- a/mesmerize_core/caiman_extensions/cnmf.py
+++ b/mesmerize_core/caiman_extensions/cnmf.py
@@ -259,7 +259,7 @@ def get_reconstructed_movie(
             ixs_frames = (ixs_frames, ixs_frames + 1)
 
         dn = cnmf_obj.estimates.A[:, idx_components].dot(
-            cnmf_obj.estimates.C[idx_components, ixs_frames[0] : ixs_frames[1]]
+            cnmf_obj.estimates.C[idx_components, ixs_frames[0]: ixs_frames[1]]
         )
 
         if add_background:

From c13888c7e2b98b53d52d99f9fcf4273b1b013b30 Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Thu, 21 Jul 2022 10:26:46 -0400
Subject: [PATCH 33/34] fixing cache tests

---
 tests/test_core.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/test_core.py b/tests/test_core.py
index 9aa95d7..058d7ce 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -633,6 +633,8 @@ def test_cnmf():
         ixs_reconstructed_movie, ixs_reconstructed_movie_actual, rtol=1e2, atol=1e-10
     )
 
+    cnmf.cache.clear_cache()
+
 
 def test_cnmfe():
     set_parent_raw_data_path(vid_dir)
@@ -977,6 +979,8 @@ def test_cnmfe():
         ixs_reconstructed_movie, ixs_reconstructed_movie_actual, rtol=1e2, atol=1e-10
     )
 
+    cnmf.cache.clear_cache()
+
 
 def test_remove_item():
     set_parent_raw_data_path(vid_dir)

From 98b78d3d506339b211d5378b495011a290c0698e Mon Sep 17 00:00:00 2001
From: Caitlin Lewis <clewis7@email.unc.edu>
Date: Thu, 21 Jul 2022 17:18:42 -0400
Subject: [PATCH 34/34] remove cache from mcorr get_output, clear cache at start of test_cache

---
 mesmerize_core/caiman_extensions/mcorr.py | 1 -
 tests/test_core.py                        | 8 +++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/mesmerize_core/caiman_extensions/mcorr.py b/mesmerize_core/caiman_extensions/mcorr.py
index fa14418..70c5866 100644
--- a/mesmerize_core/caiman_extensions/mcorr.py
+++ b/mesmerize_core/caiman_extensions/mcorr.py
@@ -33,7 +33,6 @@ def get_output_path(self) -> Path:
         return self._series.paths.resolve(self._series["outputs"]["mcorr-output-path"])
 
     @validate("mcorr")
-    @cache.use_cache
     def get_output(self) -> np.ndarray:
         """
         Get the motion corrected output as a memmaped numpy array, allows fast random-access scrolling.
diff --git a/tests/test_core.py b/tests/test_core.py
index 058d7ce..e4c5980 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -633,8 +633,6 @@ def test_cnmf():
         ixs_reconstructed_movie, ixs_reconstructed_movie_actual, rtol=1e2, atol=1e-10
     )
 
-    cnmf.cache.clear_cache()
-
 
 def test_cnmfe():
     set_parent_raw_data_path(vid_dir)
@@ -979,8 +977,6 @@ def test_cnmfe():
         ixs_reconstructed_movie, ixs_reconstructed_movie_actual, rtol=1e2, atol=1e-10
     )
 
-    cnmf.cache.clear_cache()
-
 
 def test_remove_item():
     set_parent_raw_data_path(vid_dir)
@@ -1046,6 +1042,9 @@ def test_remove_item():
 
 
 def test_cache():
+    print("*** Testing cache ***")
+    cnmf.cache.clear_cache()
+
     set_parent_raw_data_path(vid_dir)
     algo = "mcorr"
 
@@ -1100,7 +1099,6 @@ def test_cache():
     )
 
     algo = "cnmf"
-    print("Testing cnmf")
     input_movie_path = df.iloc[-1].mcorr.get_output_path()
     df.caiman.add_item(
         algo=algo,