feat: reconstruct input data for EOF analysis

xarray-contrib · Feb 24, 2022 · 7ed306a · 7ed306a
1 parent ea39f02
commit 7ed306a
Show file tree

Hide file tree

Showing 6 changed files with 99 additions and 9 deletions.
diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py
diff --git a/xeofs/models/_eof_base.py b/xeofs/models/_eof_base.py
@@ -4,6 +4,8 @@
 import scipy as sc
 from sklearn.decomposition import PCA
 
+from ..utils.tools import get_mode_selector
+
 
 class _EOF_base():
  '''Base class for univariate EOF analysis.
@@ -43,13 +45,15 @@ def __init__(
  weights: Optional[np.ndarray] = None
  ):
  # Remove mean for each feature
- X -= X.mean(axis=0)
+ self._X_mean = X.mean(axis=0)
+ X -= self._X_mean
 
  # Weights are applied to features, not samples.
- if weights is None:
+ self._weights = weights
+ if self._weights is None:
  # Use int type to ensure that there won't be rounding errors
  # when applying trivial weighting (= all weights equal 1)
- weights = np.ones(X.shape[1], dtype=int)
+ self._weights = np.ones(X.shape[1], dtype=int)
 
  # Standardization is included as weights
  if norm:
@@ -60,8 +64,8 @@ def __init__(
  'normalization not possible.'
  )
  raise ValueError(err_msg)
- weights = weights / stdev
- X = X * weights
+ self._weights = self._weights / stdev
+ X = X * self._weights
 
  self.n_samples = X.shape[0]
  self.n_features = X.shape[1]
@@ -212,3 +216,62 @@ def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
  dist = sc.stats.beta(a, a, loc=-1, scale=2)
  pvals = 2 * dist.cdf(-abs(corr))
  return corr, pvals
+
+ def reconstruct_X(
+ self,
+ mode : Optional[Union[int, List[int], slice]] = None
+ ) -> np.ndarray:
+ '''Reconstruct original data field ``X`` using the PCs and EOFs.
+
+ If weights were applied, ``X`` will be automatically rescaled.
+
+ Parameters
+ ----------
+ mode : Optional[Union[int, List[int], slice]]
+ Mode(s) based on which ``X`` will be reconstructed. If ``mode`` is
+ an int, a single mode is used. If a list of integers is provided,
+ use all specified modes for reconstruction. Alternatively, you may
+ want to select a slice to reconstruct. The first mode is denoted
+ by 1 (and not by 0). If None then ``X`` is reconstructed using all
+ available modes (the default is None).
+
+ Examples
+ --------
+
+ Perform an analysis using some data ``X``:
+
+ >>> model = EOF(X, norm=True)
+ >>> model.solve()
+
+ Reconstruct ``X`` using all modes:
+
+ >>> model.reconstruct_X()
+
+ Reconstruct ``X`` using the first mode only:
+
+ >>> model.reconstruct_X(1)
+
+ Reconstruct ``X`` using mode 1, 3 and 4:
+
+ >>> model.reconstruct_X([1, 3, 4])
+
+ Reconstruct ``X`` using all modes up to mode 10 (inclusive):
+
+ >>> model.reconstruct_X(slice(10))
+
+ Reconstruct ``X`` using every second mode between 4 and 8 (both
+ inclusive):
+
+ >>> model.reconstruct_X(slice(4, 8, 2))
+
+
+ '''
+ eofs = self._eofs
+ pcs = self._pcs * self._singular_values
+ # Select modes to reconstruct X
+ mode = get_mode_selector(mode)
+ eofs = eofs[:, mode]
+ pcs = pcs[:, mode]
+ Xrec = pcs @ eofs.T
+ # Unweight and add mean
+ return (Xrec / self._weights) + self._X_mean
diff --git a/xeofs/models/eof.py b/xeofs/models/eof.py
@@ -1,4 +1,4 @@
-from typing import Optional, Union, Iterable, Tuple
+from typing import Optional, Union, Iterable, Tuple, List
 
 import numpy as np
 
@@ -116,3 +116,10 @@ def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
  corr = self._tf.back_transform_eofs(corr)
  pvals = self._tf.back_transform_eofs(pvals)
  return corr, pvals
+
+ def reconstruct_X(
+ self,
+ mode : Optional[Union[int, List[int], slice]] = None
+ ) -> np.ndarray:
+ Xrec = super().reconstruct_X(mode)
+ return self._tf.back_transform(Xrec)
diff --git a/xeofs/pandas/eof.py b/xeofs/pandas/eof.py
@@ -1,4 +1,4 @@
-from typing import Optional, Tuple
+from typing import Optional, Tuple, Union, List
 
 import numpy as np
 import pandas as pd
@@ -138,3 +138,12 @@ def eofs_as_correlation(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
  corr.columns = self._idx_mode
  pvals.columns = self._idx_mode
  return corr, pvals
+
+ def reconstruct_X(
+ self,
+ mode : Optional[Union[int, List[int], slice]] = None
+ ) -> pd.DataFrame:
+ Xrec = super().reconstruct_X(mode)
+ Xrec = self._tf.back_transform(Xrec)
+ Xrec.index = self._tf.index_samples
+ return Xrec
diff --git a/xeofs/utils/tools.py b/xeofs/utils/tools.py
@@ -18,7 +18,7 @@ def get_mode_selector(obj : Optional[Union[int, List[int], slice]]) -> Union[sli
  if obj is None:
  return slice(MAX_MODE)
  elif isinstance(obj, int):
- return slice(obj)
+ return [obj - 1]
  elif isinstance(obj, slice):
  # Reduce slice start by one so that "1" is the first element
  try:

diff --git a/xeofs/xarray/eof.py b/xeofs/xarray/eof.py
@@ -1,4 +1,4 @@
-from typing import Iterable, Optional, Union, Tuple
+from typing import Iterable, Optional, Union, Tuple, List
 
 import numpy as np
 import xarray as xr
@@ -195,3 +195,14 @@ def eofs_as_correlation(self) -> Tuple[xr.DataArray, xr.DataArray]:
  corr.name = 'correlation_coeffient'
  pvals.name = 'p_value'
  return corr, pvals
+
+ def reconstruct_X(
+ self,
+ mode : Optional[Union[int, List[int], slice]] = None
+ ) -> xr.DataArray:
+ Xrec = super().reconstruct_X(mode=mode)
+ Xrec = self._tf.back_transform(Xrec)
+ coords = {dim: self._tf.coords[dim] for dim in self._tf.dims_samples}
+ Xrec = Xrec.assign_coords(coords)
+ Xrec.name = 'X_reconstructed'
+ return Xrec