Skip to content

Commit

Permalink
feat: reconstruct input data for EOF analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
nicrie committed Feb 24, 2022
1 parent ea39f02 commit 7ed306a
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 9 deletions.
Empty file added tests/utils/__init__.py
Empty file.
73 changes: 68 additions & 5 deletions xeofs/models/_eof_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import scipy as sc
from sklearn.decomposition import PCA

from ..utils.tools import get_mode_selector


class _EOF_base():
'''Base class for univariate EOF analysis.
Expand Down Expand Up @@ -43,13 +45,15 @@ def __init__(
weights: Optional[np.ndarray] = None
):
# Remove mean for each feature
X -= X.mean(axis=0)
self._X_mean = X.mean(axis=0)
X -= self._X_mean

# Weights are applied to features, not samples.
if weights is None:
self._weights = weights
if self._weights is None:
# Use int type to ensure that there won't be rounding errors
# when applying trivial weighting (= all weights equal 1)
weights = np.ones(X.shape[1], dtype=int)
self._weights = np.ones(X.shape[1], dtype=int)

# Standardization is included as weights
if norm:
Expand All @@ -60,8 +64,8 @@ def __init__(
'normalization not possible.'
)
raise ValueError(err_msg)
weights = weights / stdev
X = X * weights
self._weights = self._weights / stdev
X = X * self._weights

self.n_samples = X.shape[0]
self.n_features = X.shape[1]
Expand Down Expand Up @@ -212,3 +216,62 @@ def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
dist = sc.stats.beta(a, a, loc=-1, scale=2)
pvals = 2 * dist.cdf(-abs(corr))
return corr, pvals

def reconstruct_X(
    self,
    mode : Optional[Union[int, List[int], slice]] = None
) -> np.ndarray:
    '''Rebuild the input data ``X`` from the PCs and EOFs.

    The selected modes are combined into a data field, the feature
    weights stored on the model are divided out again and the
    feature-wise mean that was removed from ``X`` is added back.

    Parameters
    ----------
    mode : Optional[Union[int, List[int], slice]]
        Mode(s) used for the reconstruction. Modes are counted from 1
        (not from 0). An int selects a single mode, a list of ints
        selects exactly the listed modes and a slice selects a range of
        modes. If None, ``X`` is reconstructed using all available modes
        (the default is None).

    Returns
    -------
    np.ndarray
        Reconstructed data, unweighted and with the mean added back.

    Examples
    --------

    Perform an analysis using some data ``X``:

    >>> model = EOF(X, norm=True)
    >>> model.solve()

    Reconstruct ``X`` using all modes:

    >>> model.reconstruct_X()

    Reconstruct ``X`` using the first mode only:

    >>> model.reconstruct_X(1)

    Reconstruct ``X`` using mode 1, 3 and 4:

    >>> model.reconstruct_X([1, 3, 4])

    Reconstruct ``X`` using all modes up to mode 10 (including):

    >>> model.reconstruct_X(slice(10))

    Reconstruct ``X`` using every second mode between 4 and 8 (both
    including):

    >>> model.reconstruct_X(slice(4, 8, 2))

    '''
    all_eofs = self._eofs
    # PCs are scaled by the singular values before recombination
    scaled_pcs = self._pcs * self._singular_values

    # Turn the 1-based user selection into a column selector
    selector = get_mode_selector(mode)
    chosen_eofs = all_eofs[:, selector]
    chosen_pcs = scaled_pcs[:, selector]

    weighted_anomalies = chosen_pcs @ chosen_eofs.T
    # Divide out the weights first, then restore the removed mean
    unweighted_anomalies = weighted_anomalies / self._weights
    return unweighted_anomalies + self._X_mean
9 changes: 8 additions & 1 deletion xeofs/models/eof.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Union, Iterable, Tuple
from typing import Optional, Union, Iterable, Tuple, List

import numpy as np

Expand Down Expand Up @@ -116,3 +116,10 @@ def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
corr = self._tf.back_transform_eofs(corr)
pvals = self._tf.back_transform_eofs(pvals)
return corr, pvals

def reconstruct_X(
    self,
    mode : Optional[Union[int, List[int], slice]] = None
) -> np.ndarray:
    '''Reconstruct ``X`` and pass it through the transformer.

    Parameters
    ----------
    mode : Optional[Union[int, List[int], slice]]
        Mode selection, forwarded unchanged to the parent class
        implementation (the default is None).

    Returns
    -------
    np.ndarray
        Result of ``self._tf.back_transform`` applied to the parent's
        reconstruction (presumably the layout of the original input;
        confirm against the transformer).

    '''
    flat_reconstruction = super().reconstruct_X(mode)
    restored = self._tf.back_transform(flat_reconstruction)
    return restored
11 changes: 10 additions & 1 deletion xeofs/pandas/eof.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Tuple
from typing import Optional, Tuple, Union, List

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -138,3 +138,12 @@ def eofs_as_correlation(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
corr.columns = self._idx_mode
pvals.columns = self._idx_mode
return corr, pvals

def reconstruct_X(
    self,
    mode : Optional[Union[int, List[int], slice]] = None
) -> pd.DataFrame:
    '''Reconstruct ``X`` and return it labelled with the sample index.

    Parameters
    ----------
    mode : Optional[Union[int, List[int], slice]]
        Mode selection, forwarded unchanged to the parent class
        implementation (the default is None).

    Returns
    -------
    pd.DataFrame
        Back-transformed reconstruction whose index is set to the
        transformer's ``index_samples``.

    '''
    reconstruction = super().reconstruct_X(mode)
    frame = self._tf.back_transform(reconstruction)
    # Label the rows with the sample index kept by the transformer
    frame.index = self._tf.index_samples
    return frame
2 changes: 1 addition & 1 deletion xeofs/utils/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def get_mode_selector(obj : Optional[Union[int, List[int], slice]]) -> Union[sli
if obj is None:
return slice(MAX_MODE)
elif isinstance(obj, int):
return slice(obj)
return [obj - 1]
elif isinstance(obj, slice):
# Reduce slice start by one so that "1" is the first element
try:
Expand Down
13 changes: 12 additions & 1 deletion xeofs/xarray/eof.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Iterable, Optional, Union, Tuple
from typing import Iterable, Optional, Union, Tuple, List

import numpy as np
import xarray as xr
Expand Down Expand Up @@ -195,3 +195,14 @@ def eofs_as_correlation(self) -> Tuple[xr.DataArray, xr.DataArray]:
corr.name = 'correlation_coeffient'
pvals.name = 'p_value'
return corr, pvals

def reconstruct_X(
    self,
    mode : Optional[Union[int, List[int], slice]] = None
) -> xr.DataArray:
    '''Reconstruct ``X`` and return it as a named, labelled array.

    Parameters
    ----------
    mode : Optional[Union[int, List[int], slice]]
        Mode selection, forwarded unchanged to the parent class
        implementation (the default is None).

    Returns
    -------
    xr.DataArray
        Back-transformed reconstruction named ``'X_reconstructed'``,
        with the transformer's coordinates assigned for every dimension
        listed in ``dims_samples``.

    '''
    reconstruction = super().reconstruct_X(mode=mode)
    data_array = self._tf.back_transform(reconstruction)

    # Collect the coordinates of each sample dimension from the transformer
    sample_coords = {}
    for dim in self._tf.dims_samples:
        sample_coords[dim] = self._tf.coords[dim]

    labelled = data_array.assign_coords(sample_coords)
    labelled.name = 'X_reconstructed'
    return labelled

0 comments on commit 7ed306a

Please sign in to comment.