Skip to content

Commit

Permalink
feat: allow different scalings of EOFs and PCs
Browse files Browse the repository at this point in the history
  • Loading branch information
nicrie committed Feb 24, 2022
2 parents 85960ab + 64c60c1 commit ea39f02
Show file tree
Hide file tree
Showing 16 changed files with 166 additions and 63 deletions.
1 change: 1 addition & 0 deletions docs/_autosummary/xeofs.models.eof.EOF.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

~EOF.__init__
~EOF.eofs
~EOF.eofs_as_correlation
~EOF.explained_variance
~EOF.explained_variance_ratio
~EOF.pcs
Expand Down
1 change: 1 addition & 0 deletions docs/_autosummary/xeofs.pandas.eof.EOF.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

~EOF.__init__
~EOF.eofs
~EOF.eofs_as_correlation
~EOF.explained_variance
~EOF.explained_variance_ratio
~EOF.pcs
Expand Down
1 change: 1 addition & 0 deletions docs/_autosummary/xeofs.xarray.eof.EOF.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

~EOF.__init__
~EOF.eofs
~EOF.eofs_as_correlation
~EOF.explained_variance
~EOF.explained_variance_ratio
~EOF.pcs
Expand Down
Binary file modified docs/auto_examples/auto_examples_jupyter.zip
Binary file not shown.
Binary file modified docs/auto_examples/auto_examples_python.zip
Binary file not shown.
15 changes: 8 additions & 7 deletions tests/models/test_eof_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from xeofs.xarray.eof import EOF as xrEOF


def test_wrapper_solutions(sample_array):
@pytest.mark.parametrize('scaling', [0, 1, 2])
def test_wrapper_solutions(scaling, sample_array):
# Solutions of numpy, pandas and xarray wrapper are the same
X = sample_array
df = pd.DataFrame(X)
Expand All @@ -32,13 +33,13 @@ def test_wrapper_solutions(sample_array):
actual_pandas_expvar_ratio = pandas_model.explained_variance_ratio().squeeze()
actual_xarray_expvar_ratio = xarray_model.explained_variance_ratio()
# PCs
desired_pcs = numpy_model.pcs()
actual_pandas_pcs = pandas_model.pcs().values
actual_xarray_pcs = xarray_model.pcs().values
desired_pcs = numpy_model.pcs(scaling=scaling)
actual_pandas_pcs = pandas_model.pcs(scaling=scaling).values
actual_xarray_pcs = xarray_model.pcs(scaling=scaling).values
# EOFs
desired_eofs = numpy_model.eofs()
actual_pandas_eofs = pandas_model.eofs().values
actual_xarray_eofs = xarray_model.eofs().values
desired_eofs = numpy_model.eofs(scaling=scaling)
actual_pandas_eofs = pandas_model.eofs(scaling=scaling).values
actual_xarray_eofs = xarray_model.eofs(scaling=scaling).values
# EOFs as correlation
desired_eofs_corr = numpy_model.eofs_as_correlation()
actual_pandas_eofs_corr = pandas_model.eofs_as_correlation()
Expand Down
28 changes: 14 additions & 14 deletions tests/models/test_rotator_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
from xeofs.xarray.rotator import Rotator as xrRotator


@pytest.mark.parametrize('n_rot, power', [
(2, 1),
(5, 1),
(7, 1),
(2, 2),
(5, 2),
(7, 2),
@pytest.mark.parametrize('n_rot, power, scaling', [
(2, 1, 0),
(5, 1, 1),
(7, 1, 2),
(2, 2, 0),
(5, 2, 1),
(7, 2, 2),
])
def test_wrapper_solutions(n_rot, power, sample_array):
def test_wrapper_solutions(n_rot, power, scaling, sample_array):
# Solutions of numpy, pandas and xarray wrapper are the same
X = sample_array
df = pd.DataFrame(X)
Expand All @@ -46,13 +46,13 @@ def test_wrapper_solutions(n_rot, power, sample_array):
actual_pandas_expvar_ratio = pandas_rot.explained_variance_ratio().squeeze()
actual_xarray_expvar_ratio = xarray_rot.explained_variance_ratio()
# PCs
desired_pcs = numpy_rot.pcs()
actual_pandas_pcs = pandas_rot.pcs().values
actual_xarray_pcs = xarray_rot.pcs().values
desired_pcs = numpy_rot.pcs(scaling=scaling)
actual_pandas_pcs = pandas_rot.pcs(scaling=scaling).values
actual_xarray_pcs = xarray_rot.pcs(scaling=scaling).values
# EOFs
desired_eofs = numpy_rot.eofs()
actual_pandas_eofs = pandas_rot.eofs().values
actual_xarray_eofs = xarray_rot.eofs().values
desired_eofs = numpy_rot.eofs(scaling=scaling)
actual_pandas_eofs = pandas_rot.eofs(scaling=scaling).values
actual_xarray_eofs = xarray_rot.eofs(scaling=scaling).values
# EOFs as correlation
desired_eofs_corr = numpy_rot.eofs_as_correlation()
actual_pandas_eofs_corr = pandas_rot.eofs_as_correlation()
Expand Down
45 changes: 40 additions & 5 deletions xeofs/models/_base_rotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,48 @@ def explained_variance_ratio(self) -> np.ndarray:

return self._explained_variance_ratio

def eofs(self) -> np.ndarray:
'''EOFs after rotation.'''
def eofs(self, scaling: int = 0) -> np.ndarray:
    '''EOFs after rotation.

    Parameters
    ----------
    scaling : [0, 1, 2]
        EOFs are scaled (i) to have unit length (``scaling=0``), (ii) by the
        square root of the eigenvalues (``scaling=1``) or (iii) by the
        singular values (``scaling=2``). In case no weights were applied,
        scaling by the singular values results in the EOFs having the
        unit of the input data (the default is 0).

    Returns
    -------
    np.ndarray
        The (possibly rescaled) rotated EOFs.

    Raises
    ------
    ValueError
        If ``scaling`` is not one of 0, 1 or 2.

    '''
    if scaling == 0:
        eofs = self._eofs
    elif scaling == 1:
        eofs = self._eofs * np.sqrt(self._explained_variance)
    elif scaling == 2:
        # Singular values are recovered from the explained variance and
        # the number of samples of the underlying (unrotated) model.
        eofs = self._eofs * np.sqrt(self._explained_variance * self._model.n_samples)
    else:
        # Fail loudly on invalid input instead of an UnboundLocalError.
        raise ValueError('Invalid scaling option {:}. Must be one of [0, 1, 2].'.format(scaling))
    return eofs

def pcs(self, scaling: int = 0) -> np.ndarray:
    '''PCs after rotation.

    Parameters
    ----------
    scaling : [0, 1, 2]
        PCs are scaled (i) to have unit length (orthonormal for Varimax
        rotation) (``scaling=0``), (ii) by the square root of the
        eigenvalues (``scaling=1``) or (iii) by the
        singular values (``scaling=2``). In case no weights were applied,
        scaling by the singular values results in the PCs having the
        unit of the input data (the default is 0).

    Returns
    -------
    np.ndarray
        The (possibly rescaled) rotated PCs.

    Raises
    ------
    ValueError
        If ``scaling`` is not one of 0, 1 or 2.

    '''
    if scaling == 0:
        pcs = self._pcs
    elif scaling == 1:
        pcs = self._pcs * np.sqrt(self._explained_variance)
    elif scaling == 2:
        # Singular values are recovered from the explained variance and
        # the number of samples of the underlying (unrotated) model.
        pcs = self._pcs * np.sqrt(self._explained_variance * self._model.n_samples)
    else:
        # Fail loudly on invalid input instead of an UnboundLocalError.
        raise ValueError('Invalid scaling option {:}. Must be one of [0, 1, 2].'.format(scaling))
    return pcs

Expand Down
50 changes: 38 additions & 12 deletions xeofs/models/_eof_base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Tuple
from typing import Optional, Tuple, List, Union

import numpy as np
import scipy as sc
Expand Down Expand Up @@ -122,7 +122,6 @@ def singular_values(self) -> np.ndarray:
and the number of samples, respectively.
'''

return self._singular_values

def explained_variance(self) -> np.ndarray:
Expand All @@ -132,7 +131,6 @@ def explained_variance(self) -> np.ndarray:
of the covariance matrix.
'''

return self._explained_variance

def explained_variance_ratio(self) -> np.ndarray:
Expand All @@ -146,25 +144,53 @@ def explained_variance_ratio(self) -> np.ndarray:

return self._explained_variance_ratio

def eofs(self, scaling: int = 0) -> np.ndarray:
    '''Get the EOFs.

    The empirical orthogonal functions (EOFs) are equivalent to the
    eigenvectors of the covariance matrix of `X`.

    Parameters
    ----------
    scaling : [0, 1, 2]
        EOFs are scaled (i) to be orthonormal (``scaling=0``), (ii) by the
        square root of the eigenvalues (``scaling=1``) or (iii) by the
        singular values (``scaling=2``). In case no weights were applied,
        scaling by the singular values results in the EOFs having the
        unit of the input data (the default is 0).

    Returns
    -------
    np.ndarray
        The (possibly rescaled) EOFs.

    Raises
    ------
    ValueError
        If ``scaling`` is not one of 0, 1 or 2.

    '''
    if scaling == 0:
        eofs = self._eofs
    elif scaling == 1:
        eofs = self._eofs * np.sqrt(self._explained_variance)
    elif scaling == 2:
        eofs = self._eofs * self._singular_values
    else:
        # Fail loudly on invalid input instead of an UnboundLocalError.
        raise ValueError('Invalid scaling option {:}. Must be one of [0, 1, 2].'.format(scaling))
    return eofs

def pcs(self, scaling: int = 0) -> np.ndarray:
    '''Get the PCs.

    The principal components (PCs), also known as PC scores, are computed
    by projecting the data matrix `X` onto the eigenvectors.

    Parameters
    ----------
    scaling : [0, 1, 2]
        PCs are scaled (i) to be orthonormal (``scaling=0``), (ii) by the
        square root of the eigenvalues (``scaling=1``) or (iii) by the
        singular values (``scaling=2``). In case no weights were applied,
        scaling by the singular values results in the PCs having the
        unit of the input data (the default is 0).

    Returns
    -------
    np.ndarray
        The (possibly rescaled) PCs.

    Raises
    ------
    ValueError
        If ``scaling`` is not one of 0, 1 or 2.

    '''
    if scaling == 0:
        pcs = self._pcs
    elif scaling == 1:
        pcs = self._pcs * np.sqrt(self._explained_variance)
    elif scaling == 2:
        pcs = self._pcs * self._singular_values
    else:
        # Fail loudly on invalid input instead of an UnboundLocalError.
        raise ValueError('Invalid scaling option {:}. Must be one of [0, 1, 2].'.format(scaling))
    return pcs

def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
'''Correlation coefficients between PCs and data matrix.
Expand Down
8 changes: 4 additions & 4 deletions xeofs/models/eof.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,12 @@ def __init__(
weights=weights
)

def eofs(self, scaling: int = 0) -> np.ndarray:
    """Return the EOFs, back-transformed to the original feature space."""
    return self._tf.back_transform_eofs(super().eofs(scaling=scaling))

def pcs(self, scaling: int = 0) -> np.ndarray:
    """Return the PCs, back-transformed to the original sample space."""
    return self._tf.back_transform_pcs(super().pcs(scaling=scaling))

def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
Expand Down
8 changes: 4 additions & 4 deletions xeofs/models/rotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ def explained_variance(self) -> np.ndarray:
def explained_variance_ratio(self) -> np.ndarray:
return super().explained_variance_ratio()

def eofs(self, scaling: int = 0) -> np.ndarray:
    """Return the rotated EOFs, back-transformed via the wrapped model's transformer."""
    return self._model._tf.back_transform_eofs(super().eofs(scaling=scaling))

def pcs(self, scaling: int = 0) -> np.ndarray:
    """Return the rotated PCs, back-transformed via the wrapped model's transformer."""
    return self._model._tf.back_transform_pcs(super().pcs(scaling=scaling))

def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
Expand Down
8 changes: 4 additions & 4 deletions xeofs/pandas/eof.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,14 @@ def explained_variance_ratio(self) -> pd.DataFrame:
)
return expvar

def eofs(self, scaling: int = 0) -> pd.DataFrame:
    """Return the EOFs as a DataFrame with one column per mode."""
    raw = super().eofs(scaling=scaling)
    frame = self._tf.back_transform_eofs(raw)
    frame.columns = self._idx_mode
    return frame

def pcs(self, scaling: int = 0) -> pd.DataFrame:
    """Return the PCs as a DataFrame with one column per mode."""
    raw = super().pcs(scaling=scaling)
    frame = self._tf.back_transform_pcs(raw)
    frame.columns = self._idx_mode
    return frame
Expand Down
8 changes: 4 additions & 4 deletions xeofs/pandas/rotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ def explained_variance_ratio(self) -> pd.DataFrame:
index=self._model._idx_mode[:self._n_rot]
)

def eofs(self, scaling: int = 0) -> pd.DataFrame:
    """Return the rotated EOFs, back-transformed via the wrapped model's transformer."""
    return self._model._tf.back_transform_eofs(super().eofs(scaling=scaling))

def pcs(self, scaling: int = 0) -> pd.DataFrame:
    """Return the rotated PCs, back-transformed via the wrapped model's transformer."""
    return self._model._tf.back_transform_pcs(super().pcs(scaling=scaling))

def eofs_as_correlation(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
Expand Down
38 changes: 38 additions & 0 deletions xeofs/utils/tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from typing import Optional, Union, List


def get_mode_selector(obj : Optional[Union[int, List[int], slice]]) -> Union[slice, List]:
    '''Create a mode selector for a given input object.

    For all possible input types (except for list) the object is returned
    as a slice. Lists are returned as lists.

    Parameters
    ----------
    obj : Optional[Union[int, List[int], slice]]
        Data type to be cast as a mode selector.

    '''
    MAX_MODE = 9999999
    if obj is None:
        # No selection given: select (up to) all modes.
        return slice(MAX_MODE)
    if isinstance(obj, int):
        return slice(obj)
    if isinstance(obj, slice):
        # Shift a 1-based start down by one so that "1" addresses the
        # first mode; a missing (None) start defaults to 0.
        try:
            shifted = obj.start - 1
        except TypeError:
            shifted = 0
        # A slice start must never be negative.
        return slice(max(0, shifted), obj.stop, obj.step)
    if isinstance(obj, list):
        # Convert 1-based mode numbers to 0-based indices.
        return [mode - 1 for mode in obj]
    raise ValueError(
        'Invalid type {:}. Must be one of [int, slice, list, None].'.format(type(obj))
    )
10 changes: 5 additions & 5 deletions xeofs/xarray/eof.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import xarray as xr

from ..models._eof_base import _EOF_base
from xeofs.xarray._dataarray_transformer import _DataArrayTransformer
from ._dataarray_transformer import _DataArrayTransformer


class EOF(_EOF_base):
Expand Down Expand Up @@ -176,14 +176,14 @@ def explained_variance_ratio(self) -> xr.DataArray:
name='explained_variance_ratio'
)

def eofs(self, scaling: int = 0) -> xr.DataArray:
    """Return the EOFs as a named DataArray in the original coordinates."""
    result = self._tf.back_transform_eofs(super().eofs(scaling=scaling))
    result.name = 'EOFs'
    return result

def pcs(self, scaling: int = 0) -> xr.DataArray:
    """Return the PCs as a named DataArray in the original coordinates."""
    result = self._tf.back_transform_pcs(super().pcs(scaling=scaling))
    result.name = 'PCs'
    return result
Expand Down
8 changes: 4 additions & 4 deletions xeofs/xarray/rotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,14 @@ def explained_variance_ratio(self) -> xr.DataArray:
name='explained_variance_ratio'
)

def eofs(self, scaling: int = 0) -> xr.DataArray:
    """Return the rotated EOFs as a named DataArray in the original coordinates."""
    result = self._model._tf.back_transform_eofs(super().eofs(scaling=scaling))
    result.name = 'EOFs'
    return result

def pcs(self, scaling: int = 0) -> xr.DataArray:
    """Return the rotated PCs as a named DataArray in the original coordinates."""
    result = self._model._tf.back_transform_pcs(super().pcs(scaling=scaling))
    result.name = 'PCs'
    return result
Expand Down

0 comments on commit ea39f02

Please sign in to comment.