Skip to content

Commit

Permalink
feat: add eofs as correlation
Browse files Browse the repository at this point in the history
  • Loading branch information
nicrie committed Feb 22, 2022
2 parents 29f1b4d + d84454c commit 85960ab
Show file tree
Hide file tree
Showing 13 changed files with 265 additions and 76 deletions.
12 changes: 12 additions & 0 deletions tests/models/test_eof.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,15 @@ def test_n_modes(n_modes, sample_array):
ref_n_modes = min(data_no_nan.shape) if n_modes is None else n_modes

assert base.n_modes == ref_n_modes


def test_eofs_as_correlation(sample_array):
# Correlation coefficients are between -1 and 1
# p values are between 0 and 1
data_no_nan = sample_array[:, ~np.isnan(sample_array).all(axis=0)]
model = EOF(data_no_nan)
model.solve()
corr, pvals = model.eofs_as_correlation()
assert (abs(corr) <= 1).all()
assert (pvals >= 0).all()
assert (pvals <= 1).all()
59 changes: 59 additions & 0 deletions tests/models/test_eof_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import numpy as np
import pandas as pd
import xarray as xr
import pytest

from xeofs.models.eof import EOF
from xeofs.pandas.eof import EOF as pdEOF
from xeofs.xarray.eof import EOF as xrEOF


def test_wrapper_solutions(sample_array):
# Solutions of numpy, pandas and xarray wrapper are the same
X = sample_array
df = pd.DataFrame(X)
da = xr.DataArray(X)
# Perform analysis with all three wrappers
numpy_model = EOF(X)
numpy_model.solve()

pandas_model = pdEOF(df)
pandas_model.solve()

xarray_model = xrEOF(da, dim='dim_0')
xarray_model.solve()

# Explained variance
desired_expvar = numpy_model.explained_variance()
actual_pandas_expvar = pandas_model.explained_variance().squeeze()
actual_xarray_expvar = xarray_model.explained_variance()
# Explained variance ratio
desired_expvar_ratio = numpy_model.explained_variance_ratio()
actual_pandas_expvar_ratio = pandas_model.explained_variance_ratio().squeeze()
actual_xarray_expvar_ratio = xarray_model.explained_variance_ratio()
# PCs
desired_pcs = numpy_model.pcs()
actual_pandas_pcs = pandas_model.pcs().values
actual_xarray_pcs = xarray_model.pcs().values
# EOFs
desired_eofs = numpy_model.eofs()
actual_pandas_eofs = pandas_model.eofs().values
actual_xarray_eofs = xarray_model.eofs().values
# EOFs as correlation
desired_eofs_corr = numpy_model.eofs_as_correlation()
actual_pandas_eofs_corr = pandas_model.eofs_as_correlation()
actual_xarray_eofs_corr = xarray_model.eofs_as_correlation()

np.testing.assert_allclose(actual_pandas_expvar, desired_expvar)
np.testing.assert_allclose(actual_pandas_expvar_ratio, desired_expvar_ratio)
np.testing.assert_allclose(actual_pandas_pcs, desired_pcs)
np.testing.assert_allclose(actual_pandas_eofs, desired_eofs)
np.testing.assert_allclose(actual_pandas_eofs_corr[0], desired_eofs_corr[0])
np.testing.assert_allclose(actual_pandas_eofs_corr[1], desired_eofs_corr[1])

np.testing.assert_allclose(actual_xarray_expvar, desired_expvar)
np.testing.assert_allclose(actual_xarray_expvar_ratio, desired_expvar_ratio)
np.testing.assert_allclose(actual_xarray_pcs, desired_pcs)
np.testing.assert_allclose(actual_xarray_eofs, desired_eofs)
np.testing.assert_allclose(actual_xarray_eofs_corr[0], desired_eofs_corr[0])
np.testing.assert_allclose(actual_xarray_eofs_corr[1], desired_eofs_corr[1])
76 changes: 15 additions & 61 deletions tests/models/test_rotator.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
import numpy as np
import pandas as pd
import xarray as xr
import pytest
from pytest import approx
from numpy.testing import assert_allclose, assert_raises

from xeofs.models.eof import EOF
from xeofs.pandas.eof import EOF as pdEOF
from xeofs.xarray.eof import EOF as xrEOF
from xeofs.models.rotator import Rotator
from xeofs.pandas.rotator import Rotator as pdRotator
from xeofs.xarray.rotator import Rotator as xrRotator


@pytest.mark.parametrize('n_modes', [0, 1])
Expand All @@ -20,9 +14,9 @@ def test_invalid_rotation(n_modes, sample_array):
model = EOF(X)
model.solve()
with pytest.raises(Exception):
rot = Rotator(model=model, n_rot=n_modes)
_ = Rotator(model=model, n_rot=n_modes)



@pytest.mark.parametrize('n_modes', [2, 5, 7])
def test_explained_variance(n_modes, sample_array):
# Amount of explained variance is same before and after Varimax rotation
Expand Down Expand Up @@ -63,6 +57,19 @@ def test_pcs_uncorrelated(n_modes, sample_array):
assert_allclose(rpcs.T @ rpcs, np.identity(n_modes), atol=1e-3)


def test_eofs_as_correlation(sample_array):
# Correlation coefficients are between -1 and 1
# p values are between 0 and 1
data_no_nan = sample_array[:, ~np.isnan(sample_array).all(axis=0)]
model = EOF(data_no_nan)
model.solve()
rot = Rotator(model=model, n_rot=5)
corr, pvals = rot.eofs_as_correlation()
assert (abs(corr) <= 1).all()
assert (pvals >= 0).all()
assert (pvals <= 1).all()


@pytest.mark.parametrize('n_modes, power', [
(2, 1),
(5, 1),
Expand All @@ -87,56 +94,3 @@ def test_relaxed_orthogonal_contraint(n_modes, power, sample_array):

assert_raises(AssertionError, assert_allclose, actual_var, desired, atol=1e-3)
assert_raises(AssertionError, assert_allclose, actual_pro, desired, atol=1e-3)


@pytest.mark.parametrize('n_rot, power', [
(2, 1),
(5, 1),
(7, 1),
(2, 2),
(5, 2),
(7, 2),
])
def test_wrapper_solutions(n_rot, power, sample_array):
# Solutions of numpy, pandas and xarray wrapper are the same
X = sample_array
df = pd.DataFrame(X)
da = xr.DataArray(X)

numpy_model = EOF(X)
numpy_model.solve()
numpy_rot = Rotator(numpy_model, n_rot=n_rot, power=power)

pandas_model = pdEOF(df)
pandas_model.solve()
pandas_rot = pdRotator(pandas_model, n_rot=n_rot, power=power)

xarray_model = xrEOF(da, dim='dim_0')
xarray_model.solve()
xarray_rot = xrRotator(xarray_model, n_rot=n_rot, power=power)

desired_expvar = numpy_rot.explained_variance()
actual_pandas_expvar = pandas_rot.explained_variance().squeeze()
actual_xarray_expvar = xarray_rot.explained_variance()

desired_expvar_ratio = numpy_rot.explained_variance_ratio()
actual_pandas_expvar_ratio = pandas_rot.explained_variance_ratio().squeeze()
actual_xarray_expvar_ratio = xarray_rot.explained_variance_ratio()

desired_pcs = numpy_rot.pcs()
actual_pandas_pcs = pandas_rot.pcs().values
actual_xarray_pcs = xarray_rot.pcs().values

desired_eofs = numpy_rot.eofs()
actual_pandas_eofs = pandas_rot.eofs().values
actual_xarray_eofs = xarray_rot.eofs().values

np.testing.assert_allclose(actual_pandas_expvar, desired_expvar)
np.testing.assert_allclose(actual_pandas_expvar_ratio, desired_expvar_ratio)
np.testing.assert_allclose(actual_pandas_pcs, desired_pcs)
np.testing.assert_allclose(actual_pandas_eofs, desired_eofs)

np.testing.assert_allclose(actual_xarray_expvar, desired_expvar)
np.testing.assert_allclose(actual_xarray_expvar_ratio, desired_expvar_ratio)
np.testing.assert_allclose(actual_xarray_pcs, desired_pcs)
np.testing.assert_allclose(actual_xarray_eofs, desired_eofs)
73 changes: 73 additions & 0 deletions tests/models/test_rotator_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import numpy as np
import pandas as pd
import xarray as xr
import pytest

from xeofs.models.eof import EOF
from xeofs.pandas.eof import EOF as pdEOF
from xeofs.xarray.eof import EOF as xrEOF
from xeofs.models.rotator import Rotator
from xeofs.pandas.rotator import Rotator as pdRotator
from xeofs.xarray.rotator import Rotator as xrRotator


@pytest.mark.parametrize('n_rot, power', [
(2, 1),
(5, 1),
(7, 1),
(2, 2),
(5, 2),
(7, 2),
])
def test_wrapper_solutions(n_rot, power, sample_array):
# Solutions of numpy, pandas and xarray wrapper are the same
X = sample_array
df = pd.DataFrame(X)
da = xr.DataArray(X)
# Perform analysis with all three wrappers
numpy_model = EOF(X)
numpy_model.solve()
numpy_rot = Rotator(numpy_model, n_rot=n_rot, power=power)

pandas_model = pdEOF(df)
pandas_model.solve()
pandas_rot = pdRotator(pandas_model, n_rot=n_rot, power=power)

xarray_model = xrEOF(da, dim='dim_0')
xarray_model.solve()
xarray_rot = xrRotator(xarray_model, n_rot=n_rot, power=power)

# Explained variance
desired_expvar = numpy_rot.explained_variance()
actual_pandas_expvar = pandas_rot.explained_variance().squeeze()
actual_xarray_expvar = xarray_rot.explained_variance()
# Explained variance ratio
desired_expvar_ratio = numpy_rot.explained_variance_ratio()
actual_pandas_expvar_ratio = pandas_rot.explained_variance_ratio().squeeze()
actual_xarray_expvar_ratio = xarray_rot.explained_variance_ratio()
# PCs
desired_pcs = numpy_rot.pcs()
actual_pandas_pcs = pandas_rot.pcs().values
actual_xarray_pcs = xarray_rot.pcs().values
# EOFs
desired_eofs = numpy_rot.eofs()
actual_pandas_eofs = pandas_rot.eofs().values
actual_xarray_eofs = xarray_rot.eofs().values
# EOFs as correlation
desired_eofs_corr = numpy_rot.eofs_as_correlation()
actual_pandas_eofs_corr = pandas_rot.eofs_as_correlation()
actual_xarray_eofs_corr = xarray_rot.eofs_as_correlation()

np.testing.assert_allclose(actual_pandas_expvar, desired_expvar)
np.testing.assert_allclose(actual_pandas_expvar_ratio, desired_expvar_ratio)
np.testing.assert_allclose(actual_pandas_pcs, desired_pcs)
np.testing.assert_allclose(actual_pandas_eofs, desired_eofs)
np.testing.assert_allclose(actual_pandas_eofs_corr[0], desired_eofs_corr[0])
np.testing.assert_allclose(actual_pandas_eofs_corr[1], desired_eofs_corr[1])

np.testing.assert_allclose(actual_xarray_expvar, desired_expvar)
np.testing.assert_allclose(actual_xarray_expvar_ratio, desired_expvar_ratio)
np.testing.assert_allclose(actual_xarray_pcs, desired_pcs)
np.testing.assert_allclose(actual_xarray_eofs, desired_eofs)
np.testing.assert_allclose(actual_xarray_eofs_corr[0], desired_eofs_corr[0])
np.testing.assert_allclose(actual_xarray_eofs_corr[1], desired_eofs_corr[1])
2 changes: 1 addition & 1 deletion tests/pandas/test_eof.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
('EOF', True, None)
])
def test_solution(method, norm, weights, reference_solution, sample_DataFrame):
# Compare numpy implementation against reference solution
# Compare pandas implementation against reference solution
experiment = reference_solution.get_experiment(
method=method, norm=norm, weights=weights
)
Expand Down
25 changes: 24 additions & 1 deletion xeofs/models/_base_rotator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import numpy as np
import scipy as sc
from typing import Tuple

from .eof import EOF
from ..utils.rotation import promax
Expand Down Expand Up @@ -66,7 +68,7 @@ def __init__(
# Reorder according to variance
self._pcs = self._pcs[:, self._idx_var]

def _rotation_matrix(self, inverse_transpose : bool = False):
def _rotation_matrix(self, inverse_transpose : bool = False) -> np.ndarray:
'''Return the rotation matrix.
Parameters
Expand Down Expand Up @@ -111,3 +113,24 @@ def pcs(self) -> np.ndarray:
'''PCs after rotation.'''

return self._pcs

def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
'''Correlation coefficients between rotated PCs and data matrix.
Returns
-------
Tuple[np.ndarray, np.ndarry]
Matrices of correlation coefficients and associated
two-sided p-values with features as rows and modes as columns.
'''

# Compute correlation matrix
corr = np.corrcoef(self._model.X, self._pcs, rowvar=False)
corr = corr[:self._model.n_features, self._model.n_features:]
# Compute two-sided p-values
# Reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html#r8c6348c62346-1
a = self._model.n_samples / 2 - 1
dist = sc.stats.beta(a, a, loc=-1, scale=2)
pvals = 2 * dist.cdf(-abs(corr))
return corr, pvals
25 changes: 23 additions & 2 deletions xeofs/models/_eof_base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from abc import abstractmethod
from typing import Iterable, Optional
from typing import Optional, Tuple

import numpy as np
import scipy as sc
from sklearn.decomposition import PCA


Expand Down Expand Up @@ -165,3 +165,24 @@ def pcs(self) -> np.ndarray:
'''

return self._pcs

def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
'''Correlation coefficients between PCs and data matrix.
Returns
-------
Tuple[np.ndarray, np.ndarry]
Matrices of correlation coefficients and associated
two-sided p-values with features as rows and modes as columns.
'''

# Compute correlation matrix
corr = np.corrcoef(self.X, self._pcs, rowvar=False)
corr = corr[:self.n_features, self.n_features:]
# Compute two-sided p-values
# Reference: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html#r8c6348c62346-1
a = self.n_samples / 2 - 1
dist = sc.stats.beta(a, a, loc=-1, scale=2)
pvals = 2 * dist.cdf(-abs(corr))
return corr, pvals
14 changes: 10 additions & 4 deletions xeofs/models/eof.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Union, Iterable
from typing import Optional, Union, Iterable, Tuple

import numpy as np

Expand Down Expand Up @@ -95,18 +95,24 @@ def __init__(
self._tf = _ArrayTransformer()
X = self._tf.fit_transform(X, axis=axis)
weights = self._tf.transform_weights(weights)

super().__init__(
X=X,
n_modes=n_modes,
norm=norm,
weights=weights
)

def eofs(self):
def eofs(self) -> np.ndarray:
eofs = super().eofs()
return self._tf.back_transform_eofs(eofs)

def pcs(self):
def pcs(self) -> np.ndarray:
pcs = super().pcs()
return self._tf.back_transform_pcs(pcs)

def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
corr, pvals = super().eofs_as_correlation()
corr = self._tf.back_transform_eofs(corr)
pvals = self._tf.back_transform_eofs(pvals)
return corr, pvals
7 changes: 7 additions & 0 deletions xeofs/models/rotator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from typing import Tuple

from .eof import EOF
from ._base_rotator import _BaseRotator
Expand Down Expand Up @@ -52,3 +53,9 @@ def eofs(self) -> np.ndarray:
def pcs(self) -> np.ndarray:
pcs = super().pcs()
return self._model._tf.back_transform_pcs(pcs)

def eofs_as_correlation(self) -> Tuple[np.ndarray, np.ndarray]:
corr, pvals = super().eofs_as_correlation()
corr = self._model._tf.back_transform_eofs(corr)
pvals = self._model._tf.back_transform_eofs(pvals)
return corr, pvals
Loading

0 comments on commit 85960ab

Please sign in to comment.