Commit

docs: update docstrings
nicrie committed Feb 16, 2022
1 parent 3745308 commit e02b6ec
Showing 3 changed files with 271 additions and 10 deletions.
129 changes: 126 additions & 3 deletions xeofs/models/eof.py
@@ -13,6 +13,69 @@


class EOF(_EOF_base):
'''EOF analysis of a single ``np.ndarray``.
Parameters
----------
X : np.ndarray
Data to be decomposed. ``X`` can be any N-dimensional array with the
first dimension containing the variable whose variance is to be
maximised. All remaining dimensions will be automatically reshaped to
obtain a 2D matrix. For example, given data of `n` time series of ``p1`` different
variables each measured at ``p2`` different locations, the data matrix
``X`` of dimensions ``(n x p1 x p2)`` will maximise `temporal` variance.
In contrast, if provided as ``(p2 x n x p1)``, the `spatial` variance
will be maximised.
n_modes : Optional[int]
Number of modes to compute. Computing fewer modes can result in
performance gains. If None, then the maximum number of modes is
equivalent to ``min(n_samples, n_features)`` (the default is None).
norm : bool
Normalize each feature (e.g. grid cell) by its temporal standard
deviation (the default is False).
Examples
--------
Import package and create data:
>>> import numpy as np
>>> from xeofs.models import EOF
>>> rng = np.random.default_rng(7)
>>> X = rng.standard_normal((4, 2, 3))
Initialize standardized EOF analysis and compute the first 2 modes:
>>> model = EOF(X, norm=True, n_modes=2)
>>> model.solve()
Get explained variance:
>>> model.explained_variance()
... array([4.81848833, 2.20765019])
Get EOFs:
>>> model.eofs()
... array([[[ 0.52002146, -0.00698575],
... [ 0.41059796, -0.31603563]],
...
... [[ 0.32091783, 0.47647144],
... [-0.51771611, 0.01380736]],
...
... [[-0.28840959, 0.63341346],
... [ 0.32678537, 0.52119516]]])
Get PCs:
>>> model.pcs()
... array([[ 1.76084189, -0.92030205],
... [-2.13581896, -1.49704775],
... [ 2.02091078, 0.65502667],
... [-1.64593371, 1.76232312]])
'''
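The reshaping described for the ``X`` parameter above can be sketched in plain NumPy (a minimal illustration, not xeofs internals; shapes and seed are arbitrary):

import numpy as np

rng = np.random.default_rng(7)
X = rng.standard_normal((4, 2, 3))   # (n x p1 x p2): samples first

# Trailing feature dimensions are collapsed into one, giving a
# 2D matrix of shape (n_samples, n_features) = (4, 6):
X2d = X.reshape(X.shape[0], -1)

# To maximise spatial variance instead, provide the spatial axis first,
# e.g. (p2 x n x p1), which flattens to (3, 8):
X_spatial = np.moveaxis(X, 2, 0).reshape(3, -1)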

def __init__(
self,
@@ -29,12 +92,31 @@ def __init__(

super().__init__(
X=self.X,
Y=None,
n_modes=n_modes,
norm=norm
)

def solve(self):
def solve(self) -> None:
'''
Perform the EOF analysis.
To boost performance, the standard solver is based on
the PCA implementation of scikit-learn [1]_, which chooses among
different algorithms for the decomposition depending on the size of
the data matrix.
Naive approaches based on the singular value decomposition of the
data matrix ``X (n x p)`` or the eigendecomposition of the covariance
matrix ``C (p x p)`` quickly become computationally infeasible as the
number of samples :math:`n` or features :math:`p` increases (the cost
scales as :math:`O(n^2 p)` and :math:`O(p^3)`, respectively).
References
----------
.. [1] https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
'''

pca = PCA(n_components=self.n_modes)
self._pcs = pca.fit_transform(self.X)
self._singular_values = pca.singular_values_
@@ -48,17 +130,58 @@ def solve(self):
self._eofs *= flip_signs
self._pcs *= flip_signs
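The ``flip_signs`` step above resolves the arbitrary sign of each mode. The exact rule is hidden in the collapsed part of the diff; a minimal sketch of one common convention (flip each mode so its largest-magnitude EOF loading is positive) could look like this:

import numpy as np

def flip_signs_sketch(eofs, pcs):
    # eofs: (n_features, n_modes), pcs: (n_samples, n_modes).
    # Flipping an EOF together with its PC leaves the reconstruction
    # pcs @ eofs.T unchanged.
    rows = np.abs(eofs).argmax(axis=0)
    signs = np.sign(eofs[rows, np.arange(eofs.shape[1])])
    signs[signs == 0] = 1   # avoid zeroing out a mode
    return eofs * signs, pcs * signs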

def singular_values(self):
def singular_values(self) -> np.ndarray:
'''Get the singular values.
The `i`-th singular value :math:`\sigma_i` is defined by
.. math::
\sigma_i = \sqrt{(n - 1) \lambda_i}
where :math:`\lambda_i` is the eigenvalue associated with mode `i`
and :math:`n` is the number of samples (scikit-learn defines the
explained variance with :math:`n - 1` in the denominator).
'''

return self._singular_values
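That relation can be checked numerically against the scikit-learn solver used by ``solve()`` (a sketch; shapes are arbitrary):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.default_rng(0).standard_normal((50, 6))
pca = PCA(n_components=3).fit(X)
n = X.shape[0]

# scikit-learn defines explained_variance_ = singular_values_**2 / (n - 1)
assert np.allclose(pca.singular_values_,
                   np.sqrt((n - 1) * pca.explained_variance_))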

def explained_variance(self):
'''Get the explained variance.
The explained variance is simply given by the individual eigenvalues
of the covariance matrix.
'''

return self._explained_variance

def explained_variance_ratio(self):
'''Get the explained variance ratio.
The explained variance ratio is the fraction of total variance
explained by a given mode and is calculated as :math:`\lambda_i / \sum_{j=1}^m \lambda_j`,
where `m` is the total number of modes.
'''

return self._explained_variance_ratio
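As a quick sanity check of that ratio (a sketch; when all modes are computed, the ratios sum to one):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.default_rng(1).standard_normal((50, 6))
pca = PCA().fit(X)   # all modes

ratio = pca.explained_variance_ / pca.explained_variance_.sum()
assert np.allclose(ratio, pca.explained_variance_ratio_)
assert np.isclose(ratio.sum(), 1.0)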

def eofs(self):
'''Get the EOFs.
The empirical orthogonal functions (EOFs) are equivalent to the eigenvectors
of the covariance matrix of `X`.
'''

return self._arr_tf.back_transform(self._eofs.T).T

def pcs(self):
'''Get the PCs.
The principal components (PCs), also known as PC scores, are computed
by projecting the data matrix `X` onto the eigenvectors.
'''

return self._pcs
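The projection mentioned in the ``pcs`` docstring can be made explicit with scikit-learn (a sketch; PCA centres the data internally, so the manual projection must do the same):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.default_rng(2).standard_normal((40, 5))
pca = PCA(n_components=2).fit(X)

# 'Projecting X onto the eigenvectors' = multiplying the centred data
# with the EOFs (principal axes):
pcs_manual = (X - X.mean(axis=0)) @ pca.components_.T
assert np.allclose(pca.transform(X), pcs_manual)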
59 changes: 58 additions & 1 deletion xeofs/pandas/eof.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,64 @@


class EOF(models.eof.EOF):
'''EOF analysis of a single ``pd.DataFrame``.
Parameters
----------
X : pd.DataFrame
Data to be decomposed. The first dimension contains the variable whose
variance is to be maximised.
n_modes : Optional[int]
Number of modes to compute. Computing fewer modes can result in
performance gains. If None, then the maximum number of modes is
equivalent to ``min(n_samples, n_features)`` (the default is None).
norm : bool
Normalize each feature (e.g. grid cell) by its temporal standard
deviation (the default is False).
Examples
--------
Import package and create data:
>>> import numpy as np
>>> import pandas as pd
>>> from xeofs.pandas import EOF
>>> rng = np.random.default_rng(7)
>>> X = rng.standard_normal((4, 3))
>>> df = pd.DataFrame(X)
Initialize standardized EOF analysis and compute the first 2 modes:
>>> model = EOF(df, norm=True, n_modes=2)
>>> model.solve()
Get explained variance:
>>> model.explained_variance()
... explained_variance
... mode
... 1 2.562701
... 2 1.167054
Get EOFs:
>>> model.eofs()
... mode 1 2
... 0 0.626041 -0.428431
... 1 0.677121 -0.115737
... 2 0.386755 0.896132
Get PCs:
>>> model.pcs()
... mode 1 2
... 0 0.495840 -0.221963
... 1 -2.254508 -0.470420
... 2 1.516900 -0.876695
... 3 0.241768 1.569078
'''

def __init__(
self,
@@ -24,7 +82,6 @@ def __init__(

super().__init__(
X=X,
Y=None,
n_modes=n_modes,
norm=norm
)
93 changes: 87 additions & 6 deletions xeofs/xarray/eof.py
@@ -8,6 +8,88 @@


class EOF(models.eof.EOF):
'''EOF analysis of a single ``xr.DataArray``.
Parameters
----------
X : xr.DataArray
Data to be decomposed.
n_modes : Optional[int]
Number of modes to compute. Computing fewer modes can result in
performance gains. If None, then the maximum number of modes is
equivalent to ``min(n_samples, n_features)`` (the default is None).
norm : bool
Normalize each feature (e.g. grid cell) by its temporal standard
deviation (the default is False).
dim : str
Define the dimension along which variance is to be maximised.
For most applications in climate science, temporal variance is
maximised (also known as S-mode EOF analysis), i.e. the time dimension
should be chosen (the default is ``time``).
Examples
--------
Import package and create data:
>>> import xarray as xr
>>> from xeofs.xarray import EOF
>>> da = xr.tutorial.load_dataset('air_temperature')['air']
>>> da = da.isel(lon=slice(0, 3), lat=slice(0, 2))
Initialize standardized EOF analysis and compute the first 2 modes:
>>> model = EOF(da, norm=True, n_modes=2)
>>> model.solve()
Get explained variance:
>>> model.explained_variance()
... <xarray.DataArray 'explained_variance' (mode: 2)>
... array([5.8630486 , 0.12335653], dtype=float32)
... Coordinates:
... mode (mode) int64 1 2
Get EOFs:
>>> model.eofs()
... <xarray.DataArray 'EOFs' (lon: 3, lat: 2, mode: 2)>
... array([[[ 0.4083837 , -0.39021498],
... [ 0.40758175, 0.42474997]],
...
... [[ 0.40875185, -0.40969774],
... [ 0.40863484, 0.41475943]],
...
... [[ 0.40776297, -0.4237887 ],
... [ 0.40837321, 0.38450614]]])
... Coordinates:
... lat (lat) float32 75.0 72.5
... lon (lon) float32 200.0 202.5 205.0
... mode (mode) int64 1 2
Get PCs:
>>> model.pcs()
... <xarray.DataArray 'PCs' (time: 2920, mode: 2)>
... array([[-3.782707 , -0.07754549],
... [-3.7966802 , -0.13775176],
... [-3.7969239 , -0.05770111],
... ...,
... [-3.2584608 , 0.3592216 ],
... [-3.031799 , 0.2055658 ],
... [-3.0840495 , 0.25031802]], dtype=float32)
... Coordinates:
... time (time) datetime64[ns] 2013-01-01 ... 2014-12-31T18:00:00
... mode (mode) int64 1 2
'''
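The ``dim`` argument above sets which dimension plays the role of the sample dimension; a brief usage sketch (assuming the ``air_temperature`` tutorial data from the example; the non-default ``dim`` value is hypothetical):

import xarray as xr
from xeofs.xarray import EOF

da = xr.tutorial.load_dataset('air_temperature')['air']

# S-mode (the default): samples along 'time', EOFs span the spatial grid
model = EOF(da, norm=True, n_modes=2, dim='time')
model.solve()

# Passing another dimension name present in the DataArray would instead
# maximise variance along that axis (e.g. T-mode-style analysis).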

def __init__(
self,
@@ -25,14 +107,13 @@

super().__init__(
X=X,
Y=None,
n_modes=n_modes,
norm=norm
)
self._mode_idx = xr.IndexVariable('mode', range(1, self.n_modes + 1))
self._dim = dim

def singular_values(self):
def singular_values(self) -> xr.DataArray:
svalues = super().singular_values()
return xr.DataArray(
svalues,
@@ -41,7 +122,7 @@ def singular_values(self):
name='singular_values'
)

def explained_variance(self):
def explained_variance(self) -> xr.DataArray:
expvar = super().explained_variance()
return xr.DataArray(
expvar,
@@ -50,7 +131,7 @@ def explained_variance(self):
name='explained_variance'
)

def explained_variance_ratio(self):
def explained_variance_ratio(self) -> xr.DataArray:
expvar = super().explained_variance_ratio()
return xr.DataArray(
expvar,
@@ -59,15 +140,15 @@ def explained_variance_ratio(self):
name='explained_variance_ratio'
)

def eofs(self):
def eofs(self) -> xr.DataArray:
eofs = self._eofs
eofs = self._da_tf.back_transform(eofs.T).T
eofs = eofs.rename({self._dim : 'mode'})
eofs = eofs.assign_coords({'mode' : self._mode_idx})
eofs.name = 'EOFs'
return eofs

def pcs(self):
def pcs(self) -> xr.DataArray:
pcs = super().pcs()
coords = {
self._dim : self._da_tf.coords_in[self._dim],
