feat!: implement different rules for step size in derivatives approx

Step size can heavily influence the solving process based on the derivatives. For that reason, derivative approximations now allow to choose the relative epsilon value and step size computation and experiment with it to achieve the best results for the specific use case. Short notes about the implemented rules: * Fixed -- Using the machine precision without any scaling. * ValueScaled -- Scaling the machine precision by the current value, which avoids small steps for large variables and "big" steps for very small variables. This is often recommended on the internet. * AbsValueScaled -- A variation of the previous, which always uses positive value of the step. This "rule" is used in GSL. It is the default, because it was one of the rules for which our current test for scaled rosenbrock passed, together with Fixed (which I consider naive) and other always-positive step size rules (from which this is the most standard, more on that below). The real reason why the scaled rosenbrock test fails is that, when negative step size is used, it results in singular Jacobian in the first iteration, forcing the trust region algorithm to use LM step, which doesn't behave well apparently. * ValueOrMagScaled -- Scaling the machine precision by the absolute value of the variable or typical magnitude, whatever is larger, to avoid problems when the variable is close to zero. This "rule" is used in Numerical Methods for Unconstrained Optimization and Nonlinear Equations by Dennis and Schnabel. * AbsValueOrMagScaled -- A variation of the previous, which always uses positive value of the step. * ValueOrMagTimesValueScaled -- A variation of the next rule, which uses the sign of the variable value for the sign of the step size. * AbsValueOrMagTimesAbsValueScaled -- This is actually the original rule used in gomez, implemented by accident by swapping the arguments in copysign function (see 4161c56). So it was a bug, but for some reason from all these rules it works the best for my use case. I assume it relates to using bigger step size (if x > 1) and smaller step size (if x < 1). Nevertheless, using max(|x|, mag) * x is seems important, because variants like x^2 * sign(x) or similar did not work well. Anyway, I consider it non-standard, because it isn't based on any literature, but I want to have it in gomez, because it works well for me.
datamole-ai · Nov 16, 2023 · 2ee24f0 · 2ee24f0
1 parent a9d1ad9
commit 2ee24f0
Show file tree

Hide file tree

Showing 4 changed files with 152 additions and 51 deletions.
diff --git a/src/algo/trust_region.rs b/src/algo/trust_region.rs
@@ -37,7 +37,7 @@ use thiserror::Error;
 
 use crate::{
  core::{Domain, Function, Optimizer, Problem, RealField as _, Solver, System},
- derivatives::{Gradient, Hessian, Jacobian},
+ derivatives::{Gradient, Hessian, Jacobian, StepRule},
 };
 
 /// Specification for initial value of trust region size.
@@ -77,6 +77,14 @@ pub struct TrustRegionOptions<P: Problem> {
  /// Determines whether steps that increase the error can be accepted.
  /// Default: `true`.
  allow_ascent: bool,
+ /// Relative epsilon used in Jacobian and gradient computations.
+ /// Default: `sqrt(EPSILON)`.
+ eps_sqrt: P::Field,
+ /// Relative epsilon used in Hessian computations.
+ /// Default: `cbrt(EPSILON)`.
+ eps_cbrt: P::Field,
+ /// Step rule used in Jacobian, gradient and Hessian computations.
+ step_rule: StepRule,
  #[getset(skip)]
  prefer_greater_magnitude_in_cauchy: bool,
 }
@@ -111,6 +119,9 @@ impl<P: Problem> Default for TrustRegionOptions<P> {
  rejections_thresh: 10,
  allow_ascent: true,
  prefer_greater_magnitude_in_cauchy: false,
+ eps_sqrt: P::Field::EPSILON_SQRT,
+ eps_cbrt: P::Field::EPSILON_CBRT,
+ step_rule: StepRule::default(),
  }
  }
 }
@@ -225,6 +236,8 @@ impl<R: System> Solver<R> for TrustRegion<R> {
  rejections_thresh,
  allow_ascent,
  prefer_greater_magnitude_in_cauchy,
+ eps_sqrt,
+ step_rule,
  ..
  } = self.options;
 
@@ -264,7 +277,7 @@ impl<R: System> Solver<R> for TrustRegion<R> {
 
  // Compute r(x) and r'(x).
  r.eval(x, rx);
- jac.compute(r, x, scale, rx);
+ jac.compute(r, x, scale, rx, eps_sqrt, step_rule);
 
  let rx_norm = rx.norm();
 
@@ -706,6 +719,9 @@ impl<F: Function> Optimizer<F> for TrustRegion<F> {
  accept_thresh,
  rejections_thresh,
  allow_ascent,
+ eps_sqrt,
+ eps_cbrt,
+ step_rule,
  ..
  } = self.options;
 
@@ -742,8 +758,8 @@ impl<F: Function> Optimizer<F> for TrustRegion<F> {
 
  // Compute f(x), grad f(x) and H(x).
  let mut fx = f.apply(x);
- grad.compute(f, x, scale, fx);
- hes.compute(f, x, scale, fx);
+ grad.compute(f, x, scale, fx, eps_sqrt, step_rule);
+ hes.compute(f, x, scale, fx, eps_cbrt, step_rule);
 
  let estimate_delta = *delta == zero;
  if estimate_delta {

diff --git a/src/analysis.rs b/src/analysis.rs
@@ -1,13 +1,12 @@
 //! Various supporting analyses.
 
 use nalgebra::{
- convert, storage::StorageMut, ComplexField, DimName, Dyn, IsContiguous, OVector, RealField,
- Vector, U1,
+ convert, storage::StorageMut, ComplexField, DimName, Dyn, IsContiguous, OVector, Vector, U1,
 };
 
 use crate::{
- core::{Domain, RealField as _, System},
- derivatives::Jacobian,
+ core::{Domain, RealField, System},
+ derivatives::{Jacobian, StepRule},
 };
 
 /// Estimates magnitude of the variable given lower and upper bounds.
@@ -55,7 +54,14 @@ where
 
  // Compute r'(x) in the initial point.
  r.eval(x, rx);
- let jac1 = Jacobian::new(r, x, &scale, rx);
+ let jac1 = Jacobian::new(
+ r,
+ x,
+ &scale,
+ rx,
+ R::Field::EPSILON_SQRT,
+ StepRule::default(),
+ );
 
  // Compute Newton step.
  let mut p = rx.clone_owned();
@@ -70,7 +76,14 @@ where
 
  // Compute r'(x) after one Newton step.
  r.eval(x, rx);
- let jac2 = Jacobian::new(r, x, &scale, rx);
+ let jac2 = Jacobian::new(
+ r,
+ x,
+ &scale,
+ rx,
+ R::Field::EPSILON_SQRT,
+ StepRule::default(),
+ );
 
  // Linear variables have no effect on the Jacobian matrix. They can be
  // recognized by observing no change in corresponding columns (i.e.,

diff --git a/src/core/domain.rs b/src/core/domain.rs
@@ -5,10 +5,10 @@ use std::iter::FromIterator;
 use fastrand::Rng;
 use na::{Dim, DimName};
 use nalgebra as na;
-use nalgebra::{storage::StorageMut, OVector, RealField, Vector};
+use nalgebra::{storage::StorageMut, OVector, Vector};
 
 use crate::analysis::estimate_magnitude_from_bounds;
-use crate::core::Sample;
+use crate::core::{RealField, Sample};
 
 /// Domain for a problem.
 #[derive(Clone)]