From e42cfa22dbf95c4c79542c19c1e119247c6b184b Mon Sep 17 00:00:00 2001
From: Patrick Stevens
Date: Sun, 7 May 2023 21:49:25 +0100
Subject: [PATCH] Move gradient descent to lib (#20)

---
 Cargo.lock                             |   1 +
 little_learner/Cargo.toml              |   1 +
 little_learner/src/gradient_descent.rs | 408 +++++++++++++++++
 .../src/hyper.rs                       |   4 +-
 little_learner/src/lib.rs              |   3 +
 little_learner/src/loss.rs             |  42 --
 .../src/sample.rs                      |   0
 little_learner_app/src/main.rs         | 413 +-----------------
 8 files changed, 422 insertions(+), 450 deletions(-)
 create mode 100644 little_learner/src/gradient_descent.rs
 rename {little_learner_app => little_learner}/src/hyper.rs (97%)
 rename {little_learner_app => little_learner}/src/sample.rs (100%)

diff --git a/Cargo.lock b/Cargo.lock
index 0f22f90..685a390 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -72,6 +72,7 @@ version = "0.1.0"
 dependencies = [
  "immutable-chunkmap",
  "ordered-float",
+ "rand",
 ]
 
 [[package]]
diff --git a/little_learner/Cargo.toml b/little_learner/Cargo.toml
index b32090b..16221cc 100644
--- a/little_learner/Cargo.toml
+++ b/little_learner/Cargo.toml
@@ -8,5 +8,6 @@ edition = "2021"
 [dependencies]
 immutable-chunkmap = "1.0.5"
 ordered-float = "3.6.0"
+rand = "0.8.5"
 
 [lib]
diff --git a/little_learner/src/gradient_descent.rs b/little_learner/src/gradient_descent.rs
new file mode 100644
index 0000000..f136352
--- /dev/null
+++ b/little_learner/src/gradient_descent.rs
@@ -0,0 +1,408 @@
+use crate::auto_diff::{grad, Differentiable, RankedDifferentiable};
+use crate::hyper::BaseGradientDescentHyper;
+use crate::loss::{l2_loss_2, Predictor};
+use crate::sample::sample2;
+use crate::traits::NumLike;
+use rand::Rng;
+use std::hash::Hash;
+
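+/// Apply `f` to `start`, `n` times.
+///
+/// Illustrative sketch of the behaviour (the closure and values here are
+/// hypothetical, not taken from the tests below):
+/// ```
+/// let double = |x: i32| x * 2;
+/// assert_eq!(iterate(double, 1, 4u32), 16);
+/// ```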
+fn iterate<A, F>(mut f: F, start: A, n: u32) -> A
+where
+    F: FnMut(A) -> A,
+{
+    let mut v = start;
+    for _ in 0..n {
+        v = f(v);
+    }
+    v
+}
+
+/// `adjust` takes the previous value and a delta, and returns a new (still inflated) value.
+fn general_gradient_descent_step<
+    A,
+    F,
+    Inflated,
+    Deflate,
+    Adjust,
+    Hyper,
+    const RANK: usize,
+    const PARAM_NUM: usize,
+>(
+    f: &mut F,
+    theta: [Inflated; PARAM_NUM],
+    deflate: Deflate,
+    hyper: Hyper,
+    mut adjust: Adjust,
+) -> [Inflated; PARAM_NUM]
+where
+    A: Clone + NumLike + Hash + Eq,
+    F: FnMut(&[Differentiable<A>; PARAM_NUM]) -> RankedDifferentiable<A, RANK>,
+    Deflate: FnMut(Inflated) -> Differentiable<A>,
+    Inflated: Clone,
+    Hyper: Clone,
+    Adjust: FnMut(Inflated, &Differentiable<A>, Hyper) -> Inflated,
+{
+    let deflated = theta.clone().map(deflate);
+    let delta = grad(f, &deflated);
+    let mut i = 0;
+    theta.map(|inflated| {
+        let delta = &delta[i];
+        i += 1;
+        adjust(inflated, delta, hyper.clone())
+    })
+}
+
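+// For intuition: with the plain ("naked") hyperparameters, `adjust` is ordinary
+// gradient descent, morally `theta - hyper.learning_rate * delta` per parameter.
+// The velocity and RMS variants carry extra per-parameter state in `Inflated`
+// (hence the inflate/deflate pair) and update that state here as well. The
+// concrete update functions are supplied through `Predictor::update` in loss.rs.
+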
+pub fn gradient_descent<
+    'a,
+    T,
+    R,
+    Point,
+    F,
+    G,
+    H,
+    Inflated,
+    Hyper,
+    ImmutableHyper,
+    const IN_SIZE: usize,
+    const PARAM_NUM: usize,
+>(
+    hyper: Hyper,
+    xs: &'a [Point],
+    to_ranked_differentiable: G,
+    ys: &[T],
+    zero_params: [Differentiable<T>; PARAM_NUM],
+    mut predictor: Predictor<F, Inflated, Differentiable<T>, ImmutableHyper>,
+    to_immutable: H,
+) -> [Differentiable<T>; PARAM_NUM]
+where
+    T: NumLike + Hash + Copy + Default,
+    Point: 'a + Copy,
+    F: Fn(
+        RankedDifferentiable<T, IN_SIZE>,
+        &[Differentiable<T>; PARAM_NUM],
+    ) -> RankedDifferentiable<T, 1>,
+    G: for<'b> Fn(&'b [Point]) -> RankedDifferentiable<T, IN_SIZE>,
+    Inflated: Clone,
+    ImmutableHyper: Clone,
+    Hyper: Into<BaseGradientDescentHyper<T, R>>,
+    H: FnOnce(&Hyper) -> ImmutableHyper,
+    R: Rng,
+{
+    let sub_hypers = to_immutable(&hyper);
+    let mut gradient_hyper: BaseGradientDescentHyper<T, R> = hyper.into();
+    let iterations = gradient_hyper.iterations;
+    let out = iterate(
+        |theta| {
+            general_gradient_descent_step(
+                &mut |x| match gradient_hyper.sampling.as_mut() {
+                    None => RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                        l2_loss_2(
+                            &predictor.predict,
+                            to_ranked_differentiable(xs),
+                            RankedDifferentiable::of_slice(ys),
+                            x,
+                        ),
+                    )]),
+                    Some((rng, batch_size)) => {
+                        let (sampled_xs, sampled_ys) = sample2(rng, *batch_size, xs, ys);
+                        RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                            l2_loss_2(
+                                &predictor.predict,
+                                to_ranked_differentiable(&sampled_xs),
+                                RankedDifferentiable::of_slice(&sampled_ys),
+                                x,
+                            ),
+                        )])
+                    }
+                },
+                theta,
+                predictor.deflate,
+                sub_hypers.clone(),
+                predictor.update,
+            )
+        },
+        zero_params.map(predictor.inflate),
+        iterations,
+    );
+    out.map(&mut predictor.deflate)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::auto_diff::RankedDifferentiableTagged;
+    use crate::hyper::{RmsGradientDescentHyper, VelocityGradientDescentHyper};
+    use crate::loss::{
+        naked_predictor, predict_line_2_unranked, predict_plane, predict_quadratic_unranked,
+        rms_predictor, velocity_predictor,
+    };
+    use crate::not_nan::{to_not_nan_1, to_not_nan_2};
+    use crate::scalar::Scalar;
+    use crate::traits::Zero;
+    use ordered_float::NotNan;
+    use rand::rngs::StdRng;
+    use rand::SeedableRng;
+
+    #[test]
+    fn test_iterate() {
+        let f = |t: [i32; 3]| t.map(|i| i - 3);
+        assert_eq!(iterate(f, [1, 2, 3], 5u32), [-14, -13, -12]);
+    }
+
+    #[test]
+    fn first_optimisation_test() {
+        let xs = [2.0, 1.0, 4.0, 3.0];
+        let ys = [1.8, 1.2, 4.2, 3.3];
+
+        let zero = Scalar::<NotNan<f64>>::zero();
+
+        let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.01).expect("not nan"), 1000);
+        let iterated = {
+            let xs = to_not_nan_1(xs);
+            let ys = to_not_nan_1(ys);
+            let zero_params = [
+                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                RankedDifferentiable::of_scalar(zero).to_unranked(),
+            ];
+            gradient_descent(
+                hyper,
+                &xs,
+                |b| RankedDifferentiable::of_slice(b),
+                &ys,
+                zero_params,
+                naked_predictor(predict_line_2_unranked),
+                BaseGradientDescentHyper::to_immutable,
+            )
+        };
+        let iterated = iterated
+            .into_iter()
+            .map(|x| x.into_scalar().real_part().into_inner())
+            .collect::<Vec<_>>();
+
+        assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
+    }
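+
+    // Sanity check on the numbers above: the closed-form least-squares fit for
+    // that data is exactly the line y = 1.05 x + 0, so 1,000 naked gradient
+    // steps have converged to within about 2e-6 of the true optimum.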
+
+    #[test]
+    fn optimise_quadratic() {
+        let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
+        let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
+
+        let zero = Scalar::<NotNan<f64>>::zero();
+
+        let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000);
+
+        let iterated = {
+            let xs = to_not_nan_1(xs);
+            let ys = to_not_nan_1(ys);
+            let zero_params = [
+                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                RankedDifferentiable::of_scalar(zero).to_unranked(),
+            ];
+            gradient_descent(
+                hyper,
+                &xs,
+                |b| RankedDifferentiable::of_slice(b),
+                &ys,
+                zero_params,
+                naked_predictor(predict_quadratic_unranked),
+                BaseGradientDescentHyper::to_immutable,
+            )
+        };
+        let iterated = iterated
+            .into_iter()
+            .map(|x| x.into_scalar().real_part().into_inner())
+            .collect::<Vec<_>>();
+
+        assert_eq!(
+            iterated,
+            [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
+        );
+    }
+
+    const PLANE_XS: [[f64; 2]; 6] = [
+        [1.0, 2.05],
+        [1.0, 3.0],
+        [2.0, 2.0],
+        [2.0, 3.91],
+        [3.0, 6.13],
+        [4.0, 8.09],
+    ];
+    const PLANE_YS: [f64; 6] = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
+
+    #[test]
+    fn optimise_plane() {
+        let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000);
+
+        let iterated = {
+            let xs = to_not_nan_2(PLANE_XS);
+            let ys = to_not_nan_1(PLANE_YS);
+            let zero_params = [
+                RankedDifferentiable::of_slice(&[NotNan::zero(), NotNan::zero()]).to_unranked(),
+                Differentiable::of_scalar(Scalar::zero()),
+            ];
+            gradient_descent(
+                hyper,
+                &xs,
+                RankedDifferentiable::of_slice_2::<_, 2>,
+                &ys,
+                zero_params,
+                naked_predictor(predict_plane),
+                BaseGradientDescentHyper::to_immutable,
+            )
+        };
+
+        let [theta0, theta1] = iterated;
+
+        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+
+        assert_eq!(theta0.collect(), [3.97757644609063, 2.0496557321494446]);
+        assert_eq!(
+            theta1.to_scalar().real_part().into_inner(),
+            5.786758464448078
+        );
+    }
+
+    #[test]
+    fn optimise_plane_with_sampling() {
+        let rng = StdRng::seed_from_u64(314159);
+        let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
+            .with_rng(rng, 4);
+
+        let iterated = {
+            let xs = to_not_nan_2(PLANE_XS);
+            let ys = to_not_nan_1(PLANE_YS);
+            let zero_params = [
+                RankedDifferentiable::of_slice(&[NotNan::zero(), NotNan::zero()]).to_unranked(),
+                Differentiable::of_scalar(Scalar::zero()),
+            ];
+            gradient_descent(
+                hyper,
+                &xs,
+                RankedDifferentiable::of_slice_2::<_, 2>,
+                &ys,
+                zero_params,
+                naked_predictor(predict_plane),
+                BaseGradientDescentHyper::to_immutable,
+            )
+        };
+
+        let [theta0, theta1] = iterated;
+
+        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor").collect();
+        let theta1 = theta1
+            .attach_rank::<0>()
+            .expect("rank 0 tensor")
+            .to_scalar()
+            .real_part()
+            .into_inner();
+
+        /*
+        Mathematica code to verify by eye that the optimisation gave a reasonable result:
+
+        xs = {{1.0, 2.05}, {1.0, 3.0}, {2.0, 2.0}, {2.0, 3.91}, {3.0,
+           6.13}, {4.0, 8.09}};
+        ys = {13.99, 15.99, 18.0, 22.4, 30.2, 37.94};
+        points = ListPointPlot3D[Append @@@ Transpose[{xs, ys}]];
+
+        withoutBatching0 = {3.97757644609063, 2.0496557321494446};
+        withoutBatching1 = 5.786758464448078;
+        withoutBatching =
+         Plot3D[{x, y} . withoutBatching0 + withoutBatching1, {x, 0, 4}, {y,
+           0, 8}];
+
+        withBatching0 = {3.8581694055684781, 2.2166222673968554};
+        withBatching1 = 5.2839863438547159;
+        withBatching =
+         Plot3D[{x, y} . withBatching0 + withBatching1, {x, 0, 4}, {y, 0, 8}];
+
+        Show[points, withoutBatching]
+
+        Show[points, withBatching]
+        */
+
+        assert_eq!(theta0, [3.8581694055684781, 2.2166222673968554]);
+        assert_eq!(theta1, 5.2839863438547159);
+    }
+
+    #[test]
+    fn test_with_velocity() {
+        let hyper = VelocityGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
+            .with_mu(NotNan::new(0.9).expect("not nan"));
+
+        let iterated = {
+            let xs = to_not_nan_2(PLANE_XS);
+            let ys = to_not_nan_1(PLANE_YS);
+            let zero_params = [
+                RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
+                    .to_unranked(),
+                Differentiable::of_scalar(Scalar::zero()),
+            ];
+
+            gradient_descent(
+                hyper,
+                &xs,
+                RankedDifferentiableTagged::of_slice_2::<_, 2>,
+                &ys,
+                zero_params,
+                velocity_predictor(predict_plane),
+                VelocityGradientDescentHyper::to_immutable,
+            )
+        };
+
+        let [theta0, theta1] = iterated;
+
+        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+
+        assert_eq!(theta0.collect(), [3.979645447136021, 1.976454920954754]);
+        assert_eq!(
+            theta1.to_scalar().real_part().into_inner(),
+            6.169579045974949
+        );
+    }
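+
+    // For reference, the standard update rules that these hyperparameter names
+    // suggest (sketches only; the actual implementations live in loss.rs):
+    //   velocity (momentum): v <- mu * v - learning_rate * g; theta <- theta + v
+    //   RMS: r <- beta * r + (1 - beta) * g^2;
+    //        theta <- theta - learning_rate * g / (sqrt(r) + stabilizer)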
+
+    #[test]
+    fn test_with_rms() {
+        let beta = NotNan::new(0.9).expect("not nan");
+        let stabilizer = NotNan::new(0.00000001).expect("not nan");
+        let hyper = RmsGradientDescentHyper::default(NotNan::new(0.001).expect("not nan"), 3000)
+            .with_stabilizer(stabilizer)
+            .with_beta(beta);
+
+        let iterated = {
+            let xs = to_not_nan_2(PLANE_XS);
+            let ys = to_not_nan_1(PLANE_YS);
+            let zero_params = [
+                RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
+                    .to_unranked(),
+                Differentiable::of_scalar(Scalar::zero()),
+            ];
+
+            gradient_descent(
+                hyper,
+                &xs,
+                RankedDifferentiableTagged::of_slice_2::<_, 2>,
+                &ys,
+                zero_params,
+                rms_predictor(predict_plane),
+                RmsGradientDescentHyper::to_immutable,
+            )
+        };
+
+        let [theta0, theta1] = iterated;
+
+        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+
+        let fitted_theta0 = theta0
+            .collect()
+            .iter()
+            .map(|x| x.into_inner())
+            .collect::<Vec<_>>();
+        let fitted_theta1 = theta1.to_scalar().real_part().into_inner();
+        assert_eq!(fitted_theta0, [3.9853500993426492, 1.9745945728216352]);
+        assert_eq!(fitted_theta1, 6.1642229831811681);
+    }
+}
diff --git a/little_learner_app/src/hyper.rs b/little_learner/src/hyper.rs
similarity index 97%
rename from little_learner_app/src/hyper.rs
rename to little_learner/src/hyper.rs
index 54502bc..58b203d 100644
--- a/little_learner_app/src/hyper.rs
+++ b/little_learner/src/hyper.rs
@@ -1,5 +1,5 @@
-use little_learner::loss::{NakedHypers, RmsHyper, VelocityHypers};
-use little_learner::traits::{NumLike, Zero};
+use crate::loss::{NakedHypers, RmsHyper, VelocityHypers};
+use crate::traits::{NumLike, Zero};
 use rand::{rngs::StdRng, Rng};
 
 pub struct BaseGradientDescentHyper<A, R: Rng> {
diff --git a/little_learner/src/lib.rs b/little_learner/src/lib.rs
index ad4f0d8..c163e0d 100644
--- a/little_learner/src/lib.rs
+++ b/little_learner/src/lib.rs
@@ -5,8 +5,11 @@
 pub mod auto_diff;
 pub mod const_teq;
 pub mod expr_syntax_tree;
+pub mod gradient_descent;
+pub mod hyper;
 pub mod loss;
 pub mod not_nan;
+pub mod sample;
 pub mod scalar;
 pub mod smooth;
 pub mod tensor;
diff --git a/little_learner/src/loss.rs b/little_learner/src/loss.rs
index 3c1146d..22766e0 100644
--- a/little_learner/src/loss.rs
+++ b/little_learner/src/loss.rs
@@ -245,12 +245,6 @@ pub struct Predictor<F, Inflated, Deflated, Params> {
     pub update: fn(Inflated, &Deflated, Params) -> Inflated,
 }
 
-type ParameterPredictor<T, const IN_SIZE: usize, const THETA: usize> =
-    fn(
-        RankedDifferentiable<T, IN_SIZE>,
-        &[Differentiable<T>; THETA],
-    ) -> RankedDifferentiable<T, 1>;
-
 #[derive(Clone)]
 pub struct NakedHypers<A> {
     pub learning_rate: A,
@@ -343,42 +337,6 @@ where
     }
 }
 
-pub const fn plane_predictor<T>(
-) -> Predictor<ParameterPredictor<T, 2, 2>, Differentiable<T>, Differentiable<T>, NakedHypers<T>>
-where
-    T: NumLike + Default,
-{
-    naked_predictor(predict_plane)
-}
-
-pub const fn velocity_plane_predictor<T>() -> Predictor<
-    ParameterPredictor<T, 2, 2>,
-    DifferentiableTagged<T, T>,
-    Differentiable<T>,
-    VelocityHypers<T>,
->
-where
-    T: NumLike + Default,
-{
-    velocity_predictor(predict_plane)
-}
-
-pub const fn line_unranked_predictor<T>(
-) -> Predictor<ParameterPredictor<T, 1, 2>, Differentiable<T>, Differentiable<T>, NakedHypers<T>>
-where
-    T: NumLike + Default + Copy,
-{
-    naked_predictor(predict_line_2_unranked)
-}
-
-pub const fn quadratic_unranked_predictor<T>(
-) -> Predictor<ParameterPredictor<T, 1, 3>, Differentiable<T>, Differentiable<T>, NakedHypers<T>>
-where
-    T: NumLike + Default,
-{
-    naked_predictor(predict_quadratic_unranked)
-}
-
 #[cfg(test)]
 mod test_loss {
     use crate::auto_diff::RankedDifferentiable;
diff --git a/little_learner_app/src/sample.rs b/little_learner/src/sample.rs
similarity index 100%
rename from little_learner_app/src/sample.rs
rename to little_learner/src/sample.rs
diff --git a/little_learner_app/src/main.rs b/little_learner_app/src/main.rs
index 4a6e99a..5dd8178 100644
--- a/little_learner_app/src/main.rs
+++ b/little_learner_app/src/main.rs
@@ -1,145 +1,18 @@
 #![allow(incomplete_features)]
 #![feature(generic_const_exprs)]
 
-mod hyper;
-mod sample;
 mod with_tensor;
 
-use core::hash::Hash;
-use rand::Rng;
+use little_learner::auto_diff::{Differentiable, RankedDifferentiable, RankedDifferentiableTagged};
 
-use little_learner::auto_diff::{
-    grad, Differentiable, RankedDifferentiable, RankedDifferentiableTagged,
-};
-
-use crate::hyper::{BaseGradientDescentHyper, VelocityGradientDescentHyper};
-use crate::sample::sample2;
-use little_learner::loss::{l2_loss_2, velocity_plane_predictor, Predictor};
+use little_learner::gradient_descent::gradient_descent;
+use little_learner::hyper::VelocityGradientDescentHyper;
+use little_learner::loss::{predict_plane, velocity_predictor};
 use little_learner::not_nan::{to_not_nan_1, to_not_nan_2};
 use little_learner::scalar::Scalar;
-use little_learner::traits::{NumLike, Zero};
+use little_learner::traits::Zero;
 use ordered_float::NotNan;
 
-fn iterate<A, F>(mut f: F, start: A, n: u32) -> A
-where
-    F: FnMut(A) -> A,
-{
-    let mut v = start;
-    for _ in 0..n {
-        v = f(v);
-    }
-    v
-}
-
-/// `adjust` takes the previous value and a delta, and returns a deflated new value.
-fn general_gradient_descent_step<
-    A,
-    F,
-    Inflated,
-    Deflate,
-    Adjust,
-    Hyper,
-    const RANK: usize,
-    const PARAM_NUM: usize,
->(
-    f: &mut F,
-    theta: [Inflated; PARAM_NUM],
-    deflate: Deflate,
-    hyper: Hyper,
-    mut adjust: Adjust,
-) -> [Inflated; PARAM_NUM]
-where
-    A: Clone + NumLike + Hash + Eq,
-    F: FnMut(&[Differentiable<A>; PARAM_NUM]) -> RankedDifferentiable<A, RANK>,
-    Deflate: FnMut(Inflated) -> Differentiable<A>,
-    Inflated: Clone,
-    Hyper: Clone,
-    Adjust: FnMut(Inflated, &Differentiable<A>, Hyper) -> Inflated,
-{
-    let deflated = theta.clone().map(deflate);
-    let delta = grad(f, &deflated);
-    let mut i = 0;
-    theta.map(|inflated| {
-        let delta = &delta[i];
-        i += 1;
-        adjust(inflated, delta, hyper.clone())
-    })
-}
-
-fn gradient_descent<
-    'a,
-    T,
-    R: Rng,
-    Point,
-    F,
-    G,
-    H,
-    Inflated,
-    Hyper,
-    ImmutableHyper,
-    const IN_SIZE: usize,
-    const PARAM_NUM: usize,
->(
-    hyper: Hyper,
-    xs: &'a [Point],
-    to_ranked_differentiable: G,
-    ys: &[T],
-    zero_params: [Differentiable<T>; PARAM_NUM],
-    mut predictor: Predictor<F, Inflated, Differentiable<T>, ImmutableHyper>,
-    to_immutable: H,
-) -> [Differentiable<T>; PARAM_NUM]
-where
-    T: NumLike + Hash + Copy + Default,
-    Point: 'a + Copy,
-    F: Fn(
-        RankedDifferentiable<T, IN_SIZE>,
-        &[Differentiable<T>; PARAM_NUM],
-    ) -> RankedDifferentiable<T, 1>,
-    G: for<'b> Fn(&'b [Point]) -> RankedDifferentiable<T, IN_SIZE>,
-    Inflated: Clone,
-    ImmutableHyper: Clone,
-    Hyper: Into<BaseGradientDescentHyper<T, R>>,
-    H: FnOnce(&Hyper) -> ImmutableHyper,
-{
-    let sub_hypers = to_immutable(&hyper);
-    let mut gradient_hyper: BaseGradientDescentHyper<T, R> = hyper.into();
-    let iterations = gradient_hyper.iterations;
-    let out = iterate(
-        |theta| {
-            general_gradient_descent_step(
-                &mut |x| match gradient_hyper.sampling.as_mut() {
-                    None => RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
-                        l2_loss_2(
-                            &predictor.predict,
-                            to_ranked_differentiable(xs),
-                            RankedDifferentiable::of_slice(ys),
-                            x,
-                        ),
-                    )]),
-                    Some((rng, batch_size)) => {
-                        let (sampled_xs, sampled_ys) = sample2(rng, *batch_size, xs, ys);
-                        RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
-                            l2_loss_2(
-                                &predictor.predict,
-                                to_ranked_differentiable(&sampled_xs),
-                                RankedDifferentiable::of_slice(&sampled_ys),
-                                x,
-                            ),
-                        )])
-                    }
-                },
-                theta,
-                predictor.deflate,
-                sub_hypers.clone(),
-                predictor.update,
-            )
-        },
-        zero_params.map(predictor.inflate),
-        iterations,
-    );
-    out.map(&mut predictor.deflate)
-}
-
 fn main() {
     let plane_xs = [
         [1.0, 2.05],
         [1.0, 3.0],
         [2.0, 2.0],
         [2.0, 3.91],
         [3.0, 6.13],
         [4.0, 8.09],
     ];
@@ -169,7 +42,7 @@ fn main() {
             RankedDifferentiableTagged::of_slice_2::<_, 2>,
             &ys,
             zero_params,
-            velocity_plane_predictor(),
+            velocity_predictor(predict_plane),
             VelocityGradientDescentHyper::to_immutable,
         )
     };
@@ -187,276 +60,4 @@ fn main() {
 }
 
 #[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::hyper::RmsGradientDescentHyper;
-    use little_learner::loss::{
-        line_unranked_predictor, plane_predictor, predict_plane, quadratic_unranked_predictor,
-        rms_predictor,
-    };
-    use rand::rngs::StdRng;
-    use rand::SeedableRng;
-
-    #[test]
-    fn test_iterate() {
-        let f = |t: [i32; 3]| t.map(|i| i - 3);
-        assert_eq!(iterate(f, [1, 2, 3], 5u32), [-14, -13, -12]);
-    }
-
-    #[test]
-    fn first_optimisation_test() {
-        let xs = [2.0, 1.0, 4.0, 3.0];
-        let ys = [1.8, 1.2, 4.2, 3.3];
-
-        let zero = Scalar::<NotNan<f64>>::zero();
-
-        let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.01).expect("not nan"), 1000);
-        let iterated = {
-            let xs = to_not_nan_1(xs);
-            let ys = to_not_nan_1(ys);
-            let zero_params = [
-                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
-                RankedDifferentiable::of_scalar(zero).to_unranked(),
-            ];
-            gradient_descent(
-                hyper,
-                &xs,
-                |b| RankedDifferentiable::of_slice(b),
-                &ys,
-                zero_params,
-                line_unranked_predictor(),
-                BaseGradientDescentHyper::to_immutable,
-            )
-        };
-        let iterated = iterated
-            .into_iter()
-            .map(|x| x.into_scalar().real_part().into_inner())
-            .collect::<Vec<_>>();
-
-        assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
-    }
-
-    #[test]
-    fn optimise_quadratic() {
-        let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
-        let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
-
-        let zero = Scalar::<NotNan<f64>>::zero();
-
-        let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000);
-
-        let iterated = {
-            let xs = to_not_nan_1(xs);
-            let ys = to_not_nan_1(ys);
-            let zero_params = [
-                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
-                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
-                RankedDifferentiable::of_scalar(zero).to_unranked(),
-            ];
-            gradient_descent(
-                hyper,
-                &xs,
-                |b| RankedDifferentiable::of_slice(b),
-                &ys,
-                zero_params,
-                quadratic_unranked_predictor(),
-                BaseGradientDescentHyper::to_immutable,
-            )
-        };
-        let iterated = iterated
-            .into_iter()
-            .map(|x| x.into_scalar().real_part().into_inner())
-            .collect::<Vec<_>>();
-
-        assert_eq!(
-            iterated,
-            [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
-        );
-    }
-
-    const PLANE_XS: [[f64; 2]; 6] = [
-        [1.0, 2.05],
-        [1.0, 3.0],
-        [2.0, 2.0],
-        [2.0, 3.91],
-        [3.0, 6.13],
-        [4.0, 8.09],
-    ];
-    const PLANE_YS: [f64; 6] = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
-
-    #[test]
-    fn optimise_plane() {
-        let mut hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000);
-
-        let iterated = {
-            let xs = to_not_nan_2(PLANE_XS);
-            let ys = to_not_nan_1(PLANE_YS);
-            let zero_params = [
-                RankedDifferentiable::of_slice(&[NotNan::zero(), NotNan::zero()]).to_unranked(),
-                Differentiable::of_scalar(Scalar::zero()),
-            ];
-            gradient_descent(
-                hyper,
-                &xs,
-                RankedDifferentiable::of_slice_2::<_, 2>,
-                &ys,
-                zero_params,
-                plane_predictor(),
-                BaseGradientDescentHyper::to_immutable,
-            )
-        };
-
-        let [theta0, theta1] = iterated;
-
-        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
-        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
-
-        assert_eq!(theta0.collect(), [3.97757644609063, 2.0496557321494446]);
-        assert_eq!(
-            theta1.to_scalar().real_part().into_inner(),
-            5.786758464448078
-        );
-    }
-
-    #[test]
-    fn optimise_plane_with_sampling() {
-        let rng = StdRng::seed_from_u64(314159);
-        let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
-            .with_rng(rng, 4);
-
-        let iterated = {
-            let xs = to_not_nan_2(PLANE_XS);
-            let ys = to_not_nan_1(PLANE_YS);
-            let zero_params = [
-                RankedDifferentiable::of_slice(&[NotNan::zero(), NotNan::zero()]).to_unranked(),
-                Differentiable::of_scalar(Scalar::zero()),
-            ];
-            gradient_descent(
-                hyper,
-                &xs,
-                RankedDifferentiable::of_slice_2::<_, 2>,
-                &ys,
-                zero_params,
-                plane_predictor(),
-                BaseGradientDescentHyper::to_immutable,
-            )
-        };
-
-        let [theta0, theta1] = iterated;
-
-        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor").collect();
-        let theta1 = theta1
-            .attach_rank::<0>()
-            .expect("rank 0 tensor")
-            .to_scalar()
-            .real_part()
-            .into_inner();
-
-        /*
-        Mathematica code to verify by eye that the optimisation gave a reasonable result:
-
-        xs = {{1.0, 2.05}, {1.0, 3.0}, {2.0, 2.0}, {2.0, 3.91}, {3.0,
-           6.13}, {4.0, 8.09}};
-        ys = {13.99, 15.99, 18.0, 22.4, 30.2, 37.94};
-        points = ListPointPlot3D[Append @@@ Transpose[{xs, ys}]];
-
-        withoutBatching0 = {3.97757644609063, 2.0496557321494446};
-        withoutBatching1 = 5.2839863438547159;
-        withoutBatching =
-         Plot3D[{x, y} . withoutBatching0 + withoutBatching1, {x, 0, 4}, {y,
-           0, 8}];
-
-        withBatching0 = {3.8581694055684781, 2.2166222673968554};
-        withBatching1 = 5.2399202468216668;
-        withBatching =
-         Plot3D[{x, y} . withBatching0 + withBatching1, {x, 0, 4}, {y, 0, 8}];
-
-        Show[points, withoutBatching]
-
-        Show[points, withBatching]
-        */
-
-        assert_eq!(theta0, [3.8581694055684781, 2.2166222673968554]);
-        assert_eq!(theta1, 5.2839863438547159);
-    }
-
-    #[test]
-    fn test_with_velocity() {
-        let hyper = VelocityGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
-            .with_mu(NotNan::new(0.9).expect("not nan"));
-
-        let iterated = {
-            let xs = to_not_nan_2(PLANE_XS);
-            let ys = to_not_nan_1(PLANE_YS);
-            let zero_params = [
-                RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
-                    .to_unranked(),
-                Differentiable::of_scalar(Scalar::zero()),
-            ];
-
-            gradient_descent(
-                hyper,
-                &xs,
-                RankedDifferentiableTagged::of_slice_2::<_, 2>,
-                &ys,
-                zero_params,
-                velocity_plane_predictor(),
-                VelocityGradientDescentHyper::to_immutable,
-            )
-        };
-
-        let [theta0, theta1] = iterated;
-
-        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
-        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
-
-        assert_eq!(theta0.collect(), [3.979645447136021, 1.976454920954754]);
-        assert_eq!(
-            theta1.to_scalar().real_part().into_inner(),
-            6.169579045974949
-        );
-    }
-
-    #[test]
-    fn test_with_rms() {
-        let beta = NotNan::new(0.9).expect("not nan");
-        let stabilizer = NotNan::new(0.00000001).expect("not nan");
-        let hyper = RmsGradientDescentHyper::default(NotNan::new(0.001).expect("not nan"), 3000)
-            .with_stabilizer(stabilizer)
-            .with_beta(beta);
-
-        let iterated = {
-            let xs = to_not_nan_2(PLANE_XS);
-            let ys = to_not_nan_1(PLANE_YS);
-            let zero_params = [
-                RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
-                    .to_unranked(),
-                Differentiable::of_scalar(Scalar::zero()),
-            ];
-
-            gradient_descent(
-                hyper,
-                &xs,
-                RankedDifferentiableTagged::of_slice_2::<_, 2>,
-                &ys,
-                zero_params,
-                rms_predictor(predict_plane),
-                RmsGradientDescentHyper::to_immutable,
-            )
-        };
-
-        let [theta0, theta1] = iterated;
-
-        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
-        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
-
-        let fitted_theta0 = theta0
-            .collect()
-            .iter()
-            .map(|x| x.into_inner())
-            .collect::<Vec<_>>();
-        let fitted_theta1 = theta1.to_scalar().real_part().into_inner();
-        assert_eq!(fitted_theta0, [3.9853500993426492, 1.9745945728216352]);
-        assert_eq!(fitted_theta1, 6.1642229831811681);
-    }
-}
+mod tests {}