diff --git a/Cargo.lock b/Cargo.lock
index 0f22f90..685a390 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -72,6 +72,7 @@ version = "0.1.0"
dependencies = [
"immutable-chunkmap",
"ordered-float",
+ "rand",
]
[[package]]
diff --git a/little_learner/Cargo.toml b/little_learner/Cargo.toml
index b32090b..16221cc 100644
--- a/little_learner/Cargo.toml
+++ b/little_learner/Cargo.toml
@@ -8,5 +8,6 @@ edition = "2021"
[dependencies]
immutable-chunkmap = "1.0.5"
ordered-float = "3.6.0"
+rand = "0.8.5"
[lib]
diff --git a/little_learner/src/gradient_descent.rs b/little_learner/src/gradient_descent.rs
new file mode 100644
index 0000000..f136352
--- /dev/null
+++ b/little_learner/src/gradient_descent.rs
@@ -0,0 +1,408 @@
+use crate::auto_diff::{grad, Differentiable, RankedDifferentiable};
+use crate::hyper::BaseGradientDescentHyper;
+use crate::loss::{l2_loss_2, Predictor};
+use crate::sample::sample2;
+use crate::traits::NumLike;
+use rand::Rng;
+use std::hash::Hash;
+
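+/// Apply `f` to `start`, then to the result, and so on, `n` times in total.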
+fn iterate<A, F>(mut f: F, start: A, n: u32) -> A
+where
+ F: FnMut(A) -> A,
+{
+ let mut v = start;
+ for _ in 0..n {
+ v = f(v);
+ }
+ v
+}
+
+/// `adjust` takes the previous inflated value and a delta, and returns a new inflated value.
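+/// `deflate` strips any optimiser bookkeeping (e.g. a velocity or RMS accumulator) from an
+/// inflated parameter, leaving the plain `Differentiable` that `grad` can differentiate.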
+fn general_gradient_descent_step<
+ A,
+ F,
+ Inflated,
+ Deflate,
+ Adjust,
+ Hyper,
+ const RANK: usize,
+ const PARAM_NUM: usize,
+>(
+ f: &mut F,
+ theta: [Inflated; PARAM_NUM],
+ deflate: Deflate,
+ hyper: Hyper,
+ mut adjust: Adjust,
+) -> [Inflated; PARAM_NUM]
+where
+ A: Clone + NumLike + Hash + Eq,
+ F: FnMut(&[Differentiable<A>; PARAM_NUM]) -> RankedDifferentiable<A, RANK>,
+ Deflate: FnMut(Inflated) -> Differentiable<A>,
+ Inflated: Clone,
+ Hyper: Clone,
+ Adjust: FnMut(Inflated, &Differentiable<A>, Hyper) -> Inflated,
+{
+ let deflated = theta.clone().map(deflate);
+ let delta = grad(f, &deflated);
+ let mut i = 0;
+ theta.map(|inflated| {
+ let delta = &delta[i];
+ i += 1;
+ adjust(inflated, delta, hyper.clone())
+ })
+}
+
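+/// Gradient-descent driver shared by all the optimisers: on each iteration it
+/// differentiates the L2 loss of `predictor.predict` against `ys` and applies
+/// `predictor.update` to every parameter, for the iteration count held in `hyper`.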
+pub fn gradient_descent<
+ 'a,
+ T,
+ R,
+ Point,
+ F,
+ G,
+ H,
+ Inflated,
+ Hyper,
+ ImmutableHyper,
+ const IN_SIZE: usize,
+ const PARAM_NUM: usize,
+>(
+ hyper: Hyper,
+ xs: &'a [Point],
+ to_ranked_differentiable: G,
+ ys: &[T],
+ zero_params: [Differentiable<T>; PARAM_NUM],
+ mut predictor: Predictor<F, Inflated, Differentiable<T>, ImmutableHyper>,
+ to_immutable: H,
+) -> [Differentiable<T>; PARAM_NUM]
+where
+ T: NumLike + Hash + Copy + Default,
+ Point: 'a + Copy,
+ F: Fn(
+ RankedDifferentiable<T, IN_SIZE>,
+ &[Differentiable<T>; PARAM_NUM],
+ ) -> RankedDifferentiable<T, 1>,
+ G: for<'b> Fn(&'b [Point]) -> RankedDifferentiable<T, IN_SIZE>,
+ Inflated: Clone,
+ ImmutableHyper: Clone,
+ Hyper: Into<BaseGradientDescentHyper<T, R>>,
+ H: FnOnce(&Hyper) -> ImmutableHyper,
+ R: Rng,
+{
+ let sub_hypers = to_immutable(&hyper);
+ let mut gradient_hyper: BaseGradientDescentHyper<T, R> = hyper.into();
+ let iterations = gradient_hyper.iterations;
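+ // The loss is taken over the full dataset, or over a fresh random minibatch
+ // on each step when the hyperparameters carry an RNG and batch size.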
+ let out = iterate(
+ |theta| {
+ general_gradient_descent_step(
+ &mut |x| match gradient_hyper.sampling.as_mut() {
+ None => RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+ l2_loss_2(
+ &predictor.predict,
+ to_ranked_differentiable(xs),
+ RankedDifferentiable::of_slice(ys),
+ x,
+ ),
+ )]),
+ Some((rng, batch_size)) => {
+ let (sampled_xs, sampled_ys) = sample2(rng, *batch_size, xs, ys);
+ RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+ l2_loss_2(
+ &predictor.predict,
+ to_ranked_differentiable(&sampled_xs),
+ RankedDifferentiable::of_slice(&sampled_ys),
+ x,
+ ),
+ )])
+ }
+ },
+ theta,
+ predictor.deflate,
+ sub_hypers.clone(),
+ predictor.update,
+ )
+ },
+ zero_params.map(predictor.inflate),
+ iterations,
+ );
+ out.map(&mut predictor.deflate)
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crate::auto_diff::RankedDifferentiableTagged;
+ use crate::hyper::{RmsGradientDescentHyper, VelocityGradientDescentHyper};
+ use crate::loss::{
+ naked_predictor, predict_line_2_unranked, predict_plane, predict_quadratic_unranked,
+ rms_predictor, velocity_predictor,
+ };
+ use crate::not_nan::{to_not_nan_1, to_not_nan_2};
+ use crate::scalar::Scalar;
+ use crate::traits::Zero;
+ use ordered_float::NotNan;
+ use rand::rngs::StdRng;
+ use rand::SeedableRng;
+
+ #[test]
+ fn test_iterate() {
+ let f = |t: [i32; 3]| t.map(|i| i - 3);
+ assert_eq!(iterate(f, [1, 2, 3], 5u32), [-14, -13, -12]);
+ }
+
+ #[test]
+ fn first_optimisation_test() {
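+ // Fit a straight line to four points; the fitted slope is about 1.05 and the intercept about zero.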
+ let xs = [2.0, 1.0, 4.0, 3.0];
+ let ys = [1.8, 1.2, 4.2, 3.3];
+
+ let zero = Scalar::<NotNan<f64>>::zero();
+
+ let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.01).expect("not nan"), 1000);
+ let iterated = {
+ let xs = to_not_nan_1(xs);
+ let ys = to_not_nan_1(ys);
+ let zero_params = [
+ RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+ RankedDifferentiable::of_scalar(zero).to_unranked(),
+ ];
+ gradient_descent(
+ hyper,
+ &xs,
+ |b| RankedDifferentiable::of_slice(b),
+ &ys,
+ zero_params,
+ naked_predictor(predict_line_2_unranked),
+ BaseGradientDescentHyper::to_immutable,
+ )
+ };
+ let iterated = iterated
+ .into_iter()
+ .map(|x| x.into_scalar().real_part().into_inner())
+ .collect::<Vec<_>>();
+
+ assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
+ }
+
+ #[test]
+ fn optimise_quadratic() {
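+ // Fit a one-variable quadratic to five points.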
+ let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
+ let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
+
+ let zero = Scalar::<NotNan<f64>>::zero();
+
+ let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000);
+
+ let iterated = {
+ let xs = to_not_nan_1(xs);
+ let ys = to_not_nan_1(ys);
+ let zero_params = [
+ RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+ RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+ RankedDifferentiable::of_scalar(zero).to_unranked(),
+ ];
+ gradient_descent(
+ hyper,
+ &xs,
+ |b| RankedDifferentiable::of_slice(b),
+ &ys,
+ zero_params,
+ naked_predictor(predict_quadratic_unranked),
+ BaseGradientDescentHyper::to_immutable,
+ )
+ };
+ let iterated = iterated
+ .into_iter()
+ .map(|x| x.into_scalar().real_part().into_inner())
+ .collect::<Vec<_>>();
+
+ assert_eq!(
+ iterated,
+ [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
+ );
+ }
+
+ const PLANE_XS: [[f64; 2]; 6] = [
+ [1.0, 2.05],
+ [1.0, 3.0],
+ [2.0, 2.0],
+ [2.0, 3.91],
+ [3.0, 6.13],
+ [4.0, 8.09],
+ ];
+ const PLANE_YS: [f64; 6] = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
+
+ #[test]
+ fn optimise_plane() {
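+ // Fit the plane y = xs . theta0 + theta1 to six two-dimensional inputs.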
+ let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000);
+
+ let iterated = {
+ let xs = to_not_nan_2(PLANE_XS);
+ let ys = to_not_nan_1(PLANE_YS);
+ let zero_params = [
+ RankedDifferentiable::of_slice(&[NotNan::zero(), NotNan::zero()]).to_unranked(),
+ Differentiable::of_scalar(Scalar::zero()),
+ ];
+ gradient_descent(
+ hyper,
+ &xs,
+ RankedDifferentiable::of_slice_2::<_, 2>,
+ &ys,
+ zero_params,
+ naked_predictor(predict_plane),
+ BaseGradientDescentHyper::to_immutable,
+ )
+ };
+
+ let [theta0, theta1] = iterated;
+
+ let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+ let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+
+ assert_eq!(theta0.collect(), [3.97757644609063, 2.0496557321494446]);
+ assert_eq!(
+ theta1.to_scalar().real_part().into_inner(),
+ 5.786758464448078
+ );
+ }
+
+ #[test]
+ fn optimise_plane_with_sampling() {
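+ // The same plane fit, but each step takes a random minibatch of 4 of the 6 points.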
+ let rng = StdRng::seed_from_u64(314159);
+ let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
+ .with_rng(rng, 4);
+
+ let iterated = {
+ let xs = to_not_nan_2(PLANE_XS);
+ let ys = to_not_nan_1(PLANE_YS);
+ let zero_params = [
+ RankedDifferentiable::of_slice(&[NotNan::zero(), NotNan::zero()]).to_unranked(),
+ Differentiable::of_scalar(Scalar::zero()),
+ ];
+ gradient_descent(
+ hyper,
+ &xs,
+ RankedDifferentiable::of_slice_2::<_, 2>,
+ &ys,
+ zero_params,
+ naked_predictor(predict_plane),
+ BaseGradientDescentHyper::to_immutable,
+ )
+ };
+
+ let [theta0, theta1] = iterated;
+
+ let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor").collect();
+ let theta1 = theta1
+ .attach_rank::<0>()
+ .expect("rank 0 tensor")
+ .to_scalar()
+ .real_part()
+ .into_inner();
+
+ /*
+ Mathematica code to verify by eye that the optimisation gave a reasonable result:
+
+ xs = {{1.0, 2.05}, {1.0, 3.0}, {2.0, 2.0}, {2.0, 3.91}, {3.0,
+ 6.13}, {4.0, 8.09}};
+ ys = {13.99, 15.99, 18.0, 22.4, 30.2, 37.94};
+ points = ListPointPlot3D[Append @@@ Transpose[{xs, ys}]];
+
+ withoutBatching0 = {3.97757644609063, 2.0496557321494446};
+ withoutBatching1 = 5.2839863438547159;
+ withoutBatching =
+ Plot3D[{x, y} . withoutBatching0 + withoutBatching1, {x, 0, 4}, {y,
+ 0, 8}];
+
+ withBatching0 = {3.8581694055684781, 2.2166222673968554};
+ withBatching1 = 5.2399202468216668;
+ withBatching =
+ Plot3D[{x, y} . withBatching0 + withBatching1, {x, 0, 4}, {y, 0, 8}];
+
+ Show[points, withoutBatching]
+
+ Show[points, withBatching]
+ */
+
+ assert_eq!(theta0, [3.8581694055684781, 2.2166222673968554]);
+ assert_eq!(theta1, 5.2839863438547159);
+ }
+
+ #[test]
+ fn test_with_velocity() {
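+ // Velocity (momentum) gradient descent: mu = 0.9 scales the previous step's contribution.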
+ let hyper = VelocityGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
+ .with_mu(NotNan::new(0.9).expect("not nan"));
+
+ let iterated = {
+ let xs = to_not_nan_2(PLANE_XS);
+ let ys = to_not_nan_1(PLANE_YS);
+ let zero_params = [
+ RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
+ .to_unranked(),
+ Differentiable::of_scalar(Scalar::zero()),
+ ];
+
+ gradient_descent(
+ hyper,
+ &xs,
+ RankedDifferentiableTagged::of_slice_2::<_, 2>,
+ &ys,
+ zero_params,
+ velocity_predictor(predict_plane),
+ VelocityGradientDescentHyper::to_immutable,
+ )
+ };
+
+ let [theta0, theta1] = iterated;
+
+ let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+ let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+
+ assert_eq!(theta0.collect(), [3.979645447136021, 1.976454920954754]);
+ assert_eq!(
+ theta1.to_scalar().real_part().into_inner(),
+ 6.169579045974949
+ );
+ }
+
+ #[test]
+ fn test_with_rms() {
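+ // RMS gradient descent: beta is the decay rate for the accumulated squared gradients,
+ // and the stabilizer guards the division against zero denominators.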
+ let beta = NotNan::new(0.9).expect("not nan");
+ let stabilizer = NotNan::new(0.00000001).expect("not nan");
+ let hyper = RmsGradientDescentHyper::default(NotNan::new(0.001).expect("not nan"), 3000)
+ .with_stabilizer(stabilizer)
+ .with_beta(beta);
+
+ let iterated = {
+ let xs = to_not_nan_2(PLANE_XS);
+ let ys = to_not_nan_1(PLANE_YS);
+ let zero_params = [
+ RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
+ .to_unranked(),
+ Differentiable::of_scalar(Scalar::zero()),
+ ];
+
+ gradient_descent(
+ hyper,
+ &xs,
+ RankedDifferentiableTagged::of_slice_2::<_, 2>,
+ &ys,
+ zero_params,
+ rms_predictor(predict_plane),
+ RmsGradientDescentHyper::to_immutable,
+ )
+ };
+
+ let [theta0, theta1] = iterated;
+
+ let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+ let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+
+ let fitted_theta0 = theta0
+ .collect()
+ .iter()
+ .map(|x| x.into_inner())
+ .collect::<Vec<_>>();
+ let fitted_theta1 = theta1.to_scalar().real_part().into_inner();
+ assert_eq!(fitted_theta0, [3.9853500993426492, 1.9745945728216352]);
+ assert_eq!(fitted_theta1, 6.1642229831811681);
+ }
+}
diff --git a/little_learner_app/src/hyper.rs b/little_learner/src/hyper.rs
similarity index 97%
rename from little_learner_app/src/hyper.rs
rename to little_learner/src/hyper.rs
index 54502bc..58b203d 100644
--- a/little_learner_app/src/hyper.rs
+++ b/little_learner/src/hyper.rs
@@ -1,5 +1,5 @@
-use little_learner::loss::{NakedHypers, RmsHyper, VelocityHypers};
-use little_learner::traits::{NumLike, Zero};
+use crate::loss::{NakedHypers, RmsHyper, VelocityHypers};
+use crate::traits::{NumLike, Zero};
use rand::{rngs::StdRng, Rng};
pub struct BaseGradientDescentHyper<A, R: Rng> {
diff --git a/little_learner/src/lib.rs b/little_learner/src/lib.rs
index ad4f0d8..c163e0d 100644
--- a/little_learner/src/lib.rs
+++ b/little_learner/src/lib.rs
@@ -5,8 +5,11 @@
pub mod auto_diff;
pub mod const_teq;
pub mod expr_syntax_tree;
+pub mod gradient_descent;
+pub mod hyper;
pub mod loss;
pub mod not_nan;
+pub mod sample;
pub mod scalar;
pub mod smooth;
pub mod tensor;
diff --git a/little_learner/src/loss.rs b/little_learner/src/loss.rs
index 3c1146d..22766e0 100644
--- a/little_learner/src/loss.rs
+++ b/little_learner/src/loss.rs
@@ -245,12 +245,6 @@ pub struct Predictor<F, Inflated, Deflated, Params> {
pub update: fn(Inflated, &Deflated, Params) -> Inflated,
}
-type ParameterPredictor<T, const IN_SIZE: usize, const THETA: usize> =
- fn(
- RankedDifferentiable<T, IN_SIZE>,
- &[Differentiable<T>; THETA],
- ) -> RankedDifferentiable<T, 1>;
-
#[derive(Clone)]
pub struct NakedHypers {
pub learning_rate: A,
@@ -343,42 +337,6 @@ where
}
}
-pub const fn plane_predictor<T>(
-) -> Predictor<ParameterPredictor<T, 2, 2>, Differentiable<T>, Differentiable<T>, NakedHypers<T>>
-where
- T: NumLike + Default,
-{
- naked_predictor(predict_plane)
-}
-
-pub const fn velocity_plane_predictor<T>() -> Predictor<
- ParameterPredictor<T, 2, 2>,
- DifferentiableTagged<T, T>,
- Differentiable<T>,
- VelocityHypers<T>,
->
-where
- T: NumLike + Default,
-{
- velocity_predictor(predict_plane)
-}
-
-pub const fn line_unranked_predictor<T>(
-) -> Predictor<ParameterPredictor<T, 1, 2>, Differentiable<T>, Differentiable<T>, NakedHypers<T>>
-where
- T: NumLike + Default + Copy,
-{
- naked_predictor(predict_line_2_unranked)
-}
-
-pub const fn quadratic_unranked_predictor<T>(
-) -> Predictor<ParameterPredictor<T, 1, 3>, Differentiable<T>, Differentiable<T>, NakedHypers<T>>
-where
- T: NumLike + Default,
-{
- naked_predictor(predict_quadratic_unranked)
-}
-
#[cfg(test)]
mod test_loss {
use crate::auto_diff::RankedDifferentiable;
diff --git a/little_learner_app/src/sample.rs b/little_learner/src/sample.rs
similarity index 100%
rename from little_learner_app/src/sample.rs
rename to little_learner/src/sample.rs
diff --git a/little_learner_app/src/main.rs b/little_learner_app/src/main.rs
index 4a6e99a..5dd8178 100644
--- a/little_learner_app/src/main.rs
+++ b/little_learner_app/src/main.rs
@@ -1,145 +1,18 @@
#![allow(incomplete_features)]
#![feature(generic_const_exprs)]
-mod hyper;
-mod sample;
mod with_tensor;
-use core::hash::Hash;
-use rand::Rng;
+use little_learner::auto_diff::{Differentiable, RankedDifferentiable, RankedDifferentiableTagged};
-use little_learner::auto_diff::{
- grad, Differentiable, RankedDifferentiable, RankedDifferentiableTagged,
-};
-
-use crate::hyper::{BaseGradientDescentHyper, VelocityGradientDescentHyper};
-use crate::sample::sample2;
-use little_learner::loss::{l2_loss_2, velocity_plane_predictor, Predictor};
+use little_learner::gradient_descent::gradient_descent;
+use little_learner::hyper::VelocityGradientDescentHyper;
+use little_learner::loss::{predict_plane, velocity_predictor};
use little_learner::not_nan::{to_not_nan_1, to_not_nan_2};
use little_learner::scalar::Scalar;
-use little_learner::traits::{NumLike, Zero};
+use little_learner::traits::Zero;
use ordered_float::NotNan;
-fn iterate<A, F>(mut f: F, start: A, n: u32) -> A
-where
- F: FnMut(A) -> A,
-{
- let mut v = start;
- for _ in 0..n {
- v = f(v);
- }
- v
-}
-
-/// `adjust` takes the previous value and a delta, and returns a deflated new value.
-fn general_gradient_descent_step<
- A,
- F,
- Inflated,
- Deflate,
- Adjust,
- Hyper,
- const RANK: usize,
- const PARAM_NUM: usize,
->(
- f: &mut F,
- theta: [Inflated; PARAM_NUM],
- deflate: Deflate,
- hyper: Hyper,
- mut adjust: Adjust,
-) -> [Inflated; PARAM_NUM]
-where
- A: Clone + NumLike + Hash + Eq,
- F: FnMut(&[Differentiable<A>; PARAM_NUM]) -> RankedDifferentiable<A, RANK>,
- Deflate: FnMut(Inflated) -> Differentiable<A>,
- Inflated: Clone,
- Hyper: Clone,
- Adjust: FnMut(Inflated, &Differentiable<A>, Hyper) -> Inflated,
-{
- let deflated = theta.clone().map(deflate);
- let delta = grad(f, &deflated);
- let mut i = 0;
- theta.map(|inflated| {
- let delta = &delta[i];
- i += 1;
- adjust(inflated, delta, hyper.clone())
- })
-}
-
-fn gradient_descent<
- 'a,
- T,
- R: Rng,
- Point,
- F,
- G,
- H,
- Inflated,
- Hyper,
- ImmutableHyper,
- const IN_SIZE: usize,
- const PARAM_NUM: usize,
->(
- hyper: Hyper,
- xs: &'a [Point],
- to_ranked_differentiable: G,
- ys: &[T],
- zero_params: [Differentiable<T>; PARAM_NUM],
- mut predictor: Predictor<F, Inflated, Differentiable<T>, ImmutableHyper>,
- to_immutable: H,
-) -> [Differentiable<T>; PARAM_NUM]
-where
- T: NumLike + Hash + Copy + Default,
- Point: 'a + Copy,
- F: Fn(
- RankedDifferentiable<T, IN_SIZE>,
- &[Differentiable<T>; PARAM_NUM],
- ) -> RankedDifferentiable<T, 1>,
- G: for<'b> Fn(&'b [Point]) -> RankedDifferentiable<T, IN_SIZE>,
- Inflated: Clone,
- ImmutableHyper: Clone,
- Hyper: Into<BaseGradientDescentHyper<T, R>>,
- H: FnOnce(&Hyper) -> ImmutableHyper,
-{
- let sub_hypers = to_immutable(&hyper);
- let mut gradient_hyper: BaseGradientDescentHyper<T, R> = hyper.into();
- let iterations = gradient_hyper.iterations;
- let out = iterate(
- |theta| {
- general_gradient_descent_step(
- &mut |x| match gradient_hyper.sampling.as_mut() {
- None => RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
- l2_loss_2(
- &predictor.predict,
- to_ranked_differentiable(xs),
- RankedDifferentiable::of_slice(ys),
- x,
- ),
- )]),
- Some((rng, batch_size)) => {
- let (sampled_xs, sampled_ys) = sample2(rng, *batch_size, xs, ys);
- RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
- l2_loss_2(
- &predictor.predict,
- to_ranked_differentiable(&sampled_xs),
- RankedDifferentiable::of_slice(&sampled_ys),
- x,
- ),
- )])
- }
- },
- theta,
- predictor.deflate,
- sub_hypers.clone(),
- predictor.update,
- )
- },
- zero_params.map(predictor.inflate),
- iterations,
- );
- out.map(&mut predictor.deflate)
-}
-
fn main() {
let plane_xs = [
[1.0, 2.05],
@@ -169,7 +42,7 @@ fn main() {
RankedDifferentiableTagged::of_slice_2::<_, 2>,
&ys,
zero_params,
- velocity_plane_predictor(),
+ velocity_predictor(predict_plane),
VelocityGradientDescentHyper::to_immutable,
)
};
@@ -187,276 +60,4 @@ fn main() {
}
#[cfg(test)]
-mod tests {
- use super::*;
- use crate::hyper::RmsGradientDescentHyper;
- use little_learner::loss::{
- line_unranked_predictor, plane_predictor, predict_plane, quadratic_unranked_predictor,
- rms_predictor,
- };
- use rand::rngs::StdRng;
- use rand::SeedableRng;
-
- #[test]
- fn test_iterate() {
- let f = |t: [i32; 3]| t.map(|i| i - 3);
- assert_eq!(iterate(f, [1, 2, 3], 5u32), [-14, -13, -12]);
- }
-
- #[test]
- fn first_optimisation_test() {
- let xs = [2.0, 1.0, 4.0, 3.0];
- let ys = [1.8, 1.2, 4.2, 3.3];
-
- let zero = Scalar::<NotNan<f64>>::zero();
-
- let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.01).expect("not nan"), 1000);
- let iterated = {
- let xs = to_not_nan_1(xs);
- let ys = to_not_nan_1(ys);
- let zero_params = [
- RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
- RankedDifferentiable::of_scalar(zero).to_unranked(),
- ];
- gradient_descent(
- hyper,
- &xs,
- |b| RankedDifferentiable::of_slice(b),
- &ys,
- zero_params,
- line_unranked_predictor(),
- BaseGradientDescentHyper::to_immutable,
- )
- };
- let iterated = iterated
- .into_iter()
- .map(|x| x.into_scalar().real_part().into_inner())
- .collect::<Vec<_>>();
-
- assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
- }
-
- #[test]
- fn optimise_quadratic() {
- let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
- let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
-
- let zero = Scalar::<NotNan<f64>>::zero();
-
- let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000);
-
- let iterated = {
- let xs = to_not_nan_1(xs);
- let ys = to_not_nan_1(ys);
- let zero_params = [
- RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
- RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
- RankedDifferentiable::of_scalar(zero).to_unranked(),
- ];
- gradient_descent(
- hyper,
- &xs,
- |b| RankedDifferentiable::of_slice(b),
- &ys,
- zero_params,
- quadratic_unranked_predictor(),
- BaseGradientDescentHyper::to_immutable,
- )
- };
- let iterated = iterated
- .into_iter()
- .map(|x| x.into_scalar().real_part().into_inner())
- .collect::<Vec<_>>();
-
- assert_eq!(
- iterated,
- [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
- );
- }
-
- const PLANE_XS: [[f64; 2]; 6] = [
- [1.0, 2.05],
- [1.0, 3.0],
- [2.0, 2.0],
- [2.0, 3.91],
- [3.0, 6.13],
- [4.0, 8.09],
- ];
- const PLANE_YS: [f64; 6] = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
-
- #[test]
- fn optimise_plane() {
- let mut hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000);
-
- let iterated = {
- let xs = to_not_nan_2(PLANE_XS);
- let ys = to_not_nan_1(PLANE_YS);
- let zero_params = [
- RankedDifferentiable::of_slice(&[NotNan::zero(), NotNan::zero()]).to_unranked(),
- Differentiable::of_scalar(Scalar::zero()),
- ];
- gradient_descent(
- hyper,
- &xs,
- RankedDifferentiable::of_slice_2::<_, 2>,
- &ys,
- zero_params,
- plane_predictor(),
- BaseGradientDescentHyper::to_immutable,
- )
- };
-
- let [theta0, theta1] = iterated;
-
- let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
- let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
-
- assert_eq!(theta0.collect(), [3.97757644609063, 2.0496557321494446]);
- assert_eq!(
- theta1.to_scalar().real_part().into_inner(),
- 5.786758464448078
- );
- }
-
- #[test]
- fn optimise_plane_with_sampling() {
- let rng = StdRng::seed_from_u64(314159);
- let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
- .with_rng(rng, 4);
-
- let iterated = {
- let xs = to_not_nan_2(PLANE_XS);
- let ys = to_not_nan_1(PLANE_YS);
- let zero_params = [
- RankedDifferentiable::of_slice(&[NotNan::zero(), NotNan::zero()]).to_unranked(),
- Differentiable::of_scalar(Scalar::zero()),
- ];
- gradient_descent(
- hyper,
- &xs,
- RankedDifferentiable::of_slice_2::<_, 2>,
- &ys,
- zero_params,
- plane_predictor(),
- BaseGradientDescentHyper::to_immutable,
- )
- };
-
- let [theta0, theta1] = iterated;
-
- let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor").collect();
- let theta1 = theta1
- .attach_rank::<0>()
- .expect("rank 0 tensor")
- .to_scalar()
- .real_part()
- .into_inner();
-
- /*
- Mathematica code to verify by eye that the optimisation gave a reasonable result:
-
- xs = {{1.0, 2.05}, {1.0, 3.0}, {2.0, 2.0}, {2.0, 3.91}, {3.0,
- 6.13}, {4.0, 8.09}};
- ys = {13.99, 15.99, 18.0, 22.4, 30.2, 37.94};
- points = ListPointPlot3D[Append @@@ Transpose[{xs, ys}]];
-
- withoutBatching0 = {3.97757644609063, 2.0496557321494446};
- withoutBatching1 = 5.2839863438547159;
- withoutBatching =
- Plot3D[{x, y} . withoutBatching0 + withoutBatching1, {x, 0, 4}, {y,
- 0, 8}];
-
- withBatching0 = {3.8581694055684781, 2.2166222673968554};
- withBatching1 = 5.2399202468216668;
- withBatching =
- Plot3D[{x, y} . withBatching0 + withBatching1, {x, 0, 4}, {y, 0, 8}];
-
- Show[points, withoutBatching]
-
- Show[points, withBatching]
- */
-
- assert_eq!(theta0, [3.8581694055684781, 2.2166222673968554]);
- assert_eq!(theta1, 5.2839863438547159);
- }
-
- #[test]
- fn test_with_velocity() {
- let hyper = VelocityGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
- .with_mu(NotNan::new(0.9).expect("not nan"));
-
- let iterated = {
- let xs = to_not_nan_2(PLANE_XS);
- let ys = to_not_nan_1(PLANE_YS);
- let zero_params = [
- RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
- .to_unranked(),
- Differentiable::of_scalar(Scalar::zero()),
- ];
-
- gradient_descent(
- hyper,
- &xs,
- RankedDifferentiableTagged::of_slice_2::<_, 2>,
- &ys,
- zero_params,
- velocity_plane_predictor(),
- VelocityGradientDescentHyper::to_immutable,
- )
- };
-
- let [theta0, theta1] = iterated;
-
- let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
- let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
-
- assert_eq!(theta0.collect(), [3.979645447136021, 1.976454920954754]);
- assert_eq!(
- theta1.to_scalar().real_part().into_inner(),
- 6.169579045974949
- );
- }
-
- #[test]
- fn test_with_rms() {
- let beta = NotNan::new(0.9).expect("not nan");
- let stabilizer = NotNan::new(0.00000001).expect("not nan");
- let hyper = RmsGradientDescentHyper::default(NotNan::new(0.001).expect("not nan"), 3000)
- .with_stabilizer(stabilizer)
- .with_beta(beta);
-
- let iterated = {
- let xs = to_not_nan_2(PLANE_XS);
- let ys = to_not_nan_1(PLANE_YS);
- let zero_params = [
- RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
- .to_unranked(),
- Differentiable::of_scalar(Scalar::zero()),
- ];
-
- gradient_descent(
- hyper,
- &xs,
- RankedDifferentiableTagged::of_slice_2::<_, 2>,
- &ys,
- zero_params,
- rms_predictor(predict_plane),
- RmsGradientDescentHyper::to_immutable,
- )
- };
-
- let [theta0, theta1] = iterated;
-
- let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
- let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
-
- let fitted_theta0 = theta0
- .collect()
- .iter()
- .map(|x| x.into_inner())
- .collect::<Vec<_>>();
- let fitted_theta1 = theta1.to_scalar().real_part().into_inner();
- assert_eq!(fitted_theta0, [3.9853500993426492, 1.9745945728216352]);
- assert_eq!(fitted_theta1, 6.1642229831811681);
- }
-}
+mod tests {}