From deb0ec67cafe852ed28654bb547fb7bf82980d5b Mon Sep 17 00:00:00 2001 From: Patrick Stevens Date: Sun, 7 May 2023 23:57:58 +0100 Subject: [PATCH] Add docs, delete old code, truncate scalars where possible (#21) --- little_learner/src/auto_diff.rs | 49 ++++--- little_learner/src/const_teq.rs | 13 -- little_learner/src/expr_syntax_tree.rs | 155 -------------------- little_learner/src/gradient_descent.rs | 66 ++++----- little_learner/src/hyper.rs | 190 +++++++++++++------------ little_learner/src/lib.rs | 4 +- little_learner/src/loss.rs | 115 ++------------- little_learner/src/predictor.rs | 111 +++++++++++++++ little_learner/src/sample.rs | 1 + little_learner/src/scalar.rs | 20 +-- little_learner/src/smooth.rs | 42 +++--- little_learner/src/tensor.rs | 107 -------------- little_learner/src/traits.rs | 4 + little_learner_app/src/main.rs | 55 +++---- little_learner_app/src/with_tensor.rs | 136 ------------------ 15 files changed, 349 insertions(+), 719 deletions(-) delete mode 100644 little_learner/src/const_teq.rs delete mode 100644 little_learner/src/expr_syntax_tree.rs create mode 100644 little_learner/src/predictor.rs delete mode 100644 little_learner/src/tensor.rs delete mode 100644 little_learner_app/src/with_tensor.rs diff --git a/little_learner/src/auto_diff.rs b/little_learner/src/auto_diff.rs index fdb133c..e32511f 100644 --- a/little_learner/src/auto_diff.rs +++ b/little_learner/src/auto_diff.rs @@ -84,11 +84,11 @@ where { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - DifferentiableContents::Scalar(s, _) => f.write_fmt(format_args!("{}", s)), + DifferentiableContents::Scalar(s, _) => f.write_fmt(format_args!("{s}")), DifferentiableContents::Vector(v, _rank) => { f.write_char('[')?; for v in v.iter() { - f.write_fmt(format_args!("{}", v))?; + f.write_fmt(format_args!("{v}"))?; f.write_char(',')?; } f.write_char(']') @@ -159,6 +159,12 @@ impl DifferentiableContents { } } + /// This function does *not* check that its inputs are of exactly the same shape, though it + /// does check ranks. If you have two vectors of different lengths, you will silently get the + /// shorter one. + /// + /// # Panics + /// Panics if the two inputs have different shapes (e.g. if they have different ranks). fn map2( &self, other: &DifferentiableContents, @@ -180,9 +186,7 @@ impl DifferentiableContents { DifferentiableContents::Vector(slice_a, rank_a), DifferentiableContents::Vector(slice_b, rank_b), ) => { - if rank_a != rank_b { - panic!("Unexpectedly different ranks in map2"); - } + assert_eq!(rank_a, rank_b, "Unexpectedly different ranks in map2"); DifferentiableContents::Vector( slice_a .iter() @@ -367,10 +371,11 @@ impl DifferentiableTagged { } } + /// # Panics + /// Panics if the input is empty (otherwise we can't determine a rank). 
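+    ///
+    /// A sketch of typical usage, mirroring the `test_map` test below and the
+    /// smoothing tests elsewhere in this crate (the `NotNan<f64>` element type is
+    /// just what those tests happen to use):
+    ///
+    /// ```ignore
+    /// // A rank-1 tensor built out of two rank-0 (scalar) tensors.
+    /// let v = Differentiable::of_vec(vec![
+    ///     Differentiable::of_scalar(Scalar::make(NotNan::new(3.0).expect("not nan"))),
+    ///     Differentiable::of_scalar(Scalar::make(NotNan::new(4.0).expect("not nan"))),
+    /// ]);
+    /// assert_eq!(v.rank(), 1);
+    /// ```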
+ #[must_use] pub fn of_vec(input: Vec>) -> DifferentiableTagged { - if input.is_empty() { - panic!("Can't make an empty tensor"); - } + assert!(!input.is_empty(), "Can't make an empty tensor"); let rank = input[0].rank(); DifferentiableTagged { contents: DifferentiableContents::Vector(input, 1 + rank), @@ -413,7 +418,7 @@ where k.invoke(y, A::one(), acc); } DifferentiableContents::Vector(y, _rank) => { - DifferentiableContents::accumulate_gradients_vec(y, acc) + DifferentiableContents::accumulate_gradients_vec(y, acc); } } } @@ -543,6 +548,7 @@ impl RankedDifferentiableTagged { &self.contents } + #[must_use] pub fn of_vector( s: Vec>, ) -> RankedDifferentiableTagged { @@ -683,19 +689,16 @@ mod tests { #[test] fn test_map() { - let v = DifferentiableTagged::of_vec( - vec![ - Differentiable::of_scalar(Scalar::Number( - NotNan::new(3.0).expect("3 is not NaN"), - Some(0usize), - )), - DifferentiableTagged::of_scalar(Scalar::Number( - NotNan::new(4.0).expect("4 is not NaN"), - Some(1usize), - )), - ] - .into(), - ); + let v = DifferentiableTagged::of_vec(vec![ + Differentiable::of_scalar(Scalar::Number( + NotNan::new(3.0).expect("3 is not NaN"), + Some(0usize), + )), + DifferentiableTagged::of_scalar(Scalar::Number( + NotNan::new(4.0).expect("4 is not NaN"), + Some(1usize), + )), + ]); let mapped = v.map(&mut |x: Scalar>| match x { Scalar::Number(i, n) => Scalar::Number(i + NotNan::new(1.0).expect("1 is not NaN"), n), Scalar::Dual(_, _) => panic!("Not hit"), @@ -704,7 +707,7 @@ mod tests { let v = mapped .into_vector() .iter() - .map(|d| extract_scalar(d).clone()) + .map(|d| *extract_scalar(d)) .collect::>(); assert_eq!(v, [4.0, 5.0]); diff --git a/little_learner/src/const_teq.rs b/little_learner/src/const_teq.rs deleted file mode 100644 index 6952ee2..0000000 --- a/little_learner/src/const_teq.rs +++ /dev/null @@ -1,13 +0,0 @@ -use std::marker::PhantomData; - -pub struct ConstTeq { - phantom_a: PhantomData<[(); A]>, - phantom_b: PhantomData<[(); B]>, -} - -pub fn make() -> ConstTeq { - ConstTeq { - phantom_a: Default::default(), - phantom_b: Default::default(), - } -} diff --git a/little_learner/src/expr_syntax_tree.rs b/little_learner/src/expr_syntax_tree.rs deleted file mode 100644 index 435af58..0000000 --- a/little_learner/src/expr_syntax_tree.rs +++ /dev/null @@ -1,155 +0,0 @@ -use immutable_chunkmap::map; -use std::ops::{Add, Mul}; - -/* -An untyped syntax tree for an expression whose constants are all of type `A`. -*/ -#[derive(Clone, Debug)] -pub enum Expr { - Const(A), - Sum(Box>, Box>), - Variable(u32), - // The first `Expr` here is a function, which may reference the input variable `Variable(i)`. 
- // For example, `(fun x y -> x + y) 3 4` is expressed as: - // Apply(0, Apply(1, Sum(Variable(0), Variable(1)), Const(4)), Const(3)) - Apply(u32, Box>, Box>), - Mul(Box>, Box>), -} - -impl Expr { - fn eval_inner(e: &Expr, ctx: &map::Map) -> A - where - A: Clone + Add + Mul, - { - match &e { - Expr::Const(x) => x.clone(), - Expr::Sum(x, y) => Expr::eval_inner(x, ctx) + Expr::eval_inner(y, ctx), - Expr::Variable(id) => ctx - .get(id) - .unwrap_or_else(|| panic!("No binding found for free variable {}", id)) - .clone(), - Expr::Apply(variable, func, arg) => { - let arg = Expr::eval_inner(arg, ctx); - let (updated_context, _) = ctx.insert(*variable, arg); - Expr::eval_inner(func, &updated_context) - } - Expr::Mul(x, y) => Expr::eval_inner(x, ctx) * Expr::eval_inner(y, ctx), - } - } - - pub fn eval(e: &Expr) -> A - where - A: Clone + Add + Mul, - { - Expr::eval_inner(e, &map::Map::::new()) - } - - pub fn apply(var: u32, f: Expr, arg: Expr) -> Expr { - Expr::Apply(var, Box::new(f), Box::new(arg)) - } - - pub fn differentiate(one: &A, zero: &A, var: u32, f: &Expr) -> Expr - where - A: Clone, - { - match f { - Expr::Const(_) => Expr::Const(zero.clone()), - Expr::Sum(x, y) => { - Expr::differentiate(one, zero, var, x) + Expr::differentiate(one, zero, var, y) - } - Expr::Variable(i) => { - if *i == var { - Expr::Const(one.clone()) - } else { - Expr::Const(zero.clone()) - } - } - Expr::Mul(x, y) => { - Expr::Mul( - Box::new(Expr::differentiate(one, zero, var, x.as_ref())), - (*y).clone(), - ) + Expr::Mul( - Box::new(Expr::differentiate(one, zero, var, y.as_ref())), - (*x).clone(), - ) - } - Expr::Apply(new_var, func, expr) => { - if *new_var == var { - panic!( - "cannot differentiate with respect to variable {} that's been assigned", - var - ) - } - let expr_deriv = Expr::differentiate(one, zero, var, expr); - Expr::mul( - expr_deriv, - Expr::Apply( - *new_var, - Box::new(Expr::differentiate(one, zero, *new_var, func)), - (*expr).clone(), - ), - ) - } - } - } -} - -impl Add for Expr { - type Output = Expr; - fn add(self: Expr, y: Expr) -> Expr { - Expr::Sum(Box::new(self), Box::new(y)) - } -} - -impl Mul for Expr { - type Output = Expr; - fn mul(self: Expr, y: Expr) -> Expr { - Expr::Mul(Box::new(self), Box::new(y)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_expr() { - let expr = Expr::apply( - 0, - Expr::apply(1, Expr::Variable(0) + Expr::Variable(1), Expr::Const(4)), - Expr::Const(3), - ); - - assert_eq!(Expr::eval::<2>(&expr), 7); - } - - #[test] - fn test_derivative() { - let add_four = Expr::Variable(0) + Expr::Const(4); - let mul_five = Expr::Variable(1) * Expr::Const(5); - - { - let mul_five_then_add_four = Expr::apply(0, add_four.clone(), mul_five.clone()); - let mul_then_add_diff = Expr::differentiate(&1, &0, 1, &mul_five_then_add_four); - for i in 3..10 { - // (5x + 4) differentiates to 5 - assert_eq!( - Expr::eval::<2>(&Expr::apply(1, mul_then_add_diff.clone(), Expr::Const(i))), - 5 - ); - } - } - - { - let add_four_then_mul_five = Expr::apply(1, mul_five.clone(), add_four.clone()); - let add_then_mul_diff = Expr::differentiate(&1, &0, 0, &add_four_then_mul_five); - for i in 3..10 { - // ((x + 4) * 5) differentiates to 5 - assert_eq!( - Expr::eval::<2>(&Expr::apply(0, add_then_mul_diff.clone(), Expr::Const(i))), - 5 - ); - } - } - } -} diff --git a/little_learner/src/gradient_descent.rs b/little_learner/src/gradient_descent.rs index f136352..0dc7a24 100644 --- a/little_learner/src/gradient_descent.rs +++ b/little_learner/src/gradient_descent.rs @@ -1,6 +1,7 
@@ use crate::auto_diff::{grad, Differentiable, RankedDifferentiable}; -use crate::hyper::BaseGradientDescentHyper; -use crate::loss::{l2_loss_2, Predictor}; +use crate::hyper; +use crate::loss::l2_loss_2; +use crate::predictor::Predictor; use crate::sample::sample2; use crate::traits::NumLike; use rand::Rng; @@ -84,12 +85,12 @@ where G: for<'b> Fn(&'b [Point]) -> RankedDifferentiable, Inflated: Clone, ImmutableHyper: Clone, - Hyper: Into>, + Hyper: Into>, H: FnOnce(&Hyper) -> ImmutableHyper, R: Rng, { let sub_hypers = to_immutable(&hyper); - let mut gradient_hyper: BaseGradientDescentHyper = hyper.into(); + let mut gradient_hyper: hyper::BaseGradientDescent = hyper.into(); let iterations = gradient_hyper.iterations; let out = iterate( |theta| { @@ -131,12 +132,10 @@ where mod tests { use super::*; use crate::auto_diff::RankedDifferentiableTagged; - use crate::hyper::{RmsGradientDescentHyper, VelocityGradientDescentHyper}; - use crate::loss::{ - naked_predictor, predict_line_2_unranked, predict_plane, predict_quadratic_unranked, - rms_predictor, velocity_predictor, - }; + use crate::hyper; + use crate::loss::{predict_line_2_unranked, predict_plane, predict_quadratic_unranked}; use crate::not_nan::{to_not_nan_1, to_not_nan_2}; + use crate::predictor; use crate::scalar::Scalar; use crate::traits::Zero; use ordered_float::NotNan; @@ -156,7 +155,7 @@ mod tests { let zero = Scalar::>::zero(); - let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.01).expect("not nan"), 1000); + let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.01).expect("not nan"), 1000); let iterated = { let xs = to_not_nan_1(xs); let ys = to_not_nan_1(ys); @@ -170,8 +169,8 @@ mod tests { |b| RankedDifferentiable::of_slice(b), &ys, zero_params, - naked_predictor(predict_line_2_unranked), - BaseGradientDescentHyper::to_immutable, + predictor::naked(predict_line_2_unranked), + hyper::NakedGradientDescent::to_immutable, ) }; let iterated = iterated @@ -189,7 +188,7 @@ mod tests { let zero = Scalar::>::zero(); - let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000); + let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000); let iterated = { let xs = to_not_nan_1(xs); @@ -205,8 +204,8 @@ mod tests { |b| RankedDifferentiable::of_slice(b), &ys, zero_params, - naked_predictor(predict_quadratic_unranked), - BaseGradientDescentHyper::to_immutable, + predictor::naked(predict_quadratic_unranked), + hyper::NakedGradientDescent::to_immutable, ) }; let iterated = iterated @@ -232,7 +231,7 @@ mod tests { #[test] fn optimise_plane() { - let mut hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000); + let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000); let iterated = { let xs = to_not_nan_2(PLANE_XS); @@ -247,8 +246,8 @@ mod tests { RankedDifferentiable::of_slice_2::<_, 2>, &ys, zero_params, - naked_predictor(predict_plane), - BaseGradientDescentHyper::to_immutable, + predictor::naked(predict_plane), + hyper::NakedGradientDescent::to_immutable, ) }; @@ -267,7 +266,7 @@ mod tests { #[test] fn optimise_plane_with_sampling() { let rng = StdRng::seed_from_u64(314159); - let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000) + let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000) .with_rng(rng, 4); let iterated = { @@ -283,8 +282,8 @@ mod tests { RankedDifferentiable::of_slice_2::<_, 2>, &ys, zero_params, - 
naked_predictor(predict_plane), - BaseGradientDescentHyper::to_immutable, + predictor::naked(predict_plane), + hyper::NakedGradientDescent::to_immutable, ) }; @@ -322,14 +321,17 @@ mod tests { Show[points, withBatching] */ - assert_eq!(theta0, [3.8581694055684781, 2.2166222673968554]); - assert_eq!(theta1, 5.2839863438547159); + assert_eq!(theta0, [3.858_169_405_568_478, 2.2166222673968554]); + assert_eq!(theta1, 5.283_986_343_854_716); } #[test] fn test_with_velocity() { - let hyper = VelocityGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000) - .with_mu(NotNan::new(0.9).expect("not nan")); + let hyper = hyper::VelocityGradientDescent::zero_momentum( + NotNan::new(0.001).expect("not nan"), + 1000, + ) + .with_mu(NotNan::new(0.9).expect("not nan")); let iterated = { let xs = to_not_nan_2(PLANE_XS); @@ -346,8 +348,8 @@ mod tests { RankedDifferentiableTagged::of_slice_2::<_, 2>, &ys, zero_params, - velocity_predictor(predict_plane), - VelocityGradientDescentHyper::to_immutable, + predictor::velocity(predict_plane), + hyper::VelocityGradientDescent::to_immutable, ) }; @@ -367,7 +369,7 @@ mod tests { fn test_with_rms() { let beta = NotNan::new(0.9).expect("not nan"); let stabilizer = NotNan::new(0.00000001).expect("not nan"); - let hyper = RmsGradientDescentHyper::default(NotNan::new(0.001).expect("not nan"), 3000) + let hyper = hyper::RmsGradientDescent::default(NotNan::new(0.001).expect("not nan"), 3000) .with_stabilizer(stabilizer) .with_beta(beta); @@ -386,8 +388,8 @@ mod tests { RankedDifferentiableTagged::of_slice_2::<_, 2>, &ys, zero_params, - rms_predictor(predict_plane), - RmsGradientDescentHyper::to_immutable, + predictor::rms(predict_plane), + hyper::RmsGradientDescent::to_immutable, ) }; @@ -402,7 +404,7 @@ mod tests { .map(|x| x.into_inner()) .collect::>(); let fitted_theta1 = theta1.to_scalar().real_part().into_inner(); - assert_eq!(fitted_theta0, [3.9853500993426492, 1.9745945728216352]); - assert_eq!(fitted_theta1, 6.1642229831811681); + assert_eq!(fitted_theta0, [3.985_350_099_342_649, 1.9745945728216352]); + assert_eq!(fitted_theta1, 6.164_222_983_181_168); } } diff --git a/little_learner/src/hyper.rs b/little_learner/src/hyper.rs index 58b203d..862e424 100644 --- a/little_learner/src/hyper.rs +++ b/little_learner/src/hyper.rs @@ -1,117 +1,135 @@ -use crate::loss::{NakedHypers, RmsHyper, VelocityHypers}; +use crate::predictor::{NakedHypers, RmsHyper, VelocityHypers}; use crate::traits::{NumLike, Zero}; -use rand::{rngs::StdRng, Rng}; +use rand::rngs::StdRng; -pub struct BaseGradientDescentHyper { - pub sampling: Option<(R, usize)>, +/// Hyperparameters which apply to any possible optimisation algorithm that uses gradient descent. 
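+///
+/// A sketch of building one (the tests go through concrete wrappers such as
+/// `NakedGradientDescent`, but the base builder looks roughly like this; the RNG
+/// seed is illustrative):
+///
+/// ```ignore
+/// use rand::{rngs::StdRng, SeedableRng};
+///
+/// // Plain full-batch gradient descent for 1000 iterations...
+/// let full_batch = BaseGradientDescent::new(1000);
+/// // ...or the same, but sampling minibatches of 4 points with a seeded RNG.
+/// let minibatched = BaseGradientDescent::new(1000).with_rng(StdRng::seed_from_u64(314159), 4);
+/// ```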
+pub struct BaseGradientDescent { + pub sampling: Option<(Rng, usize)>, pub iterations: u32, - params: NakedHypers, } -impl BaseGradientDescentHyper -where - A: NumLike + NumLike, -{ - #[allow(dead_code)] - pub fn naked(learning_rate: A, iterations: u32) -> Self { - BaseGradientDescentHyper { - params: NakedHypers { learning_rate }, - iterations, +impl BaseGradientDescent { + #[must_use] + pub fn new(iterations: u32) -> BaseGradientDescent { + BaseGradientDescent { sampling: None, + iterations, } } +} - #[allow(dead_code)] - pub fn with_rng(self, rng: S, size: usize) -> BaseGradientDescentHyper { - BaseGradientDescentHyper { - params: self.params, +impl BaseGradientDescent { + #[must_use] + pub fn with_rng(self, rng: Rng2, size: usize) -> BaseGradientDescent { + BaseGradientDescent { iterations: self.iterations, sampling: Some((rng, size)), } } - #[allow(dead_code)] + #[must_use] pub fn with_iterations(self, n: u32) -> Self { - BaseGradientDescentHyper { + BaseGradientDescent { sampling: self.sampling, iterations: n, - params: self.params, } } - - #[allow(dead_code)] - pub fn to_immutable(&self) -> NakedHypers { - self.params.clone() - } } -#[derive(Clone)] -pub struct VelocityGradientDescentHyper { - sampling: Option<(R, usize)>, - learning_rate: A, - iterations: u32, - mu: A, +pub struct NakedGradientDescent { + base: BaseGradientDescent, + naked: NakedHypers, } -impl VelocityGradientDescentHyper +impl NakedGradientDescent where A: Zero, { - #[allow(dead_code)] - pub fn naked(learning_rate: A, iterations: u32) -> Self { - VelocityGradientDescentHyper { - sampling: None, - learning_rate, - iterations, - mu: A::zero(), + #[must_use] + pub fn new(learning_rate: A, iterations: u32) -> Self { + NakedGradientDescent { + base: BaseGradientDescent::new(iterations), + naked: NakedHypers { learning_rate }, } } } -impl VelocityGradientDescentHyper { - #[allow(dead_code)] - pub fn with_mu(self, mu: A) -> Self { - VelocityGradientDescentHyper { - sampling: self.sampling, - mu, - learning_rate: self.learning_rate, - iterations: self.iterations, - } - } - - #[allow(dead_code)] - pub fn to_immutable(&self) -> VelocityHypers +impl NakedGradientDescent { + pub fn to_immutable(&self) -> NakedHypers where A: Clone, { - VelocityHypers { - mu: self.mu.clone(), - learning_rate: self.learning_rate.clone(), + self.naked.clone() + } + + #[must_use] + pub fn with_rng(self, rng: Rng2, size: usize) -> NakedGradientDescent { + NakedGradientDescent { + base: self.base.with_rng(rng, size), + naked: self.naked, } } } -impl From> for BaseGradientDescentHyper { - fn from(val: VelocityGradientDescentHyper) -> BaseGradientDescentHyper { - BaseGradientDescentHyper { - sampling: val.sampling, - iterations: val.iterations, - params: NakedHypers { - learning_rate: val.learning_rate, +impl From> for BaseGradientDescent { + fn from(val: NakedGradientDescent) -> BaseGradientDescent { + val.base + } +} + +pub struct VelocityGradientDescent { + base: BaseGradientDescent, + velocity: VelocityHypers, +} + +impl VelocityGradientDescent +where + A: Zero, +{ + #[must_use] + pub fn zero_momentum(learning_rate: A, iterations: u32) -> Self { + VelocityGradientDescent { + base: BaseGradientDescent::new(iterations), + velocity: VelocityHypers { + learning_rate, + mu: A::zero(), }, } } } -#[derive(Clone)] -pub struct RmsGradientDescentHyper { - sampling: Option<(R, usize)>, - iterations: u32, +impl VelocityGradientDescent { + #[must_use] + pub fn with_mu(self, mu: A) -> Self { + VelocityGradientDescent { + base: self.base, + velocity: 
VelocityHypers { + learning_rate: self.velocity.learning_rate, + mu, + }, + } + } + + pub fn to_immutable(&self) -> VelocityHypers + where + A: Clone, + { + self.velocity.clone() + } +} + +impl From> for BaseGradientDescent { + fn from(val: VelocityGradientDescent) -> BaseGradientDescent { + val.base + } +} + +pub struct RmsGradientDescent { + base: BaseGradientDescent, rms: RmsHyper, } -impl RmsGradientDescentHyper { - #[allow(dead_code)] +impl RmsGradientDescent { pub fn default(learning_rate: A, iterations: u32) -> Self where A: NumLike, @@ -122,9 +140,8 @@ impl RmsGradientDescentHyper { let one_hundredth = one_tenth.clone() * one_tenth; let one_ten_k = one_hundredth.clone() * one_hundredth; - RmsGradientDescentHyper { - sampling: None, - iterations, + RmsGradientDescent { + base: BaseGradientDescent::new(iterations), rms: RmsHyper { stabilizer: one_ten_k.clone() * one_ten_k, beta: A::one() + -(A::one() / ten), @@ -134,34 +151,31 @@ impl RmsGradientDescentHyper { } } -impl RmsGradientDescentHyper { - #[allow(dead_code)] +impl RmsGradientDescent { + #[must_use] pub fn with_stabilizer(self, stabilizer: A) -> Self { - RmsGradientDescentHyper { - sampling: self.sampling, + RmsGradientDescent { + base: self.base, rms: RmsHyper { stabilizer, beta: self.rms.beta, learning_rate: self.rms.learning_rate, }, - iterations: self.iterations, } } - #[allow(dead_code)] + #[must_use] pub fn with_beta(self, beta: A) -> Self { - RmsGradientDescentHyper { - sampling: self.sampling, + RmsGradientDescent { + base: self.base, rms: RmsHyper { stabilizer: self.rms.stabilizer, beta, learning_rate: self.rms.learning_rate, }, - iterations: self.iterations, } } - #[allow(dead_code)] pub fn to_immutable(&self) -> RmsHyper where A: Clone, @@ -170,14 +184,8 @@ impl RmsGradientDescentHyper { } } -impl From> for BaseGradientDescentHyper { - fn from(val: RmsGradientDescentHyper) -> BaseGradientDescentHyper { - BaseGradientDescentHyper { - sampling: val.sampling, - iterations: val.iterations, - params: NakedHypers { - learning_rate: val.rms.learning_rate, - }, - } +impl From> for BaseGradientDescent { + fn from(val: RmsGradientDescent) -> BaseGradientDescent { + val.base } } diff --git a/little_learner/src/lib.rs b/little_learner/src/lib.rs index c163e0d..e0aa15e 100644 --- a/little_learner/src/lib.rs +++ b/little_learner/src/lib.rs @@ -3,14 +3,12 @@ #![feature(array_methods)] pub mod auto_diff; -pub mod const_teq; -pub mod expr_syntax_tree; pub mod gradient_descent; pub mod hyper; pub mod loss; pub mod not_nan; +pub mod predictor; pub mod sample; pub mod scalar; pub mod smooth; -pub mod tensor; pub mod traits; diff --git a/little_learner/src/loss.rs b/little_learner/src/loss.rs index 22766e0..3e08e15 100644 --- a/little_learner/src/loss.rs +++ b/little_learner/src/loss.rs @@ -4,8 +4,6 @@ use std::{ }; use crate::auto_diff::Differentiable; -use crate::smooth::smooth; -use crate::traits::{NumLike, Sqrt}; use crate::{ auto_diff::{DifferentiableTagged, RankedDifferentiable}, scalar::Scalar, @@ -210,7 +208,10 @@ where }) } -// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate). +/// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate). +/// +/// # Panics +/// Panics if the input `theta` is not of rank 1 consisting of a tensor1 and a scalar. 
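+///
+/// A sketch of a well-shaped `theta`, roughly what the tests and `main.rs` build as
+/// their zero starting parameters (the second element here is inferred from the
+/// scalar translation term described above):
+///
+/// ```ignore
+/// let theta = [
+///     // A tensor1 of length 2, dotted with each input point...
+///     RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]).to_unranked(),
+///     // ...and a scalar to translate by.
+///     Differentiable::of_scalar(Scalar::<NotNan<f64>>::zero()),
+/// ];
+/// ```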
pub fn predict_plane( xs: RankedDifferentiable, theta: &[Differentiable; 2], @@ -218,9 +219,12 @@ pub fn predict_plane( where A: Mul + Add + Sum + Default + One + Zero + Clone, { - if theta[0].rank() != 1 { - panic!("theta0 must be of rank 1, got: {}", theta[0].rank()) - } + assert_eq!( + theta[0].rank(), + 1, + "theta0 must be of rank 1, got: {}", + theta[0].rank() + ); let theta0 = RankedDifferentiable::of_vector( theta[0] .borrow_vector() @@ -238,105 +242,6 @@ where RankedDifferentiable::of_vector(dotted) } -pub struct Predictor { - pub predict: F, - pub inflate: fn(Deflated) -> Inflated, - pub deflate: fn(Inflated) -> Deflated, - pub update: fn(Inflated, &Deflated, Params) -> Inflated, -} - -#[derive(Clone)] -pub struct NakedHypers { - pub learning_rate: A, -} - -pub const fn naked_predictor( - f: F, -) -> Predictor, Differentiable, NakedHypers> -where - A: NumLike, -{ - Predictor { - predict: f, - inflate: |x| x, - deflate: |x| x, - - update: |theta, delta, hyper| { - let learning_rate = Scalar::make(hyper.learning_rate); - Differentiable::map2(&theta, delta, &mut |theta, delta| { - theta.clone() - delta.clone() * learning_rate.clone() - }) - }, - } -} - -#[derive(Clone)] -pub struct RmsHyper { - pub stabilizer: A, - pub beta: A, - pub learning_rate: A, -} - -pub const fn rms_predictor( - f: F, -) -> Predictor, Differentiable, RmsHyper> -where - A: NumLike, -{ - Predictor { - predict: f, - inflate: |x| x.map_tag(&mut |()| A::zero()), - deflate: |x| x.map_tag(&mut |_| ()), - update: |theta, delta, hyper| { - DifferentiableTagged::map2_tagged( - &theta, - delta, - &mut |theta, smoothed_grad, delta, ()| { - let r = smooth( - Scalar::make(hyper.beta.clone()), - &Differentiable::of_scalar(Scalar::make(smoothed_grad)), - &Differentiable::of_scalar(delta.clone() * delta.clone()), - ) - .into_scalar(); - let learning_rate = Scalar::make(hyper.learning_rate.clone()) - / (r.sqrt() + Scalar::make(hyper.stabilizer.clone())); - ( - theta.clone() - + -(delta.clone() * Scalar::make(hyper.learning_rate.clone())), - learning_rate.clone_real_part(), - ) - }, - ) - }, - } -} - -#[derive(Clone)] -pub struct VelocityHypers { - pub learning_rate: A, - pub mu: A, -} - -pub const fn velocity_predictor( - f: F, -) -> Predictor, Differentiable, VelocityHypers> -where - A: NumLike, -{ - Predictor { - predict: f, - inflate: |x| x.map_tag(&mut |()| A::zero()), - deflate: |x| x.map_tag(&mut |_| ()), - update: |theta, delta, hyper| { - DifferentiableTagged::map2_tagged(&theta, delta, &mut |theta, velocity, delta, ()| { - let velocity = hyper.mu.clone() * velocity - + -(delta.clone_real_part() * hyper.learning_rate.clone()); - (theta.clone() + Scalar::make(velocity.clone()), velocity) - }) - }, - } -} - #[cfg(test)] mod test_loss { use crate::auto_diff::RankedDifferentiable; diff --git a/little_learner/src/predictor.rs b/little_learner/src/predictor.rs new file mode 100644 index 0000000..a16e4a1 --- /dev/null +++ b/little_learner/src/predictor.rs @@ -0,0 +1,111 @@ +use crate::auto_diff::{Differentiable, DifferentiableTagged}; +use crate::scalar::Scalar; +use crate::smooth::smooth; +use crate::traits::{NumLike, Sqrt}; + +/// A Predictor is a function (`predict`) we're optimising, an `inflate` which adds any metadata +/// that the prediction engine might require, a corresponding `deflate` which removes the metadata, +/// and an `update` which computes the next guess based on the previous guess. +pub struct Predictor { + /// The function we're trying to optimise. 
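+    /// For instance, the gradient-descent tests pair this up as
+    /// `predictor::naked(predict_plane)` or `predictor::velocity(predict_plane)`;
+    /// a sketch:
+    ///
+    /// ```ignore
+    /// let p = naked(predict_plane);
+    /// // `p.predict` is `predict_plane`; `p.update` steps theta by
+    /// // `theta - learning_rate * gradient` (plain gradient descent).
+    /// ```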
+ pub predict: F, + /// Attach prediction metadata to an input to the function we're trying to optimise. + pub inflate: fn(Deflated) -> Inflated, + /// Remove prediction metadata. + pub deflate: fn(Inflated) -> Deflated, + /// Given a guess at an optimum, the gradient at that point, and any hyperparameters, + /// compute the next guess at the optimum. + pub update: fn(Inflated, &Deflated, Params) -> Inflated, +} + +/// Hyperparameters applying to the most basic way to calculate the next step. +#[derive(Clone)] +pub struct NakedHypers { + pub learning_rate: A, +} + +pub const fn naked(f: F) -> Predictor, Differentiable, NakedHypers> +where + A: NumLike, +{ + Predictor { + predict: f, + inflate: |x| x, + deflate: |x| x, + + update: |theta, delta, hyper| { + let learning_rate = Scalar::make(hyper.learning_rate); + Differentiable::map2(&theta, delta, &mut |theta, delta| { + (theta.clone() - delta.clone() * learning_rate.clone()).truncate_dual(None) + }) + }, + } +} + +#[derive(Clone)] +pub struct RmsHyper { + pub stabilizer: A, + pub beta: A, + pub learning_rate: A, +} + +pub const fn rms( + f: F, +) -> Predictor, Differentiable, RmsHyper> +where + A: NumLike, +{ + Predictor { + predict: f, + inflate: |x| x.map_tag(&mut |()| A::zero()), + deflate: |x| x.map_tag(&mut |_| ()), + update: |theta, delta, hyper| { + DifferentiableTagged::map2_tagged( + &theta, + delta, + &mut |theta, smoothed_grad, delta, ()| { + let r = smooth( + Scalar::make(hyper.beta.clone()), + &Differentiable::of_scalar(Scalar::make(smoothed_grad)), + &Differentiable::of_scalar(delta.clone() * delta.clone()), + ) + .into_scalar(); + let learning_rate = Scalar::make(hyper.learning_rate.clone()) + / (r.sqrt() + Scalar::make(hyper.stabilizer.clone())); + ( + (theta.clone() + + -(delta.clone() * Scalar::make(hyper.learning_rate.clone()))) + .truncate_dual(None), + learning_rate.clone_real_part(), + ) + }, + ) + }, + } +} + +#[derive(Clone)] +pub struct VelocityHypers { + pub learning_rate: A, + pub mu: A, +} + +pub const fn velocity( + f: F, +) -> Predictor, Differentiable, VelocityHypers> +where + A: NumLike, +{ + Predictor { + predict: f, + inflate: |x| x.map_tag(&mut |()| A::zero()), + deflate: |x| x.map_tag(&mut |_| ()), + update: |theta, delta, hyper| { + DifferentiableTagged::map2_tagged(&theta, delta, &mut |theta, velocity, delta, ()| { + let velocity = hyper.mu.clone() * velocity + + -(delta.clone_real_part() * hyper.learning_rate.clone()); + (theta.clone() + Scalar::make(velocity.clone()), velocity) + }) + }, + } +} diff --git a/little_learner/src/sample.rs b/little_learner/src/sample.rs index 6e953ef..de6b4eb 100644 --- a/little_learner/src/sample.rs +++ b/little_learner/src/sample.rs @@ -1,5 +1,6 @@ use rand::Rng; +/// Grab `n` random samples from `from_x` and `from_y`, collecting them into a vector. 
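+///
+/// A sketch of a call (the generic bounds on `from_x`/`from_y` are elided here, and
+/// `plane_xs`/`plane_ys` stand in for whatever paired data you have; that the same
+/// indices are drawn from both inputs, keeping each `x` with its `y`, is an
+/// assumption based on how gradient descent uses this for minibatching):
+///
+/// ```ignore
+/// let mut rng = StdRng::seed_from_u64(314159);
+/// let (xs, ys) = sample2(&mut rng, 4, &plane_xs, &plane_ys);
+/// ```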
pub fn sample2(rng: &mut R, n: usize, from_x: I, from_y: J) -> (Vec, Vec) where T: Copy, diff --git a/little_learner/src/scalar.rs b/little_learner/src/scalar.rs index 5a38e88..ebe5936 100644 --- a/little_learner/src/scalar.rs +++ b/little_learner/src/scalar.rs @@ -117,7 +117,7 @@ impl Link { -left.clone_real_part() * z / (right.clone_real_part() * right.clone_real_part()), acc, - ) + ); } LinkData::Log(arg) => { // d/dx(log y) = 1/y dy/dx @@ -181,7 +181,7 @@ where A: Add + Clone, { fn add_assign(&mut self, rhs: Self) { - *self = self.clone() + rhs + *self = self.clone() + rhs; } } @@ -287,8 +287,7 @@ where impl Scalar { pub fn real_part(&self) -> &A { match self { - Scalar::Number(a, _) => a, - Scalar::Dual(a, _) => a, + Scalar::Number(a, _) | Scalar::Dual(a, _) => a, } } @@ -297,8 +296,7 @@ impl Scalar { A: Clone, { match self { - Scalar::Number(a, _) => (*a).clone(), - Scalar::Dual(a, _) => (*a).clone(), + Scalar::Number(a, _) | Scalar::Dual(a, _) => (*a).clone(), } } @@ -319,6 +317,7 @@ impl Scalar { } } + #[must_use] pub fn truncate_dual(self, index: Option) -> Scalar where A: Clone, @@ -326,6 +325,7 @@ impl Scalar { Scalar::Dual(self.clone_real_part(), Link::EndOfLink(index)) } + #[must_use] pub fn make(x: A) -> Scalar { Scalar::Number(x, None) } @@ -337,9 +337,9 @@ where { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Scalar::Number(n, Some(index)) => f.write_fmt(format_args!("{}_{}", n, index)), - Scalar::Number(n, None) => f.write_fmt(format_args!("{}", n)), - Scalar::Dual(n, link) => f.write_fmt(format_args!("<{}, link: {}>", n, link)), + Scalar::Number(n, Some(index)) => f.write_fmt(format_args!("{n}_{index}")), + Scalar::Number(n, None) => f.write_fmt(format_args!("{n}")), + Scalar::Dual(n, link) => f.write_fmt(format_args!("<{n}, link: {link}>")), } } } @@ -385,7 +385,7 @@ mod test_loss { fn sqrt_gradient() { let nine = Differentiable::of_scalar(Scalar::make(NotNan::new(9.0).expect("not nan"))); let graded: [Differentiable>; 1] = grad( - |x| RankedDifferentiable::of_scalar(x[0].clone().into_scalar().clone().sqrt()), + |x| RankedDifferentiable::of_scalar(x[0].clone().into_scalar().sqrt()), &[nine], ); let graded = graded.map(|x| x.into_scalar().clone_real_part().into_inner())[0]; diff --git a/little_learner/src/smooth.rs b/little_learner/src/smooth.rs index 2a1d22e..8d596b0 100644 --- a/little_learner/src/smooth.rs +++ b/little_learner/src/smooth.rs @@ -3,10 +3,12 @@ use crate::scalar::Scalar; use crate::traits::One; use std::ops::{Add, Mul, Neg}; +/// Combine `old_value` and `new_value`, weighting the combination towards `new_value` by a factor +/// of `decay`. pub fn smooth_tagged( decay: Scalar, - current_avg: &DifferentiableTagged, - grad: &DifferentiableTagged, + old_value: &DifferentiableTagged, + new_value: &DifferentiableTagged, mut tags: F, ) -> DifferentiableTagged where @@ -15,23 +17,25 @@ where Tag1: Clone, Tag2: Clone, { - DifferentiableTagged::map2_tagged(current_avg, grad, &mut |avg, tag1, grad, tag2| { + DifferentiableTagged::map2_tagged(old_value, new_value, &mut |old, tag1, new, tag2| { ( - (avg.clone() * decay.clone()) + (grad.clone() * (Scalar::::one() + -decay.clone())), + (old.clone() * decay.clone()) + (new.clone() * (Scalar::::one() + -decay.clone())), tags(tag1, tag2), ) }) } +/// Combine `old_value` and `new_value`, weighting the combination towards `new_value` by a factor +/// of `decay`. 
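+///
+/// Concretely, the body of `smooth_tagged` computes
+/// `decay * old_value + (1 - decay) * new_value` componentwise, so a decay of 0.9,
+/// an old component of 0.8 and a new component of 1.0 smooth to roughly 0.82
+/// (compare the `0.820…` entry in the tensor test below). A sketch with scalar
+/// tensors:
+///
+/// ```ignore
+/// let decay = Scalar::make(NotNan::new(0.9).expect("not nan"));
+/// let old_value = Differentiable::of_scalar(Scalar::make(NotNan::new(0.8).expect("not nan")));
+/// let new_value = Differentiable::of_scalar(Scalar::make(NotNan::new(1.0).expect("not nan")));
+/// let smoothed = smooth(decay, &old_value, &new_value); // approximately 0.82
+/// ```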
pub fn smooth( decay: Scalar, - current_avg: &Differentiable, - grad: &Differentiable, + old_value: &Differentiable, + new_value: &Differentiable, ) -> Differentiable where A: One + Clone + Mul + Neg + Add, { - smooth_tagged(decay, current_avg, grad, |(), ()| ()) + smooth_tagged(decay, old_value, new_value, |(), ()| ()) } #[cfg(test)] @@ -72,17 +76,17 @@ mod test_smooth { output, vec![ 5.0299999999999985, - 6.7969999999999979, - 6.5472999999999981, - 6.1625699999999979, + 6.796_999_999_999_998, + 6.547_299_999_999_998, + 6.162_569_999_999_998, 5.7263129999999975, - 5.3736816999999979, - 4.8963135299999978 + 5.373_681_699_999_998, + 4.896_313_529_999_998 ] - ) + ); } - fn hydrate(v: Vec) -> Differentiable> { + fn hydrate(v: &[f64]) -> Differentiable> { Differentiable::of_vec( v.iter() .cloned() @@ -100,9 +104,9 @@ mod test_smooth { vec![13.4, 18.2, 41.4], vec![1.1, 0.3, 67.3], ] - .map(hydrate); + .map(|x| hydrate(&x)); - let mut current = hydrate(vec![0.8, 3.1, 2.2]); + let mut current = hydrate(&vec![0.8, 3.1, 2.2]); let mut output = Vec::with_capacity(inputs.len()); for input in inputs { current = smooth(decay.clone(), ¤t, &input); @@ -112,10 +116,10 @@ mod test_smooth { assert_eq!( output, vec![ - vec![0.82000000000000006, 2.9, 2.2800000000000002], - vec![2.0779999999999998, 4.4299999999999997, 6.1919999999999993], + vec![0.820_000_000_000_000_1, 2.9, 2.2800000000000002], + vec![2.078, 4.43, 6.191_999_999_999_999], vec![1.9802, 4.0169999999999995, 12.302799999999998] ] - ) + ); } } diff --git a/little_learner/src/tensor.rs b/little_learner/src/tensor.rs deleted file mode 100644 index c769b40..0000000 --- a/little_learner/src/tensor.rs +++ /dev/null @@ -1,107 +0,0 @@ -#[macro_export] -macro_rules! tensor { - ($x:ty , $i: expr) => {[$x; $i]}; - ($x:ty , $i: expr, $($is:expr),+) => {[tensor!($x, $($is),+); $i]}; -} - -#[cfg(test)] -mod tests { - #[test] - fn test_tensor_type() { - let _: tensor!(f64, 1, 2, 3) = [[[1.0, 3.0, 6.0], [-1.3, -30.0, -0.0]]]; - } -} - -pub trait Extensible1 { - fn apply(&self, other: &A, op: &F) -> Self - where - F: Fn(&A, &A) -> A; -} - -pub trait Extensible2 { - fn apply(&self, other: &Self, op: &F) -> Self - where - F: Fn(&A, &A) -> A; -} - -impl Extensible1 for [T; N] -where - T: Extensible1 + Copy + Default, -{ - fn apply(&self, other: &A, op: &F) -> Self - where - F: Fn(&A, &A) -> A, - { - let mut result = [Default::default(); N]; - for (i, coord) in self.iter().enumerate() { - result[i] = T::apply(coord, other, op); - } - result - } -} - -impl Extensible2 for [T; N] -where - T: Extensible2 + Copy + Default, -{ - fn apply(&self, other: &Self, op: &F) -> Self - where - F: Fn(&A, &A) -> A, - { - let mut result = [Default::default(); N]; - for (i, coord) in self.iter().enumerate() { - result[i] = T::apply(coord, &other[i], op); - } - result - } -} - -#[macro_export] -macro_rules! extensible1 { - ($x: ty) => { - impl Extensible1<$x> for $x { - fn apply(&self, other: &$x, op: &F) -> Self - where - F: Fn(&Self, &Self) -> Self, - { - op(self, other) - } - } - }; -} - -#[macro_export] -macro_rules! 
extensible2 { - ($x: ty) => { - impl Extensible2<$x> for $x { - fn apply(&self, other: &Self, op: &F) -> Self - where - F: Fn(&Self, &Self) -> Self, - { - op(self, other) - } - } - }; -} - -extensible1!(u8); -extensible1!(f64); - -extensible2!(u8); -extensible2!(f64); - -pub fn extension1(t1: &T, t2: &A, op: F) -> T -where - T: Extensible1, - F: Fn(&A, &A) -> A, -{ - t1.apply::(t2, &op) -} - -pub fn extension2(t1: &T, t2: &T, op: F) -> T -where - T: Extensible2, - F: Fn(&A, &A) -> A, -{ - t1.apply::(t2, &op) -} diff --git a/little_learner/src/traits.rs b/little_learner/src/traits.rs index 86d9876..cd73bdc 100644 --- a/little_learner/src/traits.rs +++ b/little_learner/src/traits.rs @@ -4,6 +4,7 @@ use std::iter::Sum; use std::ops::{Add, AddAssign, Div, Mul, Neg}; pub trait Exp { + #[must_use] fn exp(self) -> Self; } @@ -14,6 +15,7 @@ impl Exp for NotNan { } pub trait Sqrt { + #[must_use] fn sqrt(self) -> Self; } @@ -24,10 +26,12 @@ impl Sqrt for NotNan { } pub trait Zero { + #[must_use] fn zero() -> Self; } pub trait One { + #[must_use] fn one() -> Self; } diff --git a/little_learner_app/src/main.rs b/little_learner_app/src/main.rs index 5dd8178..391d0ec 100644 --- a/little_learner_app/src/main.rs +++ b/little_learner_app/src/main.rs @@ -1,35 +1,37 @@ #![allow(incomplete_features)] #![feature(generic_const_exprs)] -mod with_tensor; - use little_learner::auto_diff::{Differentiable, RankedDifferentiable, RankedDifferentiableTagged}; use little_learner::gradient_descent::gradient_descent; -use little_learner::hyper::VelocityGradientDescentHyper; -use little_learner::loss::{predict_plane, velocity_predictor}; +use little_learner::hyper; +use little_learner::loss::predict_plane; use little_learner::not_nan::{to_not_nan_1, to_not_nan_2}; +use little_learner::predictor; use little_learner::scalar::Scalar; use little_learner::traits::Zero; use ordered_float::NotNan; -fn main() { - let plane_xs = [ - [1.0, 2.05], - [1.0, 3.0], - [2.0, 2.0], - [2.0, 3.91], - [3.0, 6.13], - [4.0, 8.09], - ]; - let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94]; +const PLANE_XS: [[f64; 2]; 6] = [ + [1.0, 2.05], + [1.0, 3.0], + [2.0, 2.0], + [2.0, 3.91], + [3.0, 6.13], + [4.0, 8.09], +]; +const PLANE_YS: [f64; 6] = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94]; - let hyper = VelocityGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000) - .with_mu(NotNan::new(0.9).expect("not nan")); +fn main() { + let beta = NotNan::new(0.9).expect("not nan"); + let stabilizer = NotNan::new(0.000_000_01).expect("not nan"); + let hyper = hyper::RmsGradientDescent::default(NotNan::new(0.001).expect("not nan"), 3000) + .with_stabilizer(stabilizer) + .with_beta(beta); let iterated = { - let xs = to_not_nan_2(plane_xs); - let ys = to_not_nan_1(plane_ys); + let xs = to_not_nan_2(PLANE_XS); + let ys = to_not_nan_1(PLANE_YS); let zero_params = [ RankedDifferentiable::of_slice(&[NotNan::::zero(), NotNan::::zero()]) .to_unranked(), @@ -42,8 +44,8 @@ fn main() { RankedDifferentiableTagged::of_slice_2::<_, 2>, &ys, zero_params, - velocity_predictor(predict_plane), - VelocityGradientDescentHyper::to_immutable, + predictor::rms(predict_plane), + hyper::RmsGradientDescent::to_immutable, ) }; @@ -52,11 +54,14 @@ fn main() { let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor"); let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor"); - assert_eq!(theta0.collect(), [3.979645447136021, 1.976454920954754]); - assert_eq!( - theta1.to_scalar().real_part().into_inner(), - 6.169579045974949 - ); + let fitted_theta0 = theta0 + 
.collect() + .iter() + .map(|x| x.into_inner()) + .collect::>(); + let fitted_theta1 = theta1.to_scalar().real_part().into_inner(); + assert_eq!(fitted_theta0, [3.985_350_099_342_649, 1.9745945728216352]); + assert_eq!(fitted_theta1, 6.164_222_983_181_168); } #[cfg(test)] diff --git a/little_learner_app/src/with_tensor.rs b/little_learner_app/src/with_tensor.rs deleted file mode 100644 index cc78083..0000000 --- a/little_learner_app/src/with_tensor.rs +++ /dev/null @@ -1,136 +0,0 @@ -#![allow(dead_code)] - -use std::iter::Sum; -use std::ops::{Mul, Sub}; - -use little_learner::tensor; -use little_learner::tensor::{extension2, Extensible2}; -use little_learner::traits::One; - -type Point = [A; N]; - -type Parameters = [Point; M]; - -fn dot_points(x: &Point, y: &Point) -> A -where - A: Sum<::Output> + Copy + Default + Mul + Extensible2, -{ - extension2(x, y, |&x, &y| x * y).into_iter().sum() -} - -fn dot(x: &Point, y: &Parameters) -> Point -where - A: Mul + Sum<::Output> + Copy + Default + Extensible2, -{ - let mut result = [Default::default(); M]; - for (i, coord) in y.iter().map(|y| dot_points(x, y)).enumerate() { - result[i] = coord; - } - result -} - -fn sum(x: &tensor!(A, N)) -> A -where - A: Sum + Copy, -{ - A::sum(x.iter().cloned()) -} - -fn squared(x: &tensor!(A, N)) -> tensor!(A, N) -where - A: Mul + Extensible2 + Copy + Default, -{ - extension2(x, x, |&a, &b| (a * b)) -} - -fn l2_norm(prediction: &tensor!(A, N), data: &tensor!(A, N)) -> A -where - A: Sum + Mul + Extensible2 + Copy + Default + Sub, -{ - let diff = extension2(prediction, data, |&x, &y| x - y); - sum(&squared(&diff)) -} - -pub fn l2_loss( - target: F, - data_xs: &tensor!(A, N), - data_ys: &tensor!(A, N), - params: &Params, -) -> A -where - F: Fn(&tensor!(A, N), &Params) -> tensor!(A, N), - A: Sum + Mul + Extensible2 + Copy + Default + Sub, -{ - let pred_ys = target(data_xs, params); - l2_norm(&pred_ys, data_ys) -} - -pub fn predict_line(xs: &tensor!(A, N), theta: &tensor!(A, 2)) -> tensor!(A, N) -where - A: Mul + Sum<::Output> + Copy + Default + Extensible2 + One, -{ - let mut result: tensor!(A, N) = [Default::default(); N]; - for (i, &x) in xs.iter().enumerate() { - result[i] = dot(&[x, One::one()], &[*theta])[0]; - } - result -} - -#[cfg(test)] -mod tests { - use super::*; - use little_learner::tensor::extension1; - - #[test] - fn test_extension() { - let x: tensor!(u8, 1) = [2]; - assert_eq!(extension1(&x, &7, |x, y| x + y), [9]); - let y: tensor!(u8, 1) = [7]; - assert_eq!(extension2(&x, &y, |x, y| x + y), [9]); - - let x: tensor!(u8, 3) = [5, 6, 7]; - assert_eq!(extension1(&x, &2, |x, y| x + y), [7, 8, 9]); - let y: tensor!(u8, 3) = [2, 0, 1]; - assert_eq!(extension2(&x, &y, |x, y| x + y), [7, 6, 8]); - - let x: tensor!(u8, 2, 3) = [[4, 6, 7], [2, 0, 1]]; - assert_eq!(extension1(&x, &2, |x, y| x + y), [[6, 8, 9], [4, 2, 3]]); - let y: tensor!(u8, 2, 3) = [[1, 2, 2], [6, 3, 1]]; - assert_eq!(extension2(&x, &y, |x, y| x + y), [[5, 8, 9], [8, 3, 2]]); - } - - #[test] - fn test_l2_norm() { - assert_eq!( - l2_norm(&[4.0, -3.0, 0.0, -4.0, 3.0], &[0.0, 0.0, 0.0, 0.0, 0.0]), - 50.0 - ) - } - - #[test] - fn test_l2_loss() { - let loss = l2_loss( - predict_line, - &[2.0, 1.0, 4.0, 3.0], - &[1.8, 1.2, 4.2, 3.3], - &[0.0, 0.0], - ); - assert_eq!(loss, 33.21); - - let loss = l2_loss( - predict_line, - &[2.0, 1.0, 4.0, 3.0], - &[1.8, 1.2, 4.2, 3.3], - &[0.0099, 0.0], - ); - assert_eq!((100.0 * loss).round() / 100.0, 32.59); - } - - #[test] - fn l2_loss_non_autodiff_example() { - let xs = [2.0, 1.0, 4.0, 3.0]; - let ys = [1.8, 
1.2, 4.2, 3.3]; - let loss = l2_loss(predict_line, &xs, &ys, &[0.0099, 0.0]); - assert_eq!(loss, 32.5892403); - } -}