From 1ea0383710bdbe79bab7f94ac733d22ec3dddc8a Mon Sep 17 00:00:00 2001
From: Patrick Stevens
Date: Mon, 3 Apr 2023 11:36:31 +0100
Subject: [PATCH] Optimise my first function (#8)

---
 Cargo.lock                      |   1 +
 little_learner/src/auto_diff.rs |  10 +-
 little_learner/src/scalar.rs    |   4 +
 little_learner_app/Cargo.toml   |   3 +-
 little_learner_app/src/main.rs  | 191 +++++++++++++++++++++++++++-----
 5 files changed, 177 insertions(+), 32 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index c30cc64..1e333c7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -55,6 +55,7 @@ dependencies = [
 name = "little_learner_app"
 version = "0.1.0"
 dependencies = [
+ "arrayvec",
  "immutable-chunkmap",
  "little_learner",
  "ordered-float",
diff --git a/little_learner/src/auto_diff.rs b/little_learner/src/auto_diff.rs
index de60ff8..00d649f 100644
--- a/little_learner/src/auto_diff.rs
+++ b/little_learner/src/auto_diff.rs
@@ -132,8 +132,7 @@ where
         + Div<Output = A>
         + Zero
         + One
-        + Neg<Output = A>
-        + Display,
+        + Neg<Output = A>,
 {
     fn accumulate_gradients_vec(v: &[DifferentiableHidden<A>], acc: &mut HashMap<Scalar<A>, A>) {
         for v in v.iter().rev() {
@@ -242,7 +241,7 @@ impl<A> Differentiable<A> {
         }
     }
 
-    pub fn grad<F>(f: F, theta: Differentiable<A>) -> Differentiable<A>
+    pub fn grad<F>(f: F, theta: &Differentiable<A>) -> Differentiable<A>
     where
         F: Fn(Differentiable<A>) -> Differentiable<A>,
         A: Clone
@@ -254,8 +253,7 @@ impl<A> Differentiable<A> {
             + Zero
             + One
             + Neg<Output = A>
-            + Eq
-            + std::fmt::Display,
+            + Eq,
     {
         let mut i = 0usize;
         let wrt = theta.contents.map(&mut |x| {
@@ -332,7 +330,7 @@ mod tests {
                     x,
                 ))])
             },
-            input_vec,
+            &input_vec,
         );
 
         let grad_vec: Vec<f64> = Differentiable::to_vector(grad)
diff --git a/little_learner/src/scalar.rs b/little_learner/src/scalar.rs
index e20c2ed..719e4e7 100644
--- a/little_learner/src/scalar.rs
+++ b/little_learner/src/scalar.rs
@@ -237,6 +237,10 @@ impl<A> Scalar<A> {
     {
         Scalar::Dual(self.clone_real_part(), Link::EndOfLink(Some(index)))
     }
+
+    pub fn make(x: A) -> Scalar<A> {
+        Scalar::Number(x, None)
+    }
 }
 
 impl<A> Display for Scalar<A>
diff --git a/little_learner_app/Cargo.toml b/little_learner_app/Cargo.toml
index 2324893..721f1e5 100644
--- a/little_learner_app/Cargo.toml
+++ b/little_learner_app/Cargo.toml
@@ -8,4 +8,5 @@ edition = "2021"
 [dependencies]
 immutable-chunkmap = "1.0.5"
 ordered-float = "3.6.0"
-little_learner = { path = "../little_learner" }
\ No newline at end of file
+little_learner = { path = "../little_learner" }
+arrayvec = "0.7.2"
diff --git a/little_learner_app/src/main.rs b/little_learner_app/src/main.rs
index be1d840..0d4b1a0 100644
--- a/little_learner_app/src/main.rs
+++ b/little_learner_app/src/main.rs
@@ -3,10 +3,14 @@
 
 mod with_tensor;
 
-use little_learner::auto_diff::{of_scalar, of_slice, Differentiable};
+use core::hash::Hash;
+use std::ops::{Add, AddAssign, Div, Mul, Neg};
+
+use little_learner::auto_diff::{of_scalar, of_slice, to_scalar, Differentiable};
 
 use little_learner::loss::{l2_loss_2, predict_line_2, square};
-use little_learner::traits::Zero;
+use little_learner::scalar::Scalar;
+use little_learner::traits::{Exp, One, Zero};
 use ordered_float::NotNan;
 
 use crate::with_tensor::{l2_loss, predict_line};
@@ -19,37 +23,174 @@ fn l2_loss_non_autodiff_example() {
     println!("{:?}", loss);
 }
 
+fn iterate<A, F>(f: &F, start: A, n: u32) -> A
+where
+    F: Fn(A) -> A,
+{
+    if n == 0 {
+        return start;
+    }
+    iterate(f, f(start), n - 1)
+}
+
+fn gradient_descent_step<A, F>(
+    f: &F,
+    learning_rate: A,
+    theta: Differentiable<A>,
+) -> Differentiable<A>
+where
+    A: Clone
+        + Mul<Output = A>
+        + Neg<Output = A>
+        + Add<Output = A>
+        + Hash
+        + AddAssign
+        + Div<Output = A>
+        + Zero
+        + One
+        + Eq
+        + Exp,
+    F: Fn(Differentiable<A>) -> Differentiable<A>,
+{
+    let delta = Differentiable::grad(f, &theta);
+    Differentiable::map2(&theta, &delta, &|theta, delta| {
+        (*theta).clone() - (Scalar::make(learning_rate.clone()) * (*delta).clone())
+    })
+}
+
 fn main() {
     let input_vec = of_slice(&[NotNan::new(27.0).expect("not nan")]);
 
-    let grad = Differentiable::grad(|x| Differentiable::map(x, &mut |x| square(&x)), input_vec);
+    let grad = Differentiable::grad(|x| Differentiable::map(x, &mut |x| square(&x)), &input_vec);
     println!("Gradient of the x^2 function at x=27: {}", grad);
 
     let xs = [2.0, 1.0, 4.0, 3.0];
     let ys = [1.8, 1.2, 4.2, 3.3];
 
-    let loss = l2_loss_2(
-        predict_line_2,
-        of_slice(&xs),
-        of_slice(&ys),
-        of_slice(&[0.0, 0.0]),
-    );
-    println!("Computation of L2 loss: {}", loss);
+    let alpha = NotNan::new(0.01).expect("not nan");
+    let iterated = {
+        let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
+        let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+        iterate(
+            &|theta| {
+                gradient_descent_step(
+                    &|x| {
+                        Differentiable::of_vector(vec![of_scalar(l2_loss_2(
+                            predict_line_2,
+                            of_slice(&xs),
+                            of_slice(&ys),
+                            x,
+                        ))])
+                    },
+                    alpha,
+                    theta,
+                )
+            },
+            of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
+            1000,
+        )
+    };
 
-    let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
-    let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
-    let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
-    let grad = Differentiable::grad(
-        |x| {
-            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                predict_line_2,
-                of_slice(&xs),
-                of_slice(&ys),
-                x,
-            ))])
-        },
-        input_vec,
+    println!(
+        "After iteration: {:?}",
+        Differentiable::to_vector(iterated)
+            .into_iter()
+            .map(|x| to_scalar(x).real_part().into_inner())
+            .collect::<Vec<_>>()
     );
-
-    println!("{}", grad);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrayvec::ArrayVec;
+    use little_learner::auto_diff::to_scalar;
+
+    #[test]
+    fn loss_example() {
+        let xs = [2.0, 1.0, 4.0, 3.0];
+        let ys = [1.8, 1.2, 4.2, 3.3];
+        let loss = l2_loss_2(
+            predict_line_2,
+            of_slice(&xs),
+            of_slice(&ys),
+            of_slice(&[0.0, 0.0]),
+        );
+
+        assert_eq!(*loss.real_part(), 33.21);
+    }
+
+    #[test]
+    fn loss_gradient() {
+        let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
+        let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
+        let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
+        let grad = Differentiable::grad(
+            |x| {
+                Differentiable::of_vector(vec![of_scalar(l2_loss_2(
+                    predict_line_2,
+                    of_slice(&xs),
+                    of_slice(&ys),
+                    x,
+                ))])
+            },
+            &input_vec,
+        );
+
+        assert_eq!(
+            Differentiable::to_vector(grad)
+                .into_iter()
+                .map(|x| *(to_scalar(x).real_part()))
+                .collect::<Vec<_>>(),
+            [-63.0, -21.0]
+        );
+    }
+
+    #[test]
+    fn test_iterate() {
+        let f = |t: [i32; 3]| {
+            let mut vec = ArrayVec::<i32, 3>::new();
+            for i in t {
+                vec.push(i - 3);
+            }
+            vec.into_inner().unwrap()
+        };
+        assert_eq!(iterate(&f, [1, 2, 3], 5u32), [-14, -13, -12]);
+    }
+
+    #[test]
+    fn first_optimisation_test() {
+        let xs = [2.0, 1.0, 4.0, 3.0];
+        let ys = [1.8, 1.2, 4.2, 3.3];
+
+        let alpha = NotNan::new(0.01).expect("not nan");
+        let iterated = {
+            let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
+            let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+            iterate(
+                &|theta| {
+                    gradient_descent_step(
+                        &|x| {
+                            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
+                                predict_line_2,
+                                of_slice(&xs),
+                                of_slice(&ys),
+                                x,
+                            ))])
+                        },
+                        alpha,
+                        theta,
+                    )
+                },
+                of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
+                1000,
+            )
+        };
+        let iterated = Differentiable::to_vector(iterated)
+            .into_iter()
+            .map(|x| to_scalar(x).real_part().into_inner())
+            .collect::<Vec<_>>();
+
+        assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
+    }
 }