Add hyperparameters, optimise quadratic function (#9)

2023-04-03 15:06:45 +01:00
parent 1ea0383710
commit 817775412b
3 changed files with 123 additions and 30 deletions
--- a/little_learner/src/loss.rs
+++ b/little_learner/src/loss.rs
@@ -21,7 +21,7 @@ pub fn dot_2<A, const RANK: usize>(
    y: &Differentiable<A, RANK>,
 ) -> Differentiable<A, RANK>
 where
-    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Copy + Default,
+    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
 {
    Differentiable::map2(x, y, &|x, y| x.clone() * y.clone())
 }
@@ -35,7 +35,7 @@ where

 fn sum_2<A>(x: Differentiable<A, 1>) -> Scalar<A>
 where
-    A: Sum<A> + Copy + Add<Output = A> + Zero,
+    A: Sum<A> + Clone + Add<Output = A> + Zero,
 {
    Differentiable::to_vector(x)
        .into_iter()
@@ -91,3 +91,17 @@ where
    }
    Differentiable::of_vector(result)
 }
+
+pub fn predict_quadratic<A>(
+    xs: Differentiable<A, 1>,
+    theta: Differentiable<A, 1>,
+) -> Differentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
+{
+    Differentiable::map(xs, &mut |x| {
+        let x_powers = vec![Scalar::make(A::one()), x.clone(), square(&x)];
+        let x_powers = Differentiable::of_vector(x_powers.into_iter().map(of_scalar).collect());
+        sum_2(dot_2(&x_powers, &theta))
+    })
+}
--- a/little_learner_app/src/main.rs
+++ b/little_learner_app/src/main.rs
@@ -8,21 +8,11 @@ use std::ops::{Add, AddAssign, Div, Mul, Neg};

 use little_learner::auto_diff::{of_scalar, of_slice, to_scalar, Differentiable};

-use little_learner::loss::{l2_loss_2, predict_line_2, square};
+use little_learner::loss::{l2_loss_2, predict_quadratic};
 use little_learner::scalar::Scalar;
 use little_learner::traits::{Exp, One, Zero};
 use ordered_float::NotNan;

-use crate::with_tensor::{l2_loss, predict_line};
-
-#[allow(dead_code)]
-fn l2_loss_non_autodiff_example() {
-    let xs = [2.0, 1.0, 4.0, 3.0];
-    let ys = [1.8, 1.2, 4.2, 3.3];
-    let loss = l2_loss(predict_line, &xs, &ys, &[0.0099, 0.0]);
-    println!("{:?}", loss);
-}
-
 fn iterate<A, F>(f: &F, start: A, n: u32) -> A
 where
    F: Fn(A) -> A,
@@ -33,10 +23,15 @@ where
    iterate(f, f(start), n - 1)
 }

+struct GradientDescentHyper<A, const RANK: usize> {
+    learning_rate: A,
+    iterations: u32,
+}
+
 fn gradient_descent_step<A, F, const RANK: usize>(
    f: &F,
-    learning_rate: A,
    theta: Differentiable<A, RANK>,
+    params: &GradientDescentHyper<A, RANK>,
 ) -> Differentiable<A, RANK>
 where
    A: Clone
@@ -54,20 +49,19 @@ where
 {
    let delta = Differentiable::grad(f, &theta);
    Differentiable::map2(&theta, &delta, &|theta, delta| {
-        (*theta).clone() - (Scalar::make(learning_rate.clone()) * (*delta).clone())
+        (*theta).clone() - (Scalar::make((params.learning_rate).clone()) * (*delta).clone())
    })
 }

 fn main() {
-    let input_vec = of_slice(&[NotNan::new(27.0).expect("not nan")]);
+    let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
+    let ys = [2.55, 2.1, 4.35, 10.2, 18.25];

-    let grad = Differentiable::grad(|x| Differentiable::map(x, &mut |x| square(&x)), &input_vec);
-    println!("Gradient of the x^2 function at x=27: {}", grad);
+    let hyper = GradientDescentHyper {
+        learning_rate: NotNan::new(0.001).expect("not nan"),
+        iterations: 1000,
+    };

-    let xs = [2.0, 1.0, 4.0, 3.0];
-    let ys = [1.8, 1.2, 4.2, 3.3];
-
-    let alpha = NotNan::new(0.01).expect("not nan");
    let iterated = {
        let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
        let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
@@ -76,18 +70,22 @@ fn main() {
                gradient_descent_step(
                    &|x| {
                        Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                            predict_line_2,
+                            predict_quadratic,
                            of_slice(&xs),
                            of_slice(&ys),
                            x,
                        ))])
                    },
-                    alpha,
                    theta,
+                    &hyper,
                )
            },
-            of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
-            1000,
+            of_slice(&[
+                NotNan::<f64>::zero(),
+                NotNan::<f64>::zero(),
+                NotNan::<f64>::zero(),
+            ]),
+            hyper.iterations,
        )
    };

@@ -104,7 +102,12 @@ fn main() {
 mod tests {
    use super::*;
    use arrayvec::ArrayVec;
-    use little_learner::auto_diff::to_scalar;
+    use little_learner::{
+        auto_diff::to_scalar,
+        loss::{predict_line_2, square},
+    };
+
+    use crate::with_tensor::{l2_loss, predict_line};

    #[test]
    fn loss_example() {
@@ -120,6 +123,28 @@ mod tests {
        assert_eq!(*loss.real_part(), 33.21);
    }

+    #[test]
+    fn l2_loss_non_autodiff_example() {
+        let xs = [2.0, 1.0, 4.0, 3.0];
+        let ys = [1.8, 1.2, 4.2, 3.3];
+        let loss = l2_loss(predict_line, &xs, &ys, &[0.0099, 0.0]);
+        assert_eq!(loss, 32.5892403);
+    }
+
+    #[test]
+    fn grad_example() {
+        let input_vec = of_slice(&[NotNan::new(27.0).expect("not nan")]);
+
+        let grad: Vec<_> = Differentiable::to_vector(Differentiable::grad(
+            |x| Differentiable::map(x, &mut |x| square(&x)),
+            &input_vec,
+        ))
+        .into_iter()
+        .map(|x| to_scalar(x).real_part().into_inner())
+        .collect();
+        assert_eq!(grad, [54.0]);
+    }
+
    #[test]
    fn loss_gradient() {
        let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
@@ -163,7 +188,10 @@ mod tests {
        let xs = [2.0, 1.0, 4.0, 3.0];
        let ys = [1.8, 1.2, 4.2, 3.3];

-        let alpha = NotNan::new(0.01).expect("not nan");
+        let hyper = GradientDescentHyper {
+            learning_rate: NotNan::new(0.01).expect("not nan"),
+            iterations: 1000,
+        };
        let iterated = {
            let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
            let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
@@ -178,12 +206,12 @@ mod tests {
                                x,
                            ))])
                        },
-                        alpha,
                        theta,
+                        &hyper,
                    )
                },
                of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
-                1000,
+                hyper.iterations,
            )
        };
        let iterated = Differentiable::to_vector(iterated)
@@ -193,4 +221,53 @@ mod tests {

        assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
    }
+
+    #[test]
+    fn optimise_quadratic() {
+        let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
+        let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
+
+        let hyper = GradientDescentHyper {
+            learning_rate: NotNan::new(0.001).expect("not nan"),
+            iterations: 1000,
+        };
+
+        let iterated = {
+            let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
+            let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+            iterate(
+                &|theta| {
+                    gradient_descent_step(
+                        &|x| {
+                            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
+                                predict_quadratic,
+                                of_slice(&xs),
+                                of_slice(&ys),
+                                x,
+                            ))])
+                        },
+                        theta,
+                        &hyper,
+                    )
+                },
+                of_slice(&[
+                    NotNan::<f64>::zero(),
+                    NotNan::<f64>::zero(),
+                    NotNan::<f64>::zero(),
+                ]),
+                hyper.iterations,
+            )
+        };
+        let iterated = Differentiable::to_vector(iterated)
+            .into_iter()
+            .map(|x| to_scalar(x).real_part().into_inner())
+            .collect::<Vec<_>>();
+
+        println!("{:?}", iterated);
+
+        assert_eq!(
+            iterated,
+            [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
+        );
+    }
 }
--- a/little_learner_app/src/with_tensor.rs
+++ b/little_learner_app/src/with_tensor.rs
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use std::iter::Sum;
 use std::ops::{Mul, Sub};