From 1ea0383710bdbe79bab7f94ac733d22ec3dddc8a Mon Sep 17 00:00:00 2001
From: Patrick Stevens
Date: Mon, 3 Apr 2023 11:36:31 +0100
Subject: [PATCH] Optimise my first function (#8)

---
 Cargo.lock                      |   1 +
 little_learner/src/auto_diff.rs |  10 +-
 little_learner/src/scalar.rs    |   4 +
 little_learner_app/Cargo.toml   |   3 +-
 little_learner_app/src/main.rs  | 191 +++++++++++++++++++++++++++-----
 5 files changed, 177 insertions(+), 32 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index c30cc64..1e333c7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -55,6 +55,7 @@ dependencies = [
 name = "little_learner_app"
 version = "0.1.0"
 dependencies = [
+ "arrayvec",
  "immutable-chunkmap",
  "little_learner",
  "ordered-float",
diff --git a/little_learner/src/auto_diff.rs b/little_learner/src/auto_diff.rs
index de60ff8..00d649f 100644
--- a/little_learner/src/auto_diff.rs
+++ b/little_learner/src/auto_diff.rs
@@ -132,8 +132,7 @@ where
         + Div<Output = A>
         + Zero
         + One
-        + Neg<Output = A>
-        + Display,
+        + Neg<Output = A>,
 {
     fn accumulate_gradients_vec(v: &[DifferentiableHidden<A>], acc: &mut HashMap<Scalar<A>, A>) {
         for v in v.iter().rev() {
@@ -242,7 +241,7 @@ impl<A> Differentiable<A> {
         }
     }
 
-    pub fn grad<F>(f: F, theta: Differentiable<A>) -> Differentiable<A>
+    pub fn grad<F>(f: F, theta: &Differentiable<A>) -> Differentiable<A>
     where
         F: Fn(Differentiable<A>) -> Differentiable<A>,
         A: Clone
@@ -254,8 +253,7 @@ impl<A> Differentiable<A> {
             + Zero
             + One
             + Neg<Output = A>
-            + Eq
-            + std::fmt::Display,
+            + Eq,
     {
         let mut i = 0usize;
         let wrt = theta.contents.map(&mut |x| {
@@ -332,7 +330,7 @@ mod tests {
                     x,
                 ))])
             },
-            input_vec,
+            &input_vec,
         );
 
         let grad_vec: Vec<f64> = Differentiable::to_vector(grad)
diff --git a/little_learner/src/scalar.rs b/little_learner/src/scalar.rs
index e20c2ed..719e4e7 100644
--- a/little_learner/src/scalar.rs
+++ b/little_learner/src/scalar.rs
@@ -237,6 +237,10 @@ impl<A> Scalar<A> {
     {
         Scalar::Dual(self.clone_real_part(), Link::EndOfLink(Some(index)))
     }
+
+    pub fn make(x: A) -> Scalar<A> {
+        Scalar::Number(x, None)
+    }
 }
 
 impl<A> Display for Scalar<A>
diff --git a/little_learner_app/Cargo.toml b/little_learner_app/Cargo.toml
index 2324893..721f1e5 100644
--- a/little_learner_app/Cargo.toml
+++ b/little_learner_app/Cargo.toml
@@ -8,4 +8,5 @@ edition = "2021"
 [dependencies]
 immutable-chunkmap = "1.0.5"
 ordered-float = "3.6.0"
-little_learner = { path = "../little_learner" }
\ No newline at end of file
+little_learner = { path = "../little_learner" }
+arrayvec = "0.7.2"
diff --git a/little_learner_app/src/main.rs b/little_learner_app/src/main.rs
index be1d840..0d4b1a0 100644
--- a/little_learner_app/src/main.rs
+++ b/little_learner_app/src/main.rs
@@ -3,10 +3,14 @@
 
 mod with_tensor;
 
-use little_learner::auto_diff::{of_scalar, of_slice, Differentiable};
+use core::hash::Hash;
+use std::ops::{Add, AddAssign, Div, Mul, Neg};
+
+use little_learner::auto_diff::{of_scalar, of_slice, to_scalar, Differentiable};
 
 use little_learner::loss::{l2_loss_2, predict_line_2, square};
-use little_learner::traits::Zero;
+use little_learner::scalar::Scalar;
+use little_learner::traits::{Exp, One, Zero};
 use ordered_float::NotNan;
 
 use crate::with_tensor::{l2_loss, predict_line};
@@ -19,37 +23,174 @@ fn l2_loss_non_autodiff_example() {
     println!("{:?}", loss);
 }
 
+fn iterate<A, F>(f: &F, start: A, n: u32) -> A
+where
+    F: Fn(A) -> A,
+{
+    if n == 0 {
+        return start;
+    }
+    iterate(f, f(start), n - 1)
+}
+
+fn gradient_descent_step<A, F>(
+    f: &F,
+    learning_rate: A,
+    theta: Differentiable<A>,
+) -> Differentiable<A>
+where
+    A: Clone
+        + Mul<Output = A>
+        + Neg<Output = A>
+        + Add<Output = A>
+        + Hash
+        + AddAssign
+        + Div<Output = A>
+        + Zero
+        + One
+        + Eq
+        + Exp,
+    F: Fn(Differentiable<A>) -> Differentiable<A>,
+{
+    let delta = Differentiable::grad(f, &theta);
+    Differentiable::map2(&theta, &delta, &|theta, delta| {
+        (*theta).clone() - (Scalar::make(learning_rate.clone()) * (*delta).clone())
+    })
+}
+
 fn main() {
     let input_vec = of_slice(&[NotNan::new(27.0).expect("not nan")]);
 
-    let grad = Differentiable::grad(|x| Differentiable::map(x, &mut |x| square(&x)), input_vec);
+    let grad = Differentiable::grad(|x| Differentiable::map(x, &mut |x| square(&x)), &input_vec);
     println!("Gradient of the x^2 function at x=27: {}", grad);
 
     let xs = [2.0, 1.0, 4.0, 3.0];
     let ys = [1.8, 1.2, 4.2, 3.3];
 
-    let loss = l2_loss_2(
-        predict_line_2,
-        of_slice(&xs),
-        of_slice(&ys),
-        of_slice(&[0.0, 0.0]),
-    );
-    println!("Computation of L2 loss: {}", loss);
+    let alpha = NotNan::new(0.01).expect("not nan");
+    let iterated = {
+        let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
+        let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+        iterate(
+            &|theta| {
+                gradient_descent_step(
+                    &|x| {
+                        Differentiable::of_vector(vec![of_scalar(l2_loss_2(
+                            predict_line_2,
+                            of_slice(&xs),
+                            of_slice(&ys),
+                            x,
+                        ))])
+                    },
+                    alpha,
+                    theta,
+                )
+            },
+            of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
+            1000,
+        )
+    };
 
-    let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
-    let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
-    let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
-    let grad = Differentiable::grad(
-        |x| {
-            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                predict_line_2,
-                of_slice(&xs),
-                of_slice(&ys),
-                x,
-            ))])
-        },
-        input_vec,
+    println!(
+        "After iteration: {:?}",
+        Differentiable::to_vector(iterated)
+            .into_iter()
+            .map(|x| to_scalar(x).real_part().into_inner())
+            .collect::<Vec<_>>()
     );
-
-    println!("{}", grad);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrayvec::ArrayVec;
+    use little_learner::auto_diff::to_scalar;
+
+    #[test]
+    fn loss_example() {
+        let xs = [2.0, 1.0, 4.0, 3.0];
+        let ys = [1.8, 1.2, 4.2, 3.3];
+        let loss = l2_loss_2(
+            predict_line_2,
+            of_slice(&xs),
+            of_slice(&ys),
+            of_slice(&[0.0, 0.0]),
+        );
+
+        assert_eq!(*loss.real_part(), 33.21);
+    }
+
+    #[test]
+    fn loss_gradient() {
+        let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
+        let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
+        let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
+        let grad = Differentiable::grad(
+            |x| {
+                Differentiable::of_vector(vec![of_scalar(l2_loss_2(
+                    predict_line_2,
+                    of_slice(&xs),
+                    of_slice(&ys),
+                    x,
+                ))])
+            },
+            &input_vec,
+        );
+
+        assert_eq!(
+            Differentiable::to_vector(grad)
+                .into_iter()
+                .map(|x| *(to_scalar(x).real_part()))
+                .collect::<Vec<_>>(),
+            [-63.0, -21.0]
+        );
+    }
+
+    #[test]
+    fn test_iterate() {
+        let f = |t: [i32; 3]| {
+            let mut vec = ArrayVec::<i32, 3>::new();
+            for i in t {
+                vec.push(i - 3);
+            }
+            vec.into_inner().unwrap()
+        };
+        assert_eq!(iterate(&f, [1, 2, 3], 5u32), [-14, -13, -12]);
+    }
+
+    #[test]
+    fn first_optimisation_test() {
+        let xs = [2.0, 1.0, 4.0, 3.0];
+        let ys = [1.8, 1.2, 4.2, 3.3];
+
+        let alpha = NotNan::new(0.01).expect("not nan");
+        let iterated = {
+            let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
+            let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+            iterate(
+                &|theta| {
+                    gradient_descent_step(
+                        &|x| {
+                            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
+                                predict_line_2,
+                                of_slice(&xs),
+                                of_slice(&ys),
+                                x,
+                            ))])
+                        },
+                        alpha,
+                        theta,
+                    )
+                },
+                of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
+                1000,
+            )
+        };
+        let iterated = Differentiable::to_vector(iterated)
+            .into_iter()
+            .map(|x| to_scalar(x).real_part().into_inner())
+            .collect::<Vec<_>>();
+
+        assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
+    }
 }