From 64d98757f4d60dab8ee9e25670057e3cbbf48e52 Mon Sep 17 00:00:00 2001 From: Patrick Stevens Date: Sun, 30 Apr 2023 13:09:16 +0100 Subject: [PATCH] Make Scalar numlike (#15) --- little_learner/src/auto_diff.rs | 50 +++++++++++ little_learner/src/lib.rs | 1 + little_learner/src/loss.rs | 34 +++++-- little_learner/src/not_nan.rs | 15 ++++ little_learner/src/scalar.rs | 101 ++++++++++++++++++++- little_learner/src/traits.rs | 7 +- little_learner_app/src/main.rs | 125 +++----------------------- little_learner_app/src/with_tensor.rs | 8 ++ 8 files changed, 218 insertions(+), 123 deletions(-) create mode 100644 little_learner/src/not_nan.rs diff --git a/little_learner/src/auto_diff.rs b/little_learner/src/auto_diff.rs index 1835286..dbe0515 100644 --- a/little_learner/src/auto_diff.rs +++ b/little_learner/src/auto_diff.rs @@ -478,6 +478,7 @@ mod tests { use ordered_float::NotNan; use crate::loss::{l2_loss_2, predict_line_2_unranked}; + use crate::not_nan::to_not_nan_1; use super::*; @@ -539,4 +540,53 @@ mod tests { .map(|x| f64::from(*x.real_part())); assert_eq!(grad_vec, [-63.0, -21.0]); } + + #[test] + fn grad_example() { + let input_vec = [Differentiable::of_scalar(Scalar::make( + NotNan::new(27.0).expect("not nan"), + ))]; + + let grad: Vec<_> = grad( + |x| { + RankedDifferentiable::of_scalar( + x[0].borrow_scalar().clone() * x[0].borrow_scalar().clone(), + ) + }, + &input_vec, + ) + .into_iter() + .map(|x| x.into_scalar().real_part().into_inner()) + .collect(); + assert_eq!(grad, [54.0]); + } + + #[test] + fn loss_gradient() { + let zero = Scalar::>::zero(); + let input_vec = [ + RankedDifferentiable::of_scalar(zero.clone()).to_unranked(), + RankedDifferentiable::of_scalar(zero).to_unranked(), + ]; + let xs = to_not_nan_1([2.0, 1.0, 4.0, 3.0]); + let ys = to_not_nan_1([1.8, 1.2, 4.2, 3.3]); + let grad = grad( + |x| { + RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(l2_loss_2( + predict_line_2_unranked, + RankedDifferentiable::of_slice(&xs), + RankedDifferentiable::of_slice(&ys), + x, + ))]) + }, + &input_vec, + ); + + assert_eq!( + grad.into_iter() + .map(|x| *(x.into_scalar().real_part())) + .collect::>(), + [-63.0, -21.0] + ); + } } diff --git a/little_learner/src/lib.rs b/little_learner/src/lib.rs index 3bfa7ab..85e9617 100644 --- a/little_learner/src/lib.rs +++ b/little_learner/src/lib.rs @@ -6,6 +6,7 @@ pub mod auto_diff; pub mod const_teq; pub mod expr_syntax_tree; pub mod loss; +pub mod not_nan; pub mod scalar; pub mod tensor; pub mod traits; diff --git a/little_learner/src/loss.rs b/little_learner/src/loss.rs index 6096ef6..534ed33 100644 --- a/little_learner/src/loss.rs +++ b/little_learner/src/loss.rs @@ -232,8 +232,7 @@ type ParameterPredictor = &[Differentiable; THETA], ) -> RankedDifferentiable; -pub const fn plane_predictor( -) -> Predictor, [Differentiable; 2], [Differentiable; 2]> +pub const fn plane_predictor() -> Predictor, Scalar, Scalar> where T: NumLike + Default, { @@ -245,9 +244,9 @@ where } pub const fn line_unranked_predictor( -) -> Predictor, [Differentiable; 2], [Differentiable; 2]> +) -> Predictor, Scalar, Scalar> where - T: NumLike + Default, + T: NumLike + Default + Copy, { Predictor { predict: predict_line_2_unranked, @@ -257,7 +256,7 @@ where } pub const fn quadratic_unranked_predictor( -) -> Predictor, [Differentiable; 3], [Differentiable; 3]> +) -> Predictor, Scalar, Scalar> where T: NumLike + Default, { @@ -267,3 +266,28 @@ where deflate: |x| x, } } + +#[cfg(test)] +mod test_loss { + use crate::auto_diff::RankedDifferentiable; + use 
crate::loss::{l2_loss_2, predict_line_2}; + use crate::scalar::Scalar; + use crate::traits::Zero; + + #[test] + fn loss_example() { + let xs = [2.0, 1.0, 4.0, 3.0]; + let ys = [1.8, 1.2, 4.2, 3.3]; + let loss = l2_loss_2( + predict_line_2, + RankedDifferentiable::of_slice(&xs), + RankedDifferentiable::of_slice(&ys), + &[ + RankedDifferentiable::of_scalar(Scalar::zero()), + RankedDifferentiable::of_scalar(Scalar::zero()), + ], + ); + + assert_eq!(*loss.real_part(), 33.21); + } +} diff --git a/little_learner/src/not_nan.rs b/little_learner/src/not_nan.rs new file mode 100644 index 0000000..02b698b --- /dev/null +++ b/little_learner/src/not_nan.rs @@ -0,0 +1,15 @@ +use ordered_float::NotNan; + +pub fn to_not_nan_1(xs: [T; N]) -> [NotNan; N] +where + T: ordered_float::Float, +{ + xs.map(|x| NotNan::new(x).expect("not nan")) +} + +pub fn to_not_nan_2(xs: [[T; N]; M]) -> [[NotNan; N]; M] +where + T: ordered_float::Float, +{ + xs.map(to_not_nan_1) +} diff --git a/little_learner/src/scalar.rs b/little_learner/src/scalar.rs index 668fa87..2312678 100644 --- a/little_learner/src/scalar.rs +++ b/little_learner/src/scalar.rs @@ -14,6 +14,7 @@ pub enum LinkData { Mul(Box>, Box>), Exponent(Box>), Log(Box>), + Div(Box>, Box>), } #[derive(Clone, Hash, PartialEq, Eq, Debug)] @@ -41,6 +42,9 @@ where f.write_fmt(format_args!("exp({})", arg.as_ref())) } Link::Link(LinkData::Log(arg)) => f.write_fmt(format_args!("log({})", arg.as_ref())), + Link::Link(LinkData::Div(left, right)) => { + f.write_fmt(format_args!("({} / {})", left.as_ref(), right.as_ref())) + } } } } @@ -96,6 +100,21 @@ impl Link { .clone_link() .invoke(&right, left.clone_real_part() * z, acc); } + LinkData::Div(left, right) => { + // d/dx(f / g) = f d(1/g)/dx + (df/dx) / g + // = -f (dg/dx)/g^2 + (df/dx) / g + left.as_ref().clone_link().invoke( + &left, + z.clone() / right.clone_real_part(), + acc, + ); + right.as_ref().clone_link().invoke( + &right, + -left.clone_real_part() * z + / (right.clone_real_part() * right.clone_real_part()), + acc, + ) + } LinkData::Log(arg) => { // d/dx(log y) = 1/y dy/dx arg.as_ref().clone_link().invoke( @@ -144,6 +163,15 @@ where } } +impl AddAssign for Scalar +where + A: Add + Clone, +{ + fn add_assign(&mut self, rhs: Self) { + *self = self.clone() + rhs + } +} + impl Neg for Scalar where A: Neg + Clone, @@ -190,12 +218,47 @@ where fn sum>(iter: I) -> Self { let mut answer = Zero::zero(); for i in iter { - answer = answer + i; + answer += i; } answer } } +impl Exp for Scalar +where + A: Exp + Clone, +{ + fn exp(self) -> Self { + Self::Dual( + self.clone_real_part().exp(), + Link::Link(LinkData::Exponent(Box::new(self))), + ) + } +} + +impl Div for Scalar +where + A: Div + Clone, +{ + type Output = Scalar; + + fn div(self, rhs: Self) -> Self::Output { + Self::Dual( + self.clone_real_part() / rhs.clone_real_part(), + Link::Link(LinkData::Div(Box::new(self), Box::new(rhs))), + ) + } +} + +impl Default for Scalar +where + A: Default, +{ + fn default() -> Self { + Scalar::Number(A::default(), None) + } +} + impl Scalar { pub fn real_part(&self) -> &A { match self { @@ -255,3 +318,39 @@ where } } } + +#[cfg(test)] +mod test_loss { + use crate::scalar::Scalar; + use ordered_float::NotNan; + use std::collections::HashMap; + + #[test] + fn div_gradient() { + let left = Scalar::make(NotNan::new(3.0).expect("not nan")); + let right = Scalar::make(NotNan::new(5.0).expect("not nan")); + let divided = left / right; + assert_eq!(divided.clone_real_part().into_inner(), 3.0 / 5.0); + let mut acc = HashMap::new(); + divided + 
.clone_link() + .invoke(÷d, NotNan::new(1.0).expect("not nan"), &mut acc); + + // Derivative of x/5 with respect to x is the constant 1/5 + // Derivative of 3/x with respect to x is -3/x^2, so at the value 5 is -3/25 + assert_eq!(acc.len(), 2); + for (key, value) in acc { + let key = key.real_part().into_inner(); + let value = value.into_inner(); + if key < 4.0 { + // This is the numerator. + assert_eq!(key, 3.0); + assert_eq!(value, 1.0 / 5.0); + } else { + // This is the denominator. + assert_eq!(key, 5.0); + assert_eq!(value, -3.0 / 25.0); + } + } + } +} diff --git a/little_learner/src/traits.rs b/little_learner/src/traits.rs index 4225258..fecebda 100644 --- a/little_learner/src/traits.rs +++ b/little_learner/src/traits.rs @@ -1,3 +1,4 @@ +use crate::scalar::Scalar; use ordered_float::NotNan; use std::iter::Sum; use std::ops::{Add, AddAssign, Div, Mul, Neg}; @@ -54,11 +55,13 @@ pub trait NumLike: + Mul + Div + Sum - + Default + Clone - + Copy + Sized + + PartialEq + + Eq { } impl NumLike for NotNan {} + +impl NumLike for Scalar where A: NumLike {} diff --git a/little_learner_app/src/main.rs b/little_learner_app/src/main.rs index efd352c..84a8038 100644 --- a/little_learner_app/src/main.rs +++ b/little_learner_app/src/main.rs @@ -11,6 +11,7 @@ use little_learner::auto_diff::{grad, Differentiable, RankedDifferentiable}; use crate::sample::sample2; use little_learner::loss::{l2_loss_2, plane_predictor, Predictor}; +use little_learner::not_nan::{to_not_nan_1, to_not_nan_2}; use little_learner::scalar::Scalar; use little_learner::traits::{NumLike, Zero}; use ordered_float::NotNan; @@ -47,7 +48,7 @@ where let delta = &delta[i]; i += 1; // For speed, you might want to truncate_dual this. - let learning_rate = Scalar::make(learning_rate); + let learning_rate = Scalar::make(learning_rate.clone()); Differentiable::map2( &theta, &delta.map(&mut |s| s * learning_rate.clone()), @@ -56,37 +57,27 @@ where }) } -fn gradient_descent< - 'a, - T, - R: Rng, - Point, - F, - G, - const IN_SIZE: usize, - const PARAM_NUM: usize, - const INFLATED_NUM: usize, ->( +fn gradient_descent<'a, T, R: Rng, Point, F, G, const IN_SIZE: usize, const PARAM_NUM: usize>( mut hyper: GradientDescentHyper, xs: &'a [Point], to_ranked_differentiable: G, ys: &[T], zero_params: [Differentiable; PARAM_NUM], - predictor: Predictor; INFLATED_NUM], [Differentiable; PARAM_NUM]>, + mut predictor: Predictor, Scalar>, ) -> [Differentiable; PARAM_NUM] where - T: NumLike + Eq + Hash, + T: NumLike + Hash + Copy + Default, Point: 'a + Copy, F: Fn( RankedDifferentiable, - &[Differentiable; INFLATED_NUM], + &[Differentiable; PARAM_NUM], ) -> RankedDifferentiable, G: for<'b> Fn(&'b [Point]) -> RankedDifferentiable, { let iterations = hyper.iterations; iterate( |theta| { - let out = gradient_descent_step::( + let out = gradient_descent_step::( &mut |x| match hyper.sampling.as_mut() { None => RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar( l2_loss_2( @@ -108,30 +99,16 @@ where )]) } }, - (predictor.inflate)(theta), + theta.map(|x| x.map(&mut predictor.inflate)), hyper.learning_rate, ); - (predictor.deflate)(out) + out.map(|x| x.map(&mut predictor.deflate)) }, zero_params, iterations, ) } -fn to_not_nan_1(xs: [T; N]) -> [NotNan; N] -where - T: ordered_float::Float, -{ - xs.map(|x| NotNan::new(x).expect("not nan")) -} - -fn to_not_nan_2(xs: [[T; N]; M]) -> [[NotNan; N]; M] -where - T: ordered_float::Float, -{ - xs.map(to_not_nan_1) -} - fn collect_vec(input: RankedDifferentiable, 1>) -> Vec where T: Copy, @@ -194,91 +171,9 @@ fn 
main() { #[cfg(test)] mod tests { use super::*; - use little_learner::{ - auto_diff::grad, - loss::{ - l2_loss_2, line_unranked_predictor, predict_line_2, predict_line_2_unranked, - quadratic_unranked_predictor, - }, - }; + use little_learner::loss::{line_unranked_predictor, quadratic_unranked_predictor}; use rand::SeedableRng; - use crate::with_tensor::{l2_loss, predict_line}; - - #[test] - fn loss_example() { - let xs = [2.0, 1.0, 4.0, 3.0]; - let ys = [1.8, 1.2, 4.2, 3.3]; - let loss = l2_loss_2( - predict_line_2, - RankedDifferentiable::of_slice(&xs), - RankedDifferentiable::of_slice(&ys), - &[ - RankedDifferentiable::of_scalar(Scalar::zero()), - RankedDifferentiable::of_scalar(Scalar::zero()), - ], - ); - - assert_eq!(*loss.real_part(), 33.21); - } - - #[test] - fn l2_loss_non_autodiff_example() { - let xs = [2.0, 1.0, 4.0, 3.0]; - let ys = [1.8, 1.2, 4.2, 3.3]; - let loss = l2_loss(predict_line, &xs, &ys, &[0.0099, 0.0]); - assert_eq!(loss, 32.5892403); - } - - #[test] - fn grad_example() { - let input_vec = [Differentiable::of_scalar(Scalar::make( - NotNan::new(27.0).expect("not nan"), - ))]; - - let grad: Vec<_> = grad( - |x| { - RankedDifferentiable::of_scalar( - x[0].borrow_scalar().clone() * x[0].borrow_scalar().clone(), - ) - }, - &input_vec, - ) - .into_iter() - .map(|x| x.into_scalar().real_part().into_inner()) - .collect(); - assert_eq!(grad, [54.0]); - } - - #[test] - fn loss_gradient() { - let zero = Scalar::>::zero(); - let input_vec = [ - RankedDifferentiable::of_scalar(zero.clone()).to_unranked(), - RankedDifferentiable::of_scalar(zero).to_unranked(), - ]; - let xs = to_not_nan_1([2.0, 1.0, 4.0, 3.0]); - let ys = to_not_nan_1([1.8, 1.2, 4.2, 3.3]); - let grad = grad( - |x| { - RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(l2_loss_2( - predict_line_2_unranked, - RankedDifferentiable::of_slice(&xs), - RankedDifferentiable::of_slice(&ys), - x, - ))]) - }, - &input_vec, - ); - - assert_eq!( - grad.into_iter() - .map(|x| *(x.into_scalar().real_part())) - .collect::>(), - [-63.0, -21.0] - ); - } - #[test] fn test_iterate() { let f = |t: [i32; 3]| t.map(|i| i - 3); diff --git a/little_learner_app/src/with_tensor.rs b/little_learner_app/src/with_tensor.rs index c5760ef..cc78083 100644 --- a/little_learner_app/src/with_tensor.rs +++ b/little_learner_app/src/with_tensor.rs @@ -125,4 +125,12 @@ mod tests { ); assert_eq!((100.0 * loss).round() / 100.0, 32.59); } + + #[test] + fn l2_loss_non_autodiff_example() { + let xs = [2.0, 1.0, 4.0, 3.0]; + let ys = [1.8, 1.2, 4.2, 3.3]; + let loss = l2_loss(predict_line, &xs, &ys, &[0.0099, 0.0]); + assert_eq!(loss, 32.5892403); + } }
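
Note on the new Div link: the backward pass added in little_learner/src/scalar.rs applies the quotient rule (d(f/g)/df = 1/g, d(f/g)/dg = -f/g^2), and the div_gradient test checks the partials of 3/5 come out as 1/5 and -3/25. The standalone sketch below sanity-checks those same numbers against central finite differences; it is illustrative only, uses plain f64 rather than the crate's Scalar/NotNan types, and the helper names (analytic_partials, numeric_partial) are not part of the repository.

// Standalone sanity check of the quotient rule used by LinkData::Div:
//   d(f/g)/df =  1 / g
//   d(f/g)/dg = -f / g^2
// Helper names here are illustrative, not taken from the crate.

fn analytic_partials(f: f64, g: f64) -> (f64, f64) {
    (1.0 / g, -f / (g * g))
}

// Central finite difference of `h` at `x`.
fn numeric_partial(h: impl Fn(f64) -> f64, x: f64) -> f64 {
    let eps = 1e-6;
    (h(x + eps) - h(x - eps)) / (2.0 * eps)
}

fn main() {
    let (f, g) = (3.0, 5.0);
    let (d_f, d_g) = analytic_partials(f, g);

    // Same values the div_gradient test above expects: 1/5 and -3/25.
    let d_f_num = numeric_partial(|x| x / g, f);
    let d_g_num = numeric_partial(|x| f / x, g);

    assert!((d_f - d_f_num).abs() < 1e-6);
    assert!((d_g - d_g_num).abs() < 1e-6);
    println!("d/df = {d_f}, d/dg = {d_g}");
}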