From deb0ec67cafe852ed28654bb547fb7bf82980d5b Mon Sep 17 00:00:00 2001 From: Patrick Stevens Date: Sun, 7 May 2023 23:57:58 +0100 Subject: [PATCH] Add docs, delete old code, truncate scalars where possible (#21) --- little_learner/src/auto_diff.rs | 49 ++++--- little_learner/src/const_teq.rs | 13 -- little_learner/src/expr_syntax_tree.rs | 155 -------------------- little_learner/src/gradient_descent.rs | 66 ++++----- little_learner/src/hyper.rs | 190 +++++++++++++------------ little_learner/src/lib.rs | 4 +- little_learner/src/loss.rs | 115 ++------------- little_learner/src/predictor.rs | 111 +++++++++++++++ little_learner/src/sample.rs | 1 + little_learner/src/scalar.rs | 20 +-- little_learner/src/smooth.rs | 42 +++--- little_learner/src/tensor.rs | 107 -------------- little_learner/src/traits.rs | 4 + little_learner_app/src/main.rs | 55 +++---- little_learner_app/src/with_tensor.rs | 136 ------------------ 15 files changed, 349 insertions(+), 719 deletions(-) delete mode 100644 little_learner/src/const_teq.rs delete mode 100644 little_learner/src/expr_syntax_tree.rs create mode 100644 little_learner/src/predictor.rs delete mode 100644 little_learner/src/tensor.rs delete mode 100644 little_learner_app/src/with_tensor.rs diff --git a/little_learner/src/auto_diff.rs b/little_learner/src/auto_diff.rs index fdb133c..e32511f 100644 --- a/little_learner/src/auto_diff.rs +++ b/little_learner/src/auto_diff.rs @@ -84,11 +84,11 @@ where { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - DifferentiableContents::Scalar(s, _) => f.write_fmt(format_args!("{}", s)), + DifferentiableContents::Scalar(s, _) => f.write_fmt(format_args!("{s}")), DifferentiableContents::Vector(v, _rank) => { f.write_char('[')?; for v in v.iter() { - f.write_fmt(format_args!("{}", v))?; + f.write_fmt(format_args!("{v}"))?; f.write_char(',')?; } f.write_char(']') @@ -159,6 +159,12 @@ impl DifferentiableContents { } } + /// This function does *not* check that its inputs are of exactly the same shape, though it + /// does check ranks. If you have two vectors of different lengths, you will silently get the + /// shorter one. + /// + /// # Panics + /// Panics if the two inputs have different shapes (e.g. if they have different ranks). fn map2( &self, other: &DifferentiableContents, @@ -180,9 +186,7 @@ impl DifferentiableContents { DifferentiableContents::Vector(slice_a, rank_a), DifferentiableContents::Vector(slice_b, rank_b), ) => { - if rank_a != rank_b { - panic!("Unexpectedly different ranks in map2"); - } + assert_eq!(rank_a, rank_b, "Unexpectedly different ranks in map2"); DifferentiableContents::Vector( slice_a .iter() @@ -367,10 +371,11 @@ impl DifferentiableTagged { } } + /// # Panics + /// Panics if the input is empty (otherwise we can't determine a rank). 
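+    ///
+    /// A sketch of typical usage, mirroring the `test_map` test below and the
+    /// smoothing tests elsewhere in this crate (the `NotNan<f64>` element type is
+    /// just what those tests happen to use):
+    ///
+    /// ```ignore
+    /// // A rank-1 tensor built out of two rank-0 (scalar) tensors.
+    /// let v = Differentiable::of_vec(vec![
+    ///     Differentiable::of_scalar(Scalar::make(NotNan::new(3.0).expect("not nan"))),
+    ///     Differentiable::of_scalar(Scalar::make(NotNan::new(4.0).expect("not nan"))),
+    /// ]);
+    /// assert_eq!(v.rank(), 1);
+    /// ```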
+ #[must_use] pub fn of_vec(input: Vec>) -> DifferentiableTagged { - if input.is_empty() { - panic!("Can't make an empty tensor"); - } + assert!(!input.is_empty(), "Can't make an empty tensor"); let rank = input[0].rank(); DifferentiableTagged { contents: DifferentiableContents::Vector(input, 1 + rank), @@ -413,7 +418,7 @@ where k.invoke(y, A::one(), acc); } DifferentiableContents::Vector(y, _rank) => { - DifferentiableContents::accumulate_gradients_vec(y, acc) + DifferentiableContents::accumulate_gradients_vec(y, acc); } } } @@ -543,6 +548,7 @@ impl RankedDifferentiableTagged { &self.contents } + #[must_use] pub fn of_vector( s: Vec>, ) -> RankedDifferentiableTagged { @@ -683,19 +689,16 @@ mod tests { #[test] fn test_map() { - let v = DifferentiableTagged::of_vec( - vec![ - Differentiable::of_scalar(Scalar::Number( - NotNan::new(3.0).expect("3 is not NaN"), - Some(0usize), - )), - DifferentiableTagged::of_scalar(Scalar::Number( - NotNan::new(4.0).expect("4 is not NaN"), - Some(1usize), - )), - ] - .into(), - ); + let v = DifferentiableTagged::of_vec(vec![ + Differentiable::of_scalar(Scalar::Number( + NotNan::new(3.0).expect("3 is not NaN"), + Some(0usize), + )), + DifferentiableTagged::of_scalar(Scalar::Number( + NotNan::new(4.0).expect("4 is not NaN"), + Some(1usize), + )), + ]); let mapped = v.map(&mut |x: Scalar>| match x { Scalar::Number(i, n) => Scalar::Number(i + NotNan::new(1.0).expect("1 is not NaN"), n), Scalar::Dual(_, _) => panic!("Not hit"), @@ -704,7 +707,7 @@ mod tests { let v = mapped .into_vector() .iter() - .map(|d| extract_scalar(d).clone()) + .map(|d| *extract_scalar(d)) .collect::>(); assert_eq!(v, [4.0, 5.0]); diff --git a/little_learner/src/const_teq.rs b/little_learner/src/const_teq.rs deleted file mode 100644 index 6952ee2..0000000 --- a/little_learner/src/const_teq.rs +++ /dev/null @@ -1,13 +0,0 @@ -use std::marker::PhantomData; - -pub struct ConstTeq { - phantom_a: PhantomData<[(); A]>, - phantom_b: PhantomData<[(); B]>, -} - -pub fn make() -> ConstTeq { - ConstTeq { - phantom_a: Default::default(), - phantom_b: Default::default(), - } -} diff --git a/little_learner/src/expr_syntax_tree.rs b/little_learner/src/expr_syntax_tree.rs deleted file mode 100644 index 435af58..0000000 --- a/little_learner/src/expr_syntax_tree.rs +++ /dev/null @@ -1,155 +0,0 @@ -use immutable_chunkmap::map; -use std::ops::{Add, Mul}; - -/* -An untyped syntax tree for an expression whose constants are all of type `A`. -*/ -#[derive(Clone, Debug)] -pub enum Expr { - Const(A), - Sum(Box>, Box>), - Variable(u32), - // The first `Expr` here is a function, which may reference the input variable `Variable(i)`. 
- // For example, `(fun x y -> x + y) 3 4` is expressed as: - // Apply(0, Apply(1, Sum(Variable(0), Variable(1)), Const(4)), Const(3)) - Apply(u32, Box>, Box>), - Mul(Box>, Box>), -} - -impl Expr { - fn eval_inner(e: &Expr, ctx: &map::Map) -> A - where - A: Clone + Add + Mul, - { - match &e { - Expr::Const(x) => x.clone(), - Expr::Sum(x, y) => Expr::eval_inner(x, ctx) + Expr::eval_inner(y, ctx), - Expr::Variable(id) => ctx - .get(id) - .unwrap_or_else(|| panic!("No binding found for free variable {}", id)) - .clone(), - Expr::Apply(variable, func, arg) => { - let arg = Expr::eval_inner(arg, ctx); - let (updated_context, _) = ctx.insert(*variable, arg); - Expr::eval_inner(func, &updated_context) - } - Expr::Mul(x, y) => Expr::eval_inner(x, ctx) * Expr::eval_inner(y, ctx), - } - } - - pub fn eval(e: &Expr) -> A - where - A: Clone + Add + Mul, - { - Expr::eval_inner(e, &map::Map::::new()) - } - - pub fn apply(var: u32, f: Expr, arg: Expr) -> Expr { - Expr::Apply(var, Box::new(f), Box::new(arg)) - } - - pub fn differentiate(one: &A, zero: &A, var: u32, f: &Expr) -> Expr - where - A: Clone, - { - match f { - Expr::Const(_) => Expr::Const(zero.clone()), - Expr::Sum(x, y) => { - Expr::differentiate(one, zero, var, x) + Expr::differentiate(one, zero, var, y) - } - Expr::Variable(i) => { - if *i == var { - Expr::Const(one.clone()) - } else { - Expr::Const(zero.clone()) - } - } - Expr::Mul(x, y) => { - Expr::Mul( - Box::new(Expr::differentiate(one, zero, var, x.as_ref())), - (*y).clone(), - ) + Expr::Mul( - Box::new(Expr::differentiate(one, zero, var, y.as_ref())), - (*x).clone(), - ) - } - Expr::Apply(new_var, func, expr) => { - if *new_var == var { - panic!( - "cannot differentiate with respect to variable {} that's been assigned", - var - ) - } - let expr_deriv = Expr::differentiate(one, zero, var, expr); - Expr::mul( - expr_deriv, - Expr::Apply( - *new_var, - Box::new(Expr::differentiate(one, zero, *new_var, func)), - (*expr).clone(), - ), - ) - } - } - } -} - -impl Add for Expr { - type Output = Expr; - fn add(self: Expr, y: Expr) -> Expr { - Expr::Sum(Box::new(self), Box::new(y)) - } -} - -impl Mul for Expr { - type Output = Expr; - fn mul(self: Expr, y: Expr) -> Expr { - Expr::Mul(Box::new(self), Box::new(y)) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_expr() { - let expr = Expr::apply( - 0, - Expr::apply(1, Expr::Variable(0) + Expr::Variable(1), Expr::Const(4)), - Expr::Const(3), - ); - - assert_eq!(Expr::eval::<2>(&expr), 7); - } - - #[test] - fn test_derivative() { - let add_four = Expr::Variable(0) + Expr::Const(4); - let mul_five = Expr::Variable(1) * Expr::Const(5); - - { - let mul_five_then_add_four = Expr::apply(0, add_four.clone(), mul_five.clone()); - let mul_then_add_diff = Expr::differentiate(&1, &0, 1, &mul_five_then_add_four); - for i in 3..10 { - // (5x + 4) differentiates to 5 - assert_eq!( - Expr::eval::<2>(&Expr::apply(1, mul_then_add_diff.clone(), Expr::Const(i))), - 5 - ); - } - } - - { - let add_four_then_mul_five = Expr::apply(1, mul_five.clone(), add_four.clone()); - let add_then_mul_diff = Expr::differentiate(&1, &0, 0, &add_four_then_mul_five); - for i in 3..10 { - // ((x + 4) * 5) differentiates to 5 - assert_eq!( - Expr::eval::<2>(&Expr::apply(0, add_then_mul_diff.clone(), Expr::Const(i))), - 5 - ); - } - } - } -} diff --git a/little_learner/src/gradient_descent.rs b/little_learner/src/gradient_descent.rs index f136352..0dc7a24 100644 --- a/little_learner/src/gradient_descent.rs +++ b/little_learner/src/gradient_descent.rs @@ -1,6 +1,7 
@@ use crate::auto_diff::{grad, Differentiable, RankedDifferentiable}; -use crate::hyper::BaseGradientDescentHyper; -use crate::loss::{l2_loss_2, Predictor}; +use crate::hyper; +use crate::loss::l2_loss_2; +use crate::predictor::Predictor; use crate::sample::sample2; use crate::traits::NumLike; use rand::Rng; @@ -84,12 +85,12 @@ where G: for<'b> Fn(&'b [Point]) -> RankedDifferentiable, Inflated: Clone, ImmutableHyper: Clone, - Hyper: Into>, + Hyper: Into>, H: FnOnce(&Hyper) -> ImmutableHyper, R: Rng, { let sub_hypers = to_immutable(&hyper); - let mut gradient_hyper: BaseGradientDescentHyper = hyper.into(); + let mut gradient_hyper: hyper::BaseGradientDescent = hyper.into(); let iterations = gradient_hyper.iterations; let out = iterate( |theta| { @@ -131,12 +132,10 @@ where mod tests { use super::*; use crate::auto_diff::RankedDifferentiableTagged; - use crate::hyper::{RmsGradientDescentHyper, VelocityGradientDescentHyper}; - use crate::loss::{ - naked_predictor, predict_line_2_unranked, predict_plane, predict_quadratic_unranked, - rms_predictor, velocity_predictor, - }; + use crate::hyper; + use crate::loss::{predict_line_2_unranked, predict_plane, predict_quadratic_unranked}; use crate::not_nan::{to_not_nan_1, to_not_nan_2}; + use crate::predictor; use crate::scalar::Scalar; use crate::traits::Zero; use ordered_float::NotNan; @@ -156,7 +155,7 @@ mod tests { let zero = Scalar::>::zero(); - let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.01).expect("not nan"), 1000); + let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.01).expect("not nan"), 1000); let iterated = { let xs = to_not_nan_1(xs); let ys = to_not_nan_1(ys); @@ -170,8 +169,8 @@ mod tests { |b| RankedDifferentiable::of_slice(b), &ys, zero_params, - naked_predictor(predict_line_2_unranked), - BaseGradientDescentHyper::to_immutable, + predictor::naked(predict_line_2_unranked), + hyper::NakedGradientDescent::to_immutable, ) }; let iterated = iterated @@ -189,7 +188,7 @@ mod tests { let zero = Scalar::>::zero(); - let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000); + let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000); let iterated = { let xs = to_not_nan_1(xs); @@ -205,8 +204,8 @@ mod tests { |b| RankedDifferentiable::of_slice(b), &ys, zero_params, - naked_predictor(predict_quadratic_unranked), - BaseGradientDescentHyper::to_immutable, + predictor::naked(predict_quadratic_unranked), + hyper::NakedGradientDescent::to_immutable, ) }; let iterated = iterated @@ -232,7 +231,7 @@ mod tests { #[test] fn optimise_plane() { - let mut hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000); + let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000); let iterated = { let xs = to_not_nan_2(PLANE_XS); @@ -247,8 +246,8 @@ mod tests { RankedDifferentiable::of_slice_2::<_, 2>, &ys, zero_params, - naked_predictor(predict_plane), - BaseGradientDescentHyper::to_immutable, + predictor::naked(predict_plane), + hyper::NakedGradientDescent::to_immutable, ) }; @@ -267,7 +266,7 @@ mod tests { #[test] fn optimise_plane_with_sampling() { let rng = StdRng::seed_from_u64(314159); - let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000) + let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000) .with_rng(rng, 4); let iterated = { @@ -283,8 +282,8 @@ mod tests { RankedDifferentiable::of_slice_2::<_, 2>, &ys, zero_params, - 
naked_predictor(predict_plane), - BaseGradientDescentHyper::to_immutable, + predictor::naked(predict_plane), + hyper::NakedGradientDescent::to_immutable, ) }; @@ -322,14 +321,17 @@ mod tests { Show[points, withBatching] */ - assert_eq!(theta0, [3.8581694055684781, 2.2166222673968554]); - assert_eq!(theta1, 5.2839863438547159); + assert_eq!(theta0, [3.858_169_405_568_478, 2.2166222673968554]); + assert_eq!(theta1, 5.283_986_343_854_716); } #[test] fn test_with_velocity() { - let hyper = VelocityGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000) - .with_mu(NotNan::new(0.9).expect("not nan")); + let hyper = hyper::VelocityGradientDescent::zero_momentum( + NotNan::new(0.001).expect("not nan"), + 1000, + ) + .with_mu(NotNan::new(0.9).expect("not nan")); let iterated = { let xs = to_not_nan_2(PLANE_XS); @@ -346,8 +348,8 @@ mod tests { RankedDifferentiableTagged::of_slice_2::<_, 2>, &ys, zero_params, - velocity_predictor(predict_plane), - VelocityGradientDescentHyper::to_immutable, + predictor::velocity(predict_plane), + hyper::VelocityGradientDescent::to_immutable, ) }; @@ -367,7 +369,7 @@ mod tests { fn test_with_rms() { let beta = NotNan::new(0.9).expect("not nan"); let stabilizer = NotNan::new(0.00000001).expect("not nan"); - let hyper = RmsGradientDescentHyper::default(NotNan::new(0.001).expect("not nan"), 3000) + let hyper = hyper::RmsGradientDescent::default(NotNan::new(0.001).expect("not nan"), 3000) .with_stabilizer(stabilizer) .with_beta(beta); @@ -386,8 +388,8 @@ mod tests { RankedDifferentiableTagged::of_slice_2::<_, 2>, &ys, zero_params, - rms_predictor(predict_plane), - RmsGradientDescentHyper::to_immutable, + predictor::rms(predict_plane), + hyper::RmsGradientDescent::to_immutable, ) }; @@ -402,7 +404,7 @@ mod tests { .map(|x| x.into_inner()) .collect::>(); let fitted_theta1 = theta1.to_scalar().real_part().into_inner(); - assert_eq!(fitted_theta0, [3.9853500993426492, 1.9745945728216352]); - assert_eq!(fitted_theta1, 6.1642229831811681); + assert_eq!(fitted_theta0, [3.985_350_099_342_649, 1.9745945728216352]); + assert_eq!(fitted_theta1, 6.164_222_983_181_168); } } diff --git a/little_learner/src/hyper.rs b/little_learner/src/hyper.rs index 58b203d..862e424 100644 --- a/little_learner/src/hyper.rs +++ b/little_learner/src/hyper.rs @@ -1,117 +1,135 @@ -use crate::loss::{NakedHypers, RmsHyper, VelocityHypers}; +use crate::predictor::{NakedHypers, RmsHyper, VelocityHypers}; use crate::traits::{NumLike, Zero}; -use rand::{rngs::StdRng, Rng}; +use rand::rngs::StdRng; -pub struct BaseGradientDescentHyper { - pub sampling: Option<(R, usize)>, +/// Hyperparameters which apply to any possible optimisation algorithm that uses gradient descent. 
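+///
+/// A sketch of building one (the tests go through concrete wrappers such as
+/// `NakedGradientDescent`, but the base builder looks roughly like this; the RNG
+/// seed is illustrative):
+///
+/// ```ignore
+/// use rand::{rngs::StdRng, SeedableRng};
+///
+/// // Plain full-batch gradient descent for 1000 iterations...
+/// let full_batch = BaseGradientDescent::new(1000);
+/// // ...or the same, but sampling minibatches of 4 points with a seeded RNG.
+/// let minibatched = BaseGradientDescent::new(1000).with_rng(StdRng::seed_from_u64(314159), 4);
+/// ```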
+pub struct BaseGradientDescent { + pub sampling: Option<(Rng, usize)>, pub iterations: u32, - params: NakedHypers, } -impl BaseGradientDescentHyper -where - A: NumLike + NumLike, -{ - #[allow(dead_code)] - pub fn naked(learning_rate: A, iterations: u32) -> Self { - BaseGradientDescentHyper { - params: NakedHypers { learning_rate }, - iterations, +impl BaseGradientDescent { + #[must_use] + pub fn new(iterations: u32) -> BaseGradientDescent { + BaseGradientDescent { sampling: None, + iterations, } } +} - #[allow(dead_code)] - pub fn with_rng(self, rng: S, size: usize) -> BaseGradientDescentHyper { - BaseGradientDescentHyper { - params: self.params, +impl BaseGradientDescent { + #[must_use] + pub fn with_rng(self, rng: Rng2, size: usize) -> BaseGradientDescent { + BaseGradientDescent { iterations: self.iterations, sampling: Some((rng, size)), } } - #[allow(dead_code)] + #[must_use] pub fn with_iterations(self, n: u32) -> Self { - BaseGradientDescentHyper { + BaseGradientDescent { sampling: self.sampling, iterations: n, - params: self.params, } } - - #[allow(dead_code)] - pub fn to_immutable(&self) -> NakedHypers { - self.params.clone() - } } -#[derive(Clone)] -pub struct VelocityGradientDescentHyper { - sampling: Option<(R, usize)>, - learning_rate: A, - iterations: u32, - mu: A, +pub struct NakedGradientDescent { + base: BaseGradientDescent, + naked: NakedHypers, } -impl VelocityGradientDescentHyper +impl NakedGradientDescent where A: Zero, { - #[allow(dead_code)] - pub fn naked(learning_rate: A, iterations: u32) -> Self { - VelocityGradientDescentHyper { - sampling: None, - learning_rate, - iterations, - mu: A::zero(), + #[must_use] + pub fn new(learning_rate: A, iterations: u32) -> Self { + NakedGradientDescent { + base: BaseGradientDescent::new(iterations), + naked: NakedHypers { learning_rate }, } } } -impl VelocityGradientDescentHyper { - #[allow(dead_code)] - pub fn with_mu(self, mu: A) -> Self { - VelocityGradientDescentHyper { - sampling: self.sampling, - mu, - learning_rate: self.learning_rate, - iterations: self.iterations, - } - } - - #[allow(dead_code)] - pub fn to_immutable(&self) -> VelocityHypers +impl NakedGradientDescent { + pub fn to_immutable(&self) -> NakedHypers where A: Clone, { - VelocityHypers { - mu: self.mu.clone(), - learning_rate: self.learning_rate.clone(), + self.naked.clone() + } + + #[must_use] + pub fn with_rng(self, rng: Rng2, size: usize) -> NakedGradientDescent { + NakedGradientDescent { + base: self.base.with_rng(rng, size), + naked: self.naked, } } } -impl From> for BaseGradientDescentHyper { - fn from(val: VelocityGradientDescentHyper) -> BaseGradientDescentHyper { - BaseGradientDescentHyper { - sampling: val.sampling, - iterations: val.iterations, - params: NakedHypers { - learning_rate: val.learning_rate, +impl From> for BaseGradientDescent { + fn from(val: NakedGradientDescent) -> BaseGradientDescent { + val.base + } +} + +pub struct VelocityGradientDescent { + base: BaseGradientDescent, + velocity: VelocityHypers, +} + +impl VelocityGradientDescent +where + A: Zero, +{ + #[must_use] + pub fn zero_momentum(learning_rate: A, iterations: u32) -> Self { + VelocityGradientDescent { + base: BaseGradientDescent::new(iterations), + velocity: VelocityHypers { + learning_rate, + mu: A::zero(), }, } } } -#[derive(Clone)] -pub struct RmsGradientDescentHyper { - sampling: Option<(R, usize)>, - iterations: u32, +impl VelocityGradientDescent { + #[must_use] + pub fn with_mu(self, mu: A) -> Self { + VelocityGradientDescent { + base: self.base, + velocity: 
VelocityHypers { + learning_rate: self.velocity.learning_rate, + mu, + }, + } + } + + pub fn to_immutable(&self) -> VelocityHypers + where + A: Clone, + { + self.velocity.clone() + } +} + +impl From> for BaseGradientDescent { + fn from(val: VelocityGradientDescent) -> BaseGradientDescent { + val.base + } +} + +pub struct RmsGradientDescent { + base: BaseGradientDescent, rms: RmsHyper, } -impl RmsGradientDescentHyper { - #[allow(dead_code)] +impl RmsGradientDescent { pub fn default(learning_rate: A, iterations: u32) -> Self where A: NumLike, @@ -122,9 +140,8 @@ impl RmsGradientDescentHyper { let one_hundredth = one_tenth.clone() * one_tenth; let one_ten_k = one_hundredth.clone() * one_hundredth; - RmsGradientDescentHyper { - sampling: None, - iterations, + RmsGradientDescent { + base: BaseGradientDescent::new(iterations), rms: RmsHyper { stabilizer: one_ten_k.clone() * one_ten_k, beta: A::one() + -(A::one() / ten), @@ -134,34 +151,31 @@ impl RmsGradientDescentHyper { } } -impl RmsGradientDescentHyper { - #[allow(dead_code)] +impl RmsGradientDescent { + #[must_use] pub fn with_stabilizer(self, stabilizer: A) -> Self { - RmsGradientDescentHyper { - sampling: self.sampling, + RmsGradientDescent { + base: self.base, rms: RmsHyper { stabilizer, beta: self.rms.beta, learning_rate: self.rms.learning_rate, }, - iterations: self.iterations, } } - #[allow(dead_code)] + #[must_use] pub fn with_beta(self, beta: A) -> Self { - RmsGradientDescentHyper { - sampling: self.sampling, + RmsGradientDescent { + base: self.base, rms: RmsHyper { stabilizer: self.rms.stabilizer, beta, learning_rate: self.rms.learning_rate, }, - iterations: self.iterations, } } - #[allow(dead_code)] pub fn to_immutable(&self) -> RmsHyper where A: Clone, @@ -170,14 +184,8 @@ impl RmsGradientDescentHyper { } } -impl From> for BaseGradientDescentHyper { - fn from(val: RmsGradientDescentHyper) -> BaseGradientDescentHyper { - BaseGradientDescentHyper { - sampling: val.sampling, - iterations: val.iterations, - params: NakedHypers { - learning_rate: val.rms.learning_rate, - }, - } +impl From> for BaseGradientDescent { + fn from(val: RmsGradientDescent) -> BaseGradientDescent { + val.base } } diff --git a/little_learner/src/lib.rs b/little_learner/src/lib.rs index c163e0d..e0aa15e 100644 --- a/little_learner/src/lib.rs +++ b/little_learner/src/lib.rs @@ -3,14 +3,12 @@ #![feature(array_methods)] pub mod auto_diff; -pub mod const_teq; -pub mod expr_syntax_tree; pub mod gradient_descent; pub mod hyper; pub mod loss; pub mod not_nan; +pub mod predictor; pub mod sample; pub mod scalar; pub mod smooth; -pub mod tensor; pub mod traits; diff --git a/little_learner/src/loss.rs b/little_learner/src/loss.rs index 22766e0..3e08e15 100644 --- a/little_learner/src/loss.rs +++ b/little_learner/src/loss.rs @@ -4,8 +4,6 @@ use std::{ }; use crate::auto_diff::Differentiable; -use crate::smooth::smooth; -use crate::traits::{NumLike, Sqrt}; use crate::{ auto_diff::{DifferentiableTagged, RankedDifferentiable}, scalar::Scalar, @@ -210,7 +208,10 @@ where }) } -// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate). +/// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate). +/// +/// # Panics +/// Panics if the input `theta` is not of rank 1 consisting of a tensor1 and a scalar. 
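+///
+/// A sketch of a well-shaped `theta`, roughly what the tests and `main.rs` build as
+/// their zero starting parameters (the second element here is inferred from the
+/// scalar translation term described above):
+///
+/// ```ignore
+/// let theta = [
+///     // A tensor1 of length 2, dotted with each input point...
+///     RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]).to_unranked(),
+///     // ...and a scalar to translate by.
+///     Differentiable::of_scalar(Scalar::<NotNan<f64>>::zero()),
+/// ];
+/// ```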
pub fn predict_plane( xs: RankedDifferentiable, theta: &[Differentiable; 2], @@ -218,9 +219,12 @@ pub fn predict_plane( where A: Mul + Add + Sum + Default + One + Zero + Clone, { - if theta[0].rank() != 1 { - panic!("theta0 must be of rank 1, got: {}", theta[0].rank()) - } + assert_eq!( + theta[0].rank(), + 1, + "theta0 must be of rank 1, got: {}", + theta[0].rank() + ); let theta0 = RankedDifferentiable::of_vector( theta[0] .borrow_vector() @@ -238,105 +242,6 @@ where RankedDifferentiable::of_vector(dotted) } -pub struct Predictor { - pub predict: F, - pub inflate: fn(Deflated) -> Inflated, - pub deflate: fn(Inflated) -> Deflated, - pub update: fn(Inflated, &Deflated, Params) -> Inflated, -} - -#[derive(Clone)] -pub struct NakedHypers { - pub learning_rate: A, -} - -pub const fn naked_predictor( - f: F, -) -> Predictor, Differentiable, NakedHypers> -where - A: NumLike, -{ - Predictor { - predict: f, - inflate: |x| x, - deflate: |x| x, - - update: |theta, delta, hyper| { - let learning_rate = Scalar::make(hyper.learning_rate); - Differentiable::map2(&theta, delta, &mut |theta, delta| { - theta.clone() - delta.clone() * learning_rate.clone() - }) - }, - } -} - -#[derive(Clone)] -pub struct RmsHyper { - pub stabilizer: A, - pub beta: A, - pub learning_rate: A, -} - -pub const fn rms_predictor( - f: F, -) -> Predictor, Differentiable, RmsHyper> -where - A: NumLike, -{ - Predictor { - predict: f, - inflate: |x| x.map_tag(&mut |()| A::zero()), - deflate: |x| x.map_tag(&mut |_| ()), - update: |theta, delta, hyper| { - DifferentiableTagged::map2_tagged( - &theta, - delta, - &mut |theta, smoothed_grad, delta, ()| { - let r = smooth( - Scalar::make(hyper.beta.clone()), - &Differentiable::of_scalar(Scalar::make(smoothed_grad)), - &Differentiable::of_scalar(delta.clone() * delta.clone()), - ) - .into_scalar(); - let learning_rate = Scalar::make(hyper.learning_rate.clone()) - / (r.sqrt() + Scalar::make(hyper.stabilizer.clone())); - ( - theta.clone() - + -(delta.clone() * Scalar::make(hyper.learning_rate.clone())), - learning_rate.clone_real_part(), - ) - }, - ) - }, - } -} - -#[derive(Clone)] -pub struct VelocityHypers { - pub learning_rate: A, - pub mu: A, -} - -pub const fn velocity_predictor( - f: F, -) -> Predictor, Differentiable, VelocityHypers> -where - A: NumLike, -{ - Predictor { - predict: f, - inflate: |x| x.map_tag(&mut |()| A::zero()), - deflate: |x| x.map_tag(&mut |_| ()), - update: |theta, delta, hyper| { - DifferentiableTagged::map2_tagged(&theta, delta, &mut |theta, velocity, delta, ()| { - let velocity = hyper.mu.clone() * velocity - + -(delta.clone_real_part() * hyper.learning_rate.clone()); - (theta.clone() + Scalar::make(velocity.clone()), velocity) - }) - }, - } -} - #[cfg(test)] mod test_loss { use crate::auto_diff::RankedDifferentiable; diff --git a/little_learner/src/predictor.rs b/little_learner/src/predictor.rs new file mode 100644 index 0000000..a16e4a1 --- /dev/null +++ b/little_learner/src/predictor.rs @@ -0,0 +1,111 @@ +use crate::auto_diff::{Differentiable, DifferentiableTagged}; +use crate::scalar::Scalar; +use crate::smooth::smooth; +use crate::traits::{NumLike, Sqrt}; + +/// A Predictor is a function (`predict`) we're optimising, an `inflate` which adds any metadata +/// that the prediction engine might require, a corresponding `deflate` which removes the metadata, +/// and an `update` which computes the next guess based on the previous guess. +pub struct Predictor { + /// The function we're trying to optimise. 
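+    /// For instance, the gradient-descent tests pair this up as
+    /// `predictor::naked(predict_plane)` or `predictor::velocity(predict_plane)`;
+    /// a sketch:
+    ///
+    /// ```ignore
+    /// let p = naked(predict_plane);
+    /// // `p.predict` is `predict_plane`; `p.update` steps theta by
+    /// // `theta - learning_rate * gradient` (plain gradient descent).
+    /// ```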
+ pub predict: F, + /// Attach prediction metadata to an input to the function we're trying to optimise. + pub inflate: fn(Deflated) -> Inflated, + /// Remove prediction metadata. + pub deflate: fn(Inflated) -> Deflated, + /// Given a guess at an optimum, the gradient at that point, and any hyperparameters, + /// compute the next guess at the optimum. + pub update: fn(Inflated, &Deflated, Params) -> Inflated, +} + +/// Hyperparameters applying to the most basic way to calculate the next step. +#[derive(Clone)] +pub struct NakedHypers { + pub learning_rate: A, +} + +pub const fn naked(f: F) -> Predictor, Differentiable, NakedHypers> +where + A: NumLike, +{ + Predictor { + predict: f, + inflate: |x| x, + deflate: |x| x, + + update: |theta, delta, hyper| { + let learning_rate = Scalar::make(hyper.learning_rate); + Differentiable::map2(&theta, delta, &mut |theta, delta| { + (theta.clone() - delta.clone() * learning_rate.clone()).truncate_dual(None) + }) + }, + } +} + +#[derive(Clone)] +pub struct RmsHyper { + pub stabilizer: A, + pub beta: A, + pub learning_rate: A, +} + +pub const fn rms( + f: F, +) -> Predictor, Differentiable, RmsHyper> +where + A: NumLike, +{ + Predictor { + predict: f, + inflate: |x| x.map_tag(&mut |()| A::zero()), + deflate: |x| x.map_tag(&mut |_| ()), + update: |theta, delta, hyper| { + DifferentiableTagged::map2_tagged( + &theta, + delta, + &mut |theta, smoothed_grad, delta, ()| { + let r = smooth( + Scalar::make(hyper.beta.clone()), + &Differentiable::of_scalar(Scalar::make(smoothed_grad)), + &Differentiable::of_scalar(delta.clone() * delta.clone()), + ) + .into_scalar(); + let learning_rate = Scalar::make(hyper.learning_rate.clone()) + / (r.sqrt() + Scalar::make(hyper.stabilizer.clone())); + ( + (theta.clone() + + -(delta.clone() * Scalar::make(hyper.learning_rate.clone()))) + .truncate_dual(None), + learning_rate.clone_real_part(), + ) + }, + ) + }, + } +} + +#[derive(Clone)] +pub struct VelocityHypers { + pub learning_rate: A, + pub mu: A, +} + +pub const fn velocity( + f: F, +) -> Predictor, Differentiable, VelocityHypers> +where + A: NumLike, +{ + Predictor { + predict: f, + inflate: |x| x.map_tag(&mut |()| A::zero()), + deflate: |x| x.map_tag(&mut |_| ()), + update: |theta, delta, hyper| { + DifferentiableTagged::map2_tagged(&theta, delta, &mut |theta, velocity, delta, ()| { + let velocity = hyper.mu.clone() * velocity + + -(delta.clone_real_part() * hyper.learning_rate.clone()); + (theta.clone() + Scalar::make(velocity.clone()), velocity) + }) + }, + } +} diff --git a/little_learner/src/sample.rs b/little_learner/src/sample.rs index 6e953ef..de6b4eb 100644 --- a/little_learner/src/sample.rs +++ b/little_learner/src/sample.rs @@ -1,5 +1,6 @@ use rand::Rng; +/// Grab `n` random samples from `from_x` and `from_y`, collecting them into a vector. 
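+///
+/// A sketch of a call (the generic bounds on `from_x`/`from_y` are elided here, and
+/// `plane_xs`/`plane_ys` stand in for whatever paired data you have; that the same
+/// indices are drawn from both inputs, keeping each `x` with its `y`, is an
+/// assumption based on how gradient descent uses this for minibatching):
+///
+/// ```ignore
+/// let mut rng = StdRng::seed_from_u64(314159);
+/// let (xs, ys) = sample2(&mut rng, 4, &plane_xs, &plane_ys);
+/// ```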
pub fn sample2(rng: &mut R, n: usize, from_x: I, from_y: J) -> (Vec, Vec) where T: Copy, diff --git a/little_learner/src/scalar.rs b/little_learner/src/scalar.rs index 5a38e88..ebe5936 100644 --- a/little_learner/src/scalar.rs +++ b/little_learner/src/scalar.rs @@ -117,7 +117,7 @@ impl Link { -left.clone_real_part() * z / (right.clone_real_part() * right.clone_real_part()), acc, - ) + ); } LinkData::Log(arg) => { // d/dx(log y) = 1/y dy/dx @@ -181,7 +181,7 @@ where A: Add + Clone, { fn add_assign(&mut self, rhs: Self) { - *self = self.clone() + rhs + *self = self.clone() + rhs; } } @@ -287,8 +287,7 @@ where impl Scalar { pub fn real_part(&self) -> &A { match self { - Scalar::Number(a, _) => a, - Scalar::Dual(a, _) => a, + Scalar::Number(a, _) | Scalar::Dual(a, _) => a, } } @@ -297,8 +296,7 @@ impl Scalar { A: Clone, { match self { - Scalar::Number(a, _) => (*a).clone(), - Scalar::Dual(a, _) => (*a).clone(), + Scalar::Number(a, _) | Scalar::Dual(a, _) => (*a).clone(), } } @@ -319,6 +317,7 @@ impl Scalar { } } + #[must_use] pub fn truncate_dual(self, index: Option) -> Scalar where A: Clone, @@ -326,6 +325,7 @@ impl Scalar { Scalar::Dual(self.clone_real_part(), Link::EndOfLink(index)) } + #[must_use] pub fn make(x: A) -> Scalar { Scalar::Number(x, None) } @@ -337,9 +337,9 @@ where { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Scalar::Number(n, Some(index)) => f.write_fmt(format_args!("{}_{}", n, index)), - Scalar::Number(n, None) => f.write_fmt(format_args!("{}", n)), - Scalar::Dual(n, link) => f.write_fmt(format_args!("<{}, link: {}>", n, link)), + Scalar::Number(n, Some(index)) => f.write_fmt(format_args!("{n}_{index}")), + Scalar::Number(n, None) => f.write_fmt(format_args!("{n}")), + Scalar::Dual(n, link) => f.write_fmt(format_args!("<{n}, link: {link}>")), } } } @@ -385,7 +385,7 @@ mod test_loss { fn sqrt_gradient() { let nine = Differentiable::of_scalar(Scalar::make(NotNan::new(9.0).expect("not nan"))); let graded: [Differentiable>; 1] = grad( - |x| RankedDifferentiable::of_scalar(x[0].clone().into_scalar().clone().sqrt()), + |x| RankedDifferentiable::of_scalar(x[0].clone().into_scalar().sqrt()), &[nine], ); let graded = graded.map(|x| x.into_scalar().clone_real_part().into_inner())[0]; diff --git a/little_learner/src/smooth.rs b/little_learner/src/smooth.rs index 2a1d22e..8d596b0 100644 --- a/little_learner/src/smooth.rs +++ b/little_learner/src/smooth.rs @@ -3,10 +3,12 @@ use crate::scalar::Scalar; use crate::traits::One; use std::ops::{Add, Mul, Neg}; +/// Combine `old_value` and `new_value`, weighting the combination towards `new_value` by a factor +/// of `decay`. pub fn smooth_tagged( decay: Scalar, - current_avg: &DifferentiableTagged, - grad: &DifferentiableTagged, + old_value: &DifferentiableTagged, + new_value: &DifferentiableTagged, mut tags: F, ) -> DifferentiableTagged where @@ -15,23 +17,25 @@ where Tag1: Clone, Tag2: Clone, { - DifferentiableTagged::map2_tagged(current_avg, grad, &mut |avg, tag1, grad, tag2| { + DifferentiableTagged::map2_tagged(old_value, new_value, &mut |old, tag1, new, tag2| { ( - (avg.clone() * decay.clone()) + (grad.clone() * (Scalar::::one() + -decay.clone())), + (old.clone() * decay.clone()) + (new.clone() * (Scalar::::one() + -decay.clone())), tags(tag1, tag2), ) }) } +/// Combine `old_value` and `new_value`, weighting the combination towards `new_value` by a factor +/// of `decay`. 
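+///
+/// Concretely, the body of `smooth_tagged` computes
+/// `decay * old_value + (1 - decay) * new_value` componentwise, so a decay of 0.9,
+/// an old component of 0.8 and a new component of 1.0 smooth to roughly 0.82
+/// (compare the `0.820…` entry in the tensor test below). A sketch with scalar
+/// tensors:
+///
+/// ```ignore
+/// let decay = Scalar::make(NotNan::new(0.9).expect("not nan"));
+/// let old_value = Differentiable::of_scalar(Scalar::make(NotNan::new(0.8).expect("not nan")));
+/// let new_value = Differentiable::of_scalar(Scalar::make(NotNan::new(1.0).expect("not nan")));
+/// let smoothed = smooth(decay, &old_value, &new_value); // approximately 0.82
+/// ```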
pub fn smooth( decay: Scalar, - current_avg: &Differentiable, - grad: &Differentiable, + old_value: &Differentiable, + new_value: &Differentiable, ) -> Differentiable where A: One + Clone + Mul + Neg + Add, { - smooth_tagged(decay, current_avg, grad, |(), ()| ()) + smooth_tagged(decay, old_value, new_value, |(), ()| ()) } #[cfg(test)] @@ -72,17 +76,17 @@ mod test_smooth { output, vec![ 5.0299999999999985, - 6.7969999999999979, - 6.5472999999999981, - 6.1625699999999979, + 6.796_999_999_999_998, + 6.547_299_999_999_998, + 6.162_569_999_999_998, 5.7263129999999975, - 5.3736816999999979, - 4.8963135299999978 + 5.373_681_699_999_998, + 4.896_313_529_999_998 ] - ) + ); } - fn hydrate(v: Vec) -> Differentiable> { + fn hydrate(v: &[f64]) -> Differentiable> { Differentiable::of_vec( v.iter() .cloned() @@ -100,9 +104,9 @@ mod test_smooth { vec![13.4, 18.2, 41.4], vec![1.1, 0.3, 67.3], ] - .map(hydrate); + .map(|x| hydrate(&x)); - let mut current = hydrate(vec![0.8, 3.1, 2.2]); + let mut current = hydrate(&vec![0.8, 3.1, 2.2]); let mut output = Vec::with_capacity(inputs.len()); for input in inputs { current = smooth(decay.clone(), ¤t, &input); @@ -112,10 +116,10 @@ mod test_smooth { assert_eq!( output, vec![ - vec![0.82000000000000006, 2.9, 2.2800000000000002], - vec![2.0779999999999998, 4.4299999999999997, 6.1919999999999993], + vec![0.820_000_000_000_000_1, 2.9, 2.2800000000000002], + vec![2.078, 4.43, 6.191_999_999_999_999], vec![1.9802, 4.0169999999999995, 12.302799999999998] ] - ) + ); } } diff --git a/little_learner/src/tensor.rs b/little_learner/src/tensor.rs deleted file mode 100644 index c769b40..0000000 --- a/little_learner/src/tensor.rs +++ /dev/null @@ -1,107 +0,0 @@ -#[macro_export] -macro_rules! tensor { - ($x:ty , $i: expr) => {[$x; $i]}; - ($x:ty , $i: expr, $($is:expr),+) => {[tensor!($x, $($is),+); $i]}; -} - -#[cfg(test)] -mod tests { - #[test] - fn test_tensor_type() { - let _: tensor!(f64, 1, 2, 3) = [[[1.0, 3.0, 6.0], [-1.3, -30.0, -0.0]]]; - } -} - -pub trait Extensible1 { - fn apply(&self, other: &A, op: &F) -> Self - where - F: Fn(&A, &A) -> A; -} - -pub trait Extensible2 { - fn apply(&self, other: &Self, op: &F) -> Self - where - F: Fn(&A, &A) -> A; -} - -impl Extensible1 for [T; N] -where - T: Extensible1 + Copy + Default, -{ - fn apply(&self, other: &A, op: &F) -> Self - where - F: Fn(&A, &A) -> A, - { - let mut result = [Default::default(); N]; - for (i, coord) in self.iter().enumerate() { - result[i] = T::apply(coord, other, op); - } - result - } -} - -impl Extensible2 for [T; N] -where - T: Extensible2 + Copy + Default, -{ - fn apply(&self, other: &Self, op: &F) -> Self - where - F: Fn(&A, &A) -> A, - { - let mut result = [Default::default(); N]; - for (i, coord) in self.iter().enumerate() { - result[i] = T::apply(coord, &other[i], op); - } - result - } -} - -#[macro_export] -macro_rules! extensible1 { - ($x: ty) => { - impl Extensible1<$x> for $x { - fn apply(&self, other: &$x, op: &F) -> Self - where - F: Fn(&Self, &Self) -> Self, - { - op(self, other) - } - } - }; -} - -#[macro_export] -macro_rules! 
extensible2 { - ($x: ty) => { - impl Extensible2<$x> for $x { - fn apply(&self, other: &Self, op: &F) -> Self - where - F: Fn(&Self, &Self) -> Self, - { - op(self, other) - } - } - }; -} - -extensible1!(u8); -extensible1!(f64); - -extensible2!(u8); -extensible2!(f64); - -pub fn extension1(t1: &T, t2: &A, op: F) -> T -where - T: Extensible1, - F: Fn(&A, &A) -> A, -{ - t1.apply::(t2, &op) -} - -pub fn extension2(t1: &T, t2: &T, op: F) -> T -where - T: Extensible2, - F: Fn(&A, &A) -> A, -{ - t1.apply::(t2, &op) -} diff --git a/little_learner/src/traits.rs b/little_learner/src/traits.rs index 86d9876..cd73bdc 100644 --- a/little_learner/src/traits.rs +++ b/little_learner/src/traits.rs @@ -4,6 +4,7 @@ use std::iter::Sum; use std::ops::{Add, AddAssign, Div, Mul, Neg}; pub trait Exp { + #[must_use] fn exp(self) -> Self; } @@ -14,6 +15,7 @@ impl Exp for NotNan { } pub trait Sqrt { + #[must_use] fn sqrt(self) -> Self; } @@ -24,10 +26,12 @@ impl Sqrt for NotNan { } pub trait Zero { + #[must_use] fn zero() -> Self; } pub trait One { + #[must_use] fn one() -> Self; } diff --git a/little_learner_app/src/main.rs b/little_learner_app/src/main.rs index 5dd8178..391d0ec 100644 --- a/little_learner_app/src/main.rs +++ b/little_learner_app/src/main.rs @@ -1,35 +1,37 @@ #![allow(incomplete_features)] #![feature(generic_const_exprs)] -mod with_tensor; - use little_learner::auto_diff::{Differentiable, RankedDifferentiable, RankedDifferentiableTagged}; use little_learner::gradient_descent::gradient_descent; -use little_learner::hyper::VelocityGradientDescentHyper; -use little_learner::loss::{predict_plane, velocity_predictor}; +use little_learner::hyper; +use little_learner::loss::predict_plane; use little_learner::not_nan::{to_not_nan_1, to_not_nan_2}; +use little_learner::predictor; use little_learner::scalar::Scalar; use little_learner::traits::Zero; use ordered_float::NotNan; -fn main() { - let plane_xs = [ - [1.0, 2.05], - [1.0, 3.0], - [2.0, 2.0], - [2.0, 3.91], - [3.0, 6.13], - [4.0, 8.09], - ]; - let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94]; +const PLANE_XS: [[f64; 2]; 6] = [ + [1.0, 2.05], + [1.0, 3.0], + [2.0, 2.0], + [2.0, 3.91], + [3.0, 6.13], + [4.0, 8.09], +]; +const PLANE_YS: [f64; 6] = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94]; - let hyper = VelocityGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000) - .with_mu(NotNan::new(0.9).expect("not nan")); +fn main() { + let beta = NotNan::new(0.9).expect("not nan"); + let stabilizer = NotNan::new(0.000_000_01).expect("not nan"); + let hyper = hyper::RmsGradientDescent::default(NotNan::new(0.001).expect("not nan"), 3000) + .with_stabilizer(stabilizer) + .with_beta(beta); let iterated = { - let xs = to_not_nan_2(plane_xs); - let ys = to_not_nan_1(plane_ys); + let xs = to_not_nan_2(PLANE_XS); + let ys = to_not_nan_1(PLANE_YS); let zero_params = [ RankedDifferentiable::of_slice(&[NotNan::::zero(), NotNan::::zero()]) .to_unranked(), @@ -42,8 +44,8 @@ fn main() { RankedDifferentiableTagged::of_slice_2::<_, 2>, &ys, zero_params, - velocity_predictor(predict_plane), - VelocityGradientDescentHyper::to_immutable, + predictor::rms(predict_plane), + hyper::RmsGradientDescent::to_immutable, ) }; @@ -52,11 +54,14 @@ fn main() { let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor"); let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor"); - assert_eq!(theta0.collect(), [3.979645447136021, 1.976454920954754]); - assert_eq!( - theta1.to_scalar().real_part().into_inner(), - 6.169579045974949 - ); + let fitted_theta0 = theta0 + 
.collect() + .iter() + .map(|x| x.into_inner()) + .collect::>(); + let fitted_theta1 = theta1.to_scalar().real_part().into_inner(); + assert_eq!(fitted_theta0, [3.985_350_099_342_649, 1.9745945728216352]); + assert_eq!(fitted_theta1, 6.164_222_983_181_168); } #[cfg(test)] diff --git a/little_learner_app/src/with_tensor.rs b/little_learner_app/src/with_tensor.rs deleted file mode 100644 index cc78083..0000000 --- a/little_learner_app/src/with_tensor.rs +++ /dev/null @@ -1,136 +0,0 @@ -#![allow(dead_code)] - -use std::iter::Sum; -use std::ops::{Mul, Sub}; - -use little_learner::tensor; -use little_learner::tensor::{extension2, Extensible2}; -use little_learner::traits::One; - -type Point = [A; N]; - -type Parameters = [Point; M]; - -fn dot_points(x: &Point, y: &Point) -> A -where - A: Sum<::Output> + Copy + Default + Mul + Extensible2, -{ - extension2(x, y, |&x, &y| x * y).into_iter().sum() -} - -fn dot(x: &Point, y: &Parameters) -> Point -where - A: Mul + Sum<::Output> + Copy + Default + Extensible2, -{ - let mut result = [Default::default(); M]; - for (i, coord) in y.iter().map(|y| dot_points(x, y)).enumerate() { - result[i] = coord; - } - result -} - -fn sum(x: &tensor!(A, N)) -> A -where - A: Sum + Copy, -{ - A::sum(x.iter().cloned()) -} - -fn squared(x: &tensor!(A, N)) -> tensor!(A, N) -where - A: Mul + Extensible2 + Copy + Default, -{ - extension2(x, x, |&a, &b| (a * b)) -} - -fn l2_norm(prediction: &tensor!(A, N), data: &tensor!(A, N)) -> A -where - A: Sum + Mul + Extensible2 + Copy + Default + Sub, -{ - let diff = extension2(prediction, data, |&x, &y| x - y); - sum(&squared(&diff)) -} - -pub fn l2_loss( - target: F, - data_xs: &tensor!(A, N), - data_ys: &tensor!(A, N), - params: &Params, -) -> A -where - F: Fn(&tensor!(A, N), &Params) -> tensor!(A, N), - A: Sum + Mul + Extensible2 + Copy + Default + Sub, -{ - let pred_ys = target(data_xs, params); - l2_norm(&pred_ys, data_ys) -} - -pub fn predict_line(xs: &tensor!(A, N), theta: &tensor!(A, 2)) -> tensor!(A, N) -where - A: Mul + Sum<::Output> + Copy + Default + Extensible2 + One, -{ - let mut result: tensor!(A, N) = [Default::default(); N]; - for (i, &x) in xs.iter().enumerate() { - result[i] = dot(&[x, One::one()], &[*theta])[0]; - } - result -} - -#[cfg(test)] -mod tests { - use super::*; - use little_learner::tensor::extension1; - - #[test] - fn test_extension() { - let x: tensor!(u8, 1) = [2]; - assert_eq!(extension1(&x, &7, |x, y| x + y), [9]); - let y: tensor!(u8, 1) = [7]; - assert_eq!(extension2(&x, &y, |x, y| x + y), [9]); - - let x: tensor!(u8, 3) = [5, 6, 7]; - assert_eq!(extension1(&x, &2, |x, y| x + y), [7, 8, 9]); - let y: tensor!(u8, 3) = [2, 0, 1]; - assert_eq!(extension2(&x, &y, |x, y| x + y), [7, 6, 8]); - - let x: tensor!(u8, 2, 3) = [[4, 6, 7], [2, 0, 1]]; - assert_eq!(extension1(&x, &2, |x, y| x + y), [[6, 8, 9], [4, 2, 3]]); - let y: tensor!(u8, 2, 3) = [[1, 2, 2], [6, 3, 1]]; - assert_eq!(extension2(&x, &y, |x, y| x + y), [[5, 8, 9], [8, 3, 2]]); - } - - #[test] - fn test_l2_norm() { - assert_eq!( - l2_norm(&[4.0, -3.0, 0.0, -4.0, 3.0], &[0.0, 0.0, 0.0, 0.0, 0.0]), - 50.0 - ) - } - - #[test] - fn test_l2_loss() { - let loss = l2_loss( - predict_line, - &[2.0, 1.0, 4.0, 3.0], - &[1.8, 1.2, 4.2, 3.3], - &[0.0, 0.0], - ); - assert_eq!(loss, 33.21); - - let loss = l2_loss( - predict_line, - &[2.0, 1.0, 4.0, 3.0], - &[1.8, 1.2, 4.2, 3.3], - &[0.0099, 0.0], - ); - assert_eq!((100.0 * loss).round() / 100.0, 32.59); - } - - #[test] - fn l2_loss_non_autodiff_example() { - let xs = [2.0, 1.0, 4.0, 3.0]; - let ys = [1.8, 
1.2, 4.2, 3.3]; - let loss = l2_loss(predict_line, &xs, &ys, &[0.0099, 0.0]); - assert_eq!(loss, 32.5892403); - } -}