diff --git a/Cargo.lock b/Cargo.lock
index 1e333c7..c30cc64 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -55,7 +55,6 @@ dependencies = [
 name = "little_learner_app"
 version = "0.1.0"
 dependencies = [
- "arrayvec",
  "immutable-chunkmap",
  "little_learner",
  "ordered-float",
diff --git a/little_learner/src/auto_diff.rs b/little_learner/src/auto_diff.rs
index 0b50b4e..97d59b5 100644
--- a/little_learner/src/auto_diff.rs
+++ b/little_learner/src/auto_diff.rs
@@ -7,12 +7,12 @@ use std::{
     ops::{AddAssign, Div, Mul, Neg},
 };
 
-impl<A> Zero for DifferentiableHidden<A>
+impl<A> Zero for Differentiable<A>
 where
     A: Zero,
 {
-    fn zero() -> DifferentiableHidden<A> {
-        DifferentiableHidden::Scalar(Scalar::Number(A::zero(), None))
+    fn zero() -> Differentiable<A> {
+        Differentiable::Scalar(Scalar::Number(A::zero(), None))
     }
 }
 
@@ -25,16 +25,16 @@ where
     }
 }
 
-impl<A> One for DifferentiableHidden<A>
+impl<A> One for Differentiable<A>
 where
     A: One,
 {
-    fn one() -> DifferentiableHidden<A> {
-        DifferentiableHidden::Scalar(Scalar::one())
+    fn one() -> Differentiable<A> {
+        Differentiable::Scalar(Scalar::one())
     }
 }
 
-impl<A> Clone for DifferentiableHidden<A>
+impl<A> Clone for Differentiable<A>
 where
     A: Clone,
 {
@@ -47,19 +47,19 @@ where
 }
 
 #[derive(Debug)]
-enum DifferentiableHidden<A> {
+pub enum Differentiable<A> {
     Scalar(Scalar<A>),
-    Vector(Vec<DifferentiableHidden<A>>),
+    Vector(Vec<Differentiable<A>>),
 }
 
-impl<A> Display for DifferentiableHidden<A>
+impl<A> Display for Differentiable<A>
 where
     A: Display,
 {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            DifferentiableHidden::Scalar(s) => f.write_fmt(format_args!("{}", s)),
-            DifferentiableHidden::Vector(v) => {
+            Differentiable::Scalar(s) => f.write_fmt(format_args!("{}", s)),
+            Differentiable::Vector(v) => {
                 f.write_char('[')?;
                 for v in v.iter() {
                     f.write_fmt(format_args!("{}", v))?;
@@ -71,32 +71,32 @@ where
     }
 }
 
-impl<A> DifferentiableHidden<A> {
-    fn map<B, F>(&self, f: &mut F) -> DifferentiableHidden<B>
+impl<A> Differentiable<A> {
+    pub fn map<B, F>(&self, f: &mut F) -> Differentiable<B>
     where
         F: FnMut(Scalar<A>) -> Scalar<B>,
         A: Clone,
     {
         match self {
-            DifferentiableHidden::Scalar(a) => DifferentiableHidden::Scalar(f(a.clone())),
-            DifferentiableHidden::Vector(slice) => {
-                DifferentiableHidden::Vector(slice.iter().map(|x| x.map(f)).collect())
+            Differentiable::Scalar(a) => Differentiable::Scalar(f(a.clone())),
+            Differentiable::Vector(slice) => {
+                Differentiable::Vector(slice.iter().map(|x| x.map(f)).collect())
             }
         }
     }
 
-    fn map2<B, C, F>(&self, other: &DifferentiableHidden<B>, f: &F) -> DifferentiableHidden<C>
+    pub fn map2<B, C, F>(&self, other: &Differentiable<B>, f: &F) -> Differentiable<C>
     where
         F: Fn(&Scalar<A>, &Scalar<B>) -> Scalar<C>,
         A: Clone,
         B: Clone,
     {
         match (self, other) {
-            (DifferentiableHidden::Scalar(a), DifferentiableHidden::Scalar(b)) => {
-                DifferentiableHidden::Scalar(f(a, b))
+            (Differentiable::Scalar(a), Differentiable::Scalar(b)) => {
+                Differentiable::Scalar(f(a, b))
             }
-            (DifferentiableHidden::Vector(slice_a), DifferentiableHidden::Vector(slice_b)) => {
-                DifferentiableHidden::Vector(
+            (Differentiable::Vector(slice_a), Differentiable::Vector(slice_b)) => {
+                Differentiable::Vector(
                     slice_a
                         .iter()
                         .zip(slice_b.iter())
@@ -108,20 +108,69 @@ impl<A> DifferentiableHidden<A> {
         }
     }
 
-    fn of_slice(input: &[A]) -> DifferentiableHidden<A>
+    fn of_slice<T>(input: T) -> Differentiable<A>
     where
         A: Clone,
+        T: AsRef<[A]>,
     {
-        DifferentiableHidden::Vector(
+        Differentiable::Vector(
             input
+                .as_ref()
                 .iter()
-                .map(|v| DifferentiableHidden::Scalar(Scalar::Number((*v).clone(), None)))
+                .map(|v| Differentiable::Scalar(Scalar::Number((*v).clone(), None)))
                 .collect(),
         )
     }
+
+    pub fn rank(&self) -> usize {
+        match self {
+            Differentiable::Scalar(_) => 0,
+            Differentiable::Vector(v) => v[0].rank() + 1,
+        }
+    }
+
+    pub fn attach_rank<const RANK: usize>(
+        self: Differentiable<A>,
+    ) -> Option<RankedDifferentiable<A, RANK>> {
+        if self.rank() == RANK {
+            Some(RankedDifferentiable { contents: self })
+        } else {
+            None
+        }
+    }
 }
 
-impl<A> DifferentiableHidden<A>
+impl<A> Differentiable<A> {
+    pub fn into_scalar(self) -> Scalar<A> {
+        match self {
+            Differentiable::Scalar(s) => s,
+            Differentiable::Vector(_) => panic!("not a scalar"),
+        }
+    }
+
+    pub fn into_vector(self) -> Vec<Differentiable<A>> {
+        match self {
+            Differentiable::Scalar(_) => panic!("not a vector"),
+            Differentiable::Vector(v) => v,
+        }
+    }
+
+    pub fn borrow_scalar(&self) -> &Scalar<A> {
+        match self {
+            Differentiable::Scalar(s) => s,
+            Differentiable::Vector(_) => panic!("not a scalar"),
+        }
+    }
+
+    pub fn borrow_vector(&self) -> &Vec<Differentiable<A>> {
+        match self {
+            Differentiable::Scalar(_) => panic!("not a vector"),
+            Differentiable::Vector(v) => v,
+        }
+    }
+}
+
+impl<A> Differentiable<A>
 where
     A: Clone
         + Eq
@@ -134,7 +183,7 @@ where
         + One
         + Neg<Output = A>,
 {
-    fn accumulate_gradients_vec(v: &[DifferentiableHidden<A>], acc: &mut HashMap<Scalar<A>, A>) {
+    fn accumulate_gradients_vec(v: &[Differentiable<A>], acc: &mut HashMap<Scalar<A>, A>) {
         for v in v.iter().rev() {
             v.accumulate_gradients(acc);
         }
@@ -142,33 +191,36 @@ where
 
     fn accumulate_gradients(&self, acc: &mut HashMap<Scalar<A>, A>) {
         match self {
-            DifferentiableHidden::Scalar(y) => {
+            Differentiable::Scalar(y) => {
                 let k = y.clone_link();
                 k.invoke(y, A::one(), acc);
             }
-            DifferentiableHidden::Vector(y) => {
-                DifferentiableHidden::accumulate_gradients_vec(y, acc)
-            }
+            Differentiable::Vector(y) => Differentiable::accumulate_gradients_vec(y, acc),
         }
     }
 
-    fn grad_once(self, wrt: &DifferentiableHidden<A>) -> DifferentiableHidden<A> {
+    fn grad_once<const PARAM_NUM: usize>(
+        self,
+        wrt: [Differentiable<A>; PARAM_NUM],
+    ) -> [Differentiable<A>; PARAM_NUM] {
         let mut acc = HashMap::new();
         self.accumulate_gradients(&mut acc);
-        wrt.map(&mut |d| match acc.get(&d) {
-            None => Scalar::Number(A::zero(), None),
-            Some(x) => Scalar::Number(x.clone(), None),
+        wrt.map(|wrt| {
+            wrt.map(&mut |d| match acc.get(&d) {
+                None => Scalar::Number(A::zero(), None),
+                Some(x) => Scalar::Number(x.clone(), None),
+            })
         })
     }
 }
 
 #[derive(Clone, Debug)]
-pub struct Differentiable<A> {
-    contents: DifferentiableHidden<A>,
+pub struct RankedDifferentiable<A, const RANK: usize> {
+    contents: Differentiable<A>,
 }
 
-impl<A> Display for Differentiable<A>
+impl<A, const RANK: usize> Display for RankedDifferentiable<A, RANK>
 where
     A: Display,
 {
@@ -177,123 +229,161 @@ where
     }
 }
 
-pub fn of_scalar<A>(s: Scalar<A>) -> Differentiable<A> {
-    Differentiable {
-        contents: DifferentiableHidden::Scalar(s),
-    }
-}
-
-pub fn to_scalar<A>(s: Differentiable<A>) -> Scalar<A> {
-    match s.contents {
-        DifferentiableHidden::Scalar(s) => s,
-        DifferentiableHidden::Vector(_) => panic!("not a vector"),
-    }
-}
-
-pub fn of_slice<A>(input: &[A]) -> Differentiable<A>
-where
-    A: Clone,
-{
-    Differentiable {
-        contents: DifferentiableHidden::of_slice(input),
-    }
-}
-
-impl<A> Differentiable<A> {
-    pub fn of_vector(s: Vec<Differentiable<A>>) -> Differentiable<A> {
-        Differentiable {
-            contents: DifferentiableHidden::Vector(s.into_iter().map(|v| v.contents).collect()),
+impl<A> RankedDifferentiable<A, 0> {
+    pub fn to_scalar(self) -> Scalar<A> {
+        match self.contents {
+            Differentiable::Scalar(s) => s,
+            Differentiable::Vector(_) => panic!("not a scalar despite teq that we're a scalar"),
         }
     }
 
-    pub fn map<B, F>(s: Differentiable<A>, f: &mut F) -> Differentiable<B>
+    pub fn of_scalar(s: Scalar<A>) -> RankedDifferentiable<A, 0> {
+        RankedDifferentiable {
+            contents: Differentiable::Scalar(s),
+        }
+    }
+}
+
+impl<A> RankedDifferentiable<A, 1> {
+    pub fn of_slice<T>(input: T) -> RankedDifferentiable<A, 1>
+    where
+        A: Clone,
+        T: AsRef<[A]>,
+    {
+        RankedDifferentiable {
+            contents: Differentiable::of_slice(input),
+        }
+    }
+}
+
+impl<A> RankedDifferentiable<A, 2> {
+    pub fn of_slice_2<T, const N: usize>(input: &[T]) -> RankedDifferentiable<A, 2>
+    where
+        A: Clone,
+        T: AsRef<[A]>,
+    {
+        let v = input
+            .iter()
+            .map(|x| Differentiable::of_slice(x))
+            .collect::<Vec<_>>();
+        RankedDifferentiable {
+            contents: Differentiable::Vector(v),
+        }
+    }
+}
+
+impl<A, const RANK: usize> RankedDifferentiable<A, RANK> {
+    pub fn to_unranked(self) -> Differentiable<A> {
+        self.contents
+    }
+
+    pub fn to_unranked_borrow(&self) -> &Differentiable<A> {
+        &self.contents
+    }
+
+    pub fn of_vector(
+        s: Vec<RankedDifferentiable<A, RANK>>,
+    ) -> RankedDifferentiable<A, { RANK + 1 }> {
+        RankedDifferentiable {
+            contents: Differentiable::Vector(s.into_iter().map(|v| v.contents).collect()),
+        }
+    }
+
+    pub fn map<B, F>(
+        self: RankedDifferentiable<A, RANK>,
+        f: &mut F,
+    ) -> RankedDifferentiable<B, RANK>
     where
         F: FnMut(Scalar<A>) -> Scalar<B>,
         A: Clone,
     {
-        Differentiable {
-            contents: DifferentiableHidden::map(&s.contents, f),
+        RankedDifferentiable {
+            contents: Differentiable::map(&self.contents, f),
         }
     }
 
     pub fn map2<B, C, F>(
-        self: &Differentiable<A>,
-        other: &Differentiable<B>,
+        self: &RankedDifferentiable<A, RANK>,
+        other: &RankedDifferentiable<B, RANK>,
         f: &F,
-    ) -> Differentiable<C>
+    ) -> RankedDifferentiable<C, RANK>
     where
         F: Fn(&Scalar<A>, &Scalar<B>) -> Scalar<C>,
         A: Clone,
         B: Clone,
     {
-        Differentiable {
-            contents: DifferentiableHidden::map2(&self.contents, &other.contents, f),
+        RankedDifferentiable {
+            contents: Differentiable::map2(&self.contents, &other.contents, f),
         }
     }
 
-    pub fn to_vector(s: Differentiable<A>) -> Vec<Differentiable<A>> {
-        match s.contents {
-            DifferentiableHidden::Scalar(_) => panic!("not a scalar"),
-            DifferentiableHidden::Vector(v) => v
+    pub fn to_vector(
+        self: RankedDifferentiable<A, RANK>,
+    ) -> Vec<RankedDifferentiable<A, { RANK - 1 }>> {
+        match self.contents {
+            Differentiable::Scalar(_) => panic!("not a scalar"),
+            Differentiable::Vector(v) => v
                 .into_iter()
-                .map(|v| Differentiable { contents: v })
+                .map(|v| RankedDifferentiable { contents: v })
                 .collect(),
         }
     }
+}
 
-    pub fn grad<F>(f: F, theta: &Differentiable<A>) -> Differentiable<A>
-    where
-        F: Fn(Differentiable<A>) -> Differentiable<A>,
-        A: Clone
-            + Hash
-            + AddAssign
-            + Mul<Output = A>
-            + Exp
-            + Div<Output = A>
-            + Zero
-            + One
-            + Neg<Output = A>
-            + Eq,
-    {
-        let mut i = 0usize;
-        let wrt = theta.contents.map(&mut |x| {
+pub fn grad<A, F, const RANK: usize, const PARAM_RANK: usize>(
+    f: F,
+    theta: &[Differentiable<A>; PARAM_RANK],
+) -> [Differentiable<A>; PARAM_RANK]
+where
+    F: Fn(&[Differentiable<A>; PARAM_RANK]) -> RankedDifferentiable<A, RANK>,
+    A: ?Sized
+        + Clone
+        + Hash
+        + AddAssign
+        + Mul<Output = A>
+        + Exp
+        + Div<Output = A>
+        + Zero
+        + One
+        + Neg<Output = A>
+        + Eq,
+{
+    let mut i = 0usize;
+    let wrt = theta.each_ref().map(|theta| {
+        theta.map(&mut |x| {
             let result = Scalar::truncate_dual(x, Some(i));
             i += 1;
             result
-        });
-        let after_f = f(Differentiable {
-            contents: wrt.clone(),
-        });
-        Differentiable {
-            contents: DifferentiableHidden::grad_once(after_f.contents, &wrt),
-        }
-    }
+        })
+    });
+    let after_f = f(&wrt);
+    Differentiable::grad_once(after_f.contents, wrt)
 }
 
 #[cfg(test)]
 mod tests {
     use ordered_float::NotNan;
 
-    use crate::loss::{l2_loss_2, predict_line_2};
+    use crate::loss::{l2_loss_2, predict_line_2_unranked};
 
     use super::*;
 
-    fn extract_scalar<'a, A>(d: &'a DifferentiableHidden<A>) -> &'a A {
+    fn extract_scalar<'a, A>(d: &'a Differentiable<A>) -> &'a A {
         match d {
-            DifferentiableHidden::Scalar(a) => &(a.real_part()),
-            DifferentiableHidden::Vector(_) => panic!("not a scalar"),
+            Differentiable::Scalar(a) => &(a.real_part()),
+            Differentiable::Vector(_) => panic!("not a scalar"),
         }
     }
 
     #[test]
     fn test_map() {
-        let v = DifferentiableHidden::Vector(
+        let v = Differentiable::Vector(
             vec![
-                DifferentiableHidden::Scalar(Scalar::Number(
+                Differentiable::Scalar(Scalar::Number(
                     NotNan::new(3.0).expect("3 is not NaN"),
                     Some(0usize),
                 )),
-                DifferentiableHidden::Scalar(Scalar::Number(
+                Differentiable::Scalar(Scalar::Number(
                     NotNan::new(4.0).expect("4 is not NaN"),
                     Some(1usize),
                 )),
@@ -306,8 +396,8 @@ mod tests {
         });
 
         let v = match mapped {
-            DifferentiableHidden::Scalar(_) => panic!("Not a scalar"),
-            DifferentiableHidden::Vector(v) => v
+            Differentiable::Scalar(_) => panic!("Not a scalar"),
+            Differentiable::Vector(v) => v
                 .iter()
                 .map(|d| extract_scalar(d).clone())
                 .collect::<Vec<_>>(),
@@ -318,26 +408,27 @@ mod tests {
 
     #[test]
     fn test_autodiff() {
-        let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
+        let input_vec = [
+            RankedDifferentiable::of_scalar(Scalar::<NotNan<f64>>::zero()).contents,
+            RankedDifferentiable::of_scalar(Scalar::<NotNan<f64>>::zero()).contents,
+        ];
         let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
         let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
-        let grad = Differentiable::grad(
+        let grad = grad(
             |x| {
-                Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                    predict_line_2,
-                    of_slice(&xs),
-                    of_slice(&ys),
+                RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(l2_loss_2(
+                    predict_line_2_unranked,
+                    RankedDifferentiable::of_slice(&xs),
+                    RankedDifferentiable::of_slice(&ys),
                     x,
                 ))])
             },
             &input_vec,
         );
-        let grad_vec: Vec<f64> = Differentiable::to_vector(grad)
-            .into_iter()
-            .map(to_scalar)
-            .map(|x| f64::from(*x.real_part()))
-            .collect();
-        assert_eq!(grad_vec, vec![-63.0, -21.0]);
+        let grad_vec = grad
+            .map(Differentiable::into_scalar)
+            .map(|x| f64::from(*x.real_part()));
+        assert_eq!(grad_vec, [-63.0, -21.0]);
     }
 }
diff --git a/little_learner/src/const_teq.rs b/little_learner/src/const_teq.rs
new file mode 100644
index 0000000..6952ee2
--- /dev/null
+++ b/little_learner/src/const_teq.rs
@@ -0,0 +1,13 @@
+use std::marker::PhantomData;
+
+pub struct ConstTeq<const A: usize, const B: usize> {
+    phantom_a: PhantomData<[(); A]>,
+    phantom_b: PhantomData<[(); B]>,
+}
+
+pub fn make<const A: usize>() -> ConstTeq<A, A> {
+    ConstTeq {
+        phantom_a: Default::default(),
+        phantom_b: Default::default(),
+    }
+}
diff --git a/little_learner/src/lib.rs b/little_learner/src/lib.rs
index cf17061..3bfa7ab 100644
--- a/little_learner/src/lib.rs
+++ b/little_learner/src/lib.rs
@@ -1,7 +1,9 @@
 #![allow(incomplete_features)]
 #![feature(generic_const_exprs)]
+#![feature(array_methods)]
 
 pub mod auto_diff;
+pub mod const_teq;
 pub mod expr_syntax_tree;
 pub mod loss;
 pub mod scalar;
diff --git a/little_learner/src/loss.rs b/little_learner/src/loss.rs
index b669a18..9bcbe43 100644
--- a/little_learner/src/loss.rs
+++ b/little_learner/src/loss.rs
@@ -4,7 +4,7 @@ use std::{
 };
 
 use crate::{
-    auto_diff::{of_scalar, to_scalar, Differentiable},
+    auto_diff::{Differentiable, RankedDifferentiable},
     scalar::Scalar,
     traits::{One, Zero},
 };
@@ -16,49 +16,61 @@ where
     x.clone() * x.clone()
}
 
-pub fn dot_2<A>(
-    x: &Differentiable<A>,
-    y: &Differentiable<A>,
-) -> Differentiable<A>
+pub fn elementwise_mul<A, const RANK: usize>(
+    x: &RankedDifferentiable<A, RANK>,
+    y: &RankedDifferentiable<A, RANK>,
+) -> RankedDifferentiable<A, RANK>
+where
+    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
+{
+    RankedDifferentiable::map2(x, y, &|x, y| x.clone() * y.clone())
+}
+
+pub fn dot_unranked<A>(x: &Differentiable<A>, y: &Differentiable<A>) -> Differentiable<A>
 where
     A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
 {
     Differentiable::map2(x, y, &|x, y| x.clone() * y.clone())
 }
 
-fn squared_2<A>(x: &Differentiable<A>) -> Differentiable<A>
+fn squared_2<A, const RANK: usize>(
+    x: &RankedDifferentiable<A, RANK>,
+) -> RankedDifferentiable<A, RANK>
 where
     A: Mul<Output = A> + Copy + Default,
 {
-    Differentiable::map2(x, x, &|x, y| x.clone() * y.clone())
+    RankedDifferentiable::map2(x, x, &|x, y| x.clone() * y.clone())
 }
 
-fn sum_2<A>(x: Differentiable<A>) -> Scalar<A>
+fn sum_2<A>(x: RankedDifferentiable<A, 1>) -> Scalar<A>
 where
     A: Sum + Clone + Add<Output = A> + Zero,
 {
-    Differentiable::to_vector(x)
+    RankedDifferentiable::to_vector(x)
         .into_iter()
-        .map(to_scalar)
+        .map(|x| x.to_scalar())
         .sum()
 }
 
-fn l2_norm_2<A>(prediction: &Differentiable<A>, data: &Differentiable<A>) -> Scalar<A>
+fn l2_norm_2<A>(
+    prediction: &RankedDifferentiable<A, 1>,
+    data: &RankedDifferentiable<A, 1>,
+) -> Scalar<A>
 where
     A: Sum + Mul<Output = A> + Copy + Default + Neg + Add<Output = A> + Zero + Neg<Output = A>,
 {
-    let diff = Differentiable::map2(prediction, data, &|x, y| x.clone() - y.clone());
+    let diff = RankedDifferentiable::map2(prediction, data, &|x, y| x.clone() - y.clone());
     sum_2(squared_2(&diff))
 }
 
-pub fn l2_loss_2<A, F, Params>(
+pub fn l2_loss_2<A, F, Params, const RANK: usize>(
     target: F,
-    data_xs: Differentiable<A>,
-    data_ys: Differentiable<A>,
+    data_xs: RankedDifferentiable<A, RANK>,
+    data_ys: RankedDifferentiable<A, 1>,
     params: Params,
 ) -> Scalar<A>
 where
-    F: Fn(Differentiable<A>, Params) -> Differentiable<A>,
+    F: Fn(RankedDifferentiable<A, RANK>, Params) -> RankedDifferentiable<A, 1>,
     A: Sum + Mul<Output = A> + Copy + Default + Neg<Output = A> + Add<Output = A> + Zero,
 {
     let pred_ys = target(data_xs, params);
@@ -66,42 +78,143 @@ where
 }
 
 pub fn predict_line_2<A>(
-    xs: Differentiable<A>,
-    theta: Differentiable<A>,
-) -> Differentiable<A>
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[RankedDifferentiable<A, 0>; 2],
+) -> RankedDifferentiable<A, 1>
 where
     A: Mul<Output = A> + Add<Output = A> + Sum<<A as Mul>::Output> + Copy + Default + One + Zero,
 {
-    let xs = Differentiable::to_vector(xs)
+    let xs = RankedDifferentiable::to_vector(xs)
         .into_iter()
-        .map(|v| to_scalar(v));
+        .map(|v| v.to_scalar());
     let mut result = vec![];
     for x in xs {
-        let left_arg = Differentiable::of_vector(vec![
-            of_scalar(x.clone()),
-            of_scalar(<Scalar<A> as One>::one()),
+        let left_arg = RankedDifferentiable::of_vector(vec![
+            RankedDifferentiable::of_scalar(x.clone()),
+            RankedDifferentiable::of_scalar(<Scalar<A> as One>::one()),
         ]);
-        let dotted = of_scalar(
-            Differentiable::to_vector(dot_2(&left_arg, &theta))
-                .iter()
-                .map(|x| to_scalar((*x).clone()))
-                .sum(),
+        let dotted = RankedDifferentiable::of_scalar(
+            RankedDifferentiable::to_vector(elementwise_mul(
+                &left_arg,
+                &RankedDifferentiable::of_vector(theta.to_vec()),
+            ))
+            .iter()
+            .map(|x| (*x).clone().to_scalar())
+            .sum(),
         );
         result.push(dotted);
     }
-    Differentiable::of_vector(result)
+    RankedDifferentiable::of_vector(result)
+}
+
+pub fn predict_line_2_unranked<A>(
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[Differentiable<A>; 2],
+) -> RankedDifferentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum<<A as Mul>::Output> + Copy + Default + One + Zero,
+{
+    let xs = RankedDifferentiable::to_vector(xs)
+        .into_iter()
+        .map(|v| v.to_scalar());
+    let mut result = vec![];
+    for x in xs {
+        let left_arg = RankedDifferentiable::of_vector(vec![
+            RankedDifferentiable::of_scalar(x.clone()),
+            RankedDifferentiable::of_scalar(<Scalar<A> as One>::one()),
+        ]);
+        let dotted = RankedDifferentiable::of_scalar(
+            dot_unranked(
+                left_arg.to_unranked_borrow(),
+                &Differentiable::Vector(theta.to_vec()),
+            )
+            .into_vector()
+            .into_iter()
+            .map(|x| x.into_scalar())
+            .sum(),
+        );
+        result.push(dotted);
+    }
+    RankedDifferentiable::of_vector(result)
 }
 
 pub fn predict_quadratic<A>(
-    xs: Differentiable<A>,
-    theta: Differentiable<A>,
-) -> Differentiable<A>
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[RankedDifferentiable<A, 0>; 3],
+) -> RankedDifferentiable<A, 1>
 where
     A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
 {
-    Differentiable::map(xs, &mut |x| {
+    RankedDifferentiable::map(xs, &mut |x| {
         let x_powers = vec![Scalar::make(A::one()), x.clone(), square(&x)];
-        let x_powers = Differentiable::of_vector(x_powers.into_iter().map(of_scalar).collect());
-        sum_2(dot_2(&x_powers, &theta))
+        let x_powers = RankedDifferentiable::of_vector(
+            x_powers
+                .into_iter()
+                .map(RankedDifferentiable::of_scalar)
+                .collect(),
+        );
+        RankedDifferentiable::to_vector(elementwise_mul(
+            &x_powers,
+            &RankedDifferentiable::of_vector(theta.to_vec()),
+        ))
+        .into_iter()
+        .map(|x| x.to_scalar())
+        .sum()
     })
 }
+
+pub fn predict_quadratic_unranked<A>(
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[Differentiable<A>; 3],
+) -> RankedDifferentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
+{
+    RankedDifferentiable::map(xs, &mut |x| {
+        let x_powers = vec![Scalar::make(A::one()), x.clone(), square(&x)];
+        let x_powers = RankedDifferentiable::of_vector(
+            x_powers
+                .into_iter()
+                .map(RankedDifferentiable::of_scalar)
+                .collect(),
+        );
+        dot_unranked(
+            x_powers.to_unranked_borrow(),
+            &Differentiable::Vector(theta.to_vec()),
+        )
+        .attach_rank::<1>()
+        .expect("wanted a tensor1")
+        .to_vector()
+        .into_iter()
+        .map(|x| x.to_scalar())
+        .sum()
+    })
+}
+
+// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate).
+pub fn predict_plane<A>(
+    xs: RankedDifferentiable<A, 2>,
+    theta: &[Differentiable<A>; 2],
+) -> RankedDifferentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
+{
+    if theta[0].rank() != 1 {
+        panic!("theta0 must be of rank 1, got: {}", theta[0].rank())
+    }
+    let theta0 = RankedDifferentiable::of_vector(
+        theta[0]
+            .borrow_vector()
+            .iter()
+            .map(|v| RankedDifferentiable::of_scalar(v.borrow_scalar().clone()))
+            .collect::<Vec<_>>(),
+    );
+    let theta1 = theta[1].borrow_scalar().clone();
+    let dotted: Vec<_> = xs
+        .to_vector()
+        .into_iter()
+        .map(|point| sum_2(elementwise_mul(&theta0, &point)))
+        .map(|x| RankedDifferentiable::of_scalar(x + theta1.clone()))
+        .collect();
+    RankedDifferentiable::of_vector(dotted)
+}
diff --git a/little_learner_app/Cargo.toml b/little_learner_app/Cargo.toml
index 721f1e5..93592cb 100644
--- a/little_learner_app/Cargo.toml
+++ b/little_learner_app/Cargo.toml
@@ -9,4 +9,3 @@ edition = "2021"
 immutable-chunkmap = "1.0.5"
 ordered-float = "3.6.0"
 little_learner = { path = "../little_learner" }
-arrayvec = "0.7.2"
diff --git a/little_learner_app/src/main.rs b/little_learner_app/src/main.rs
index a10bb07..da18e8e 100644
--- a/little_learner_app/src/main.rs
+++ b/little_learner_app/src/main.rs
@@ -6,9 +6,9 @@ mod with_tensor;
 
 use core::hash::Hash;
 use std::ops::{Add, AddAssign, Div, Mul, Neg};
 
-use little_learner::auto_diff::{of_scalar, of_slice, to_scalar, Differentiable};
+use little_learner::auto_diff::{grad, Differentiable, RankedDifferentiable};
 
-use little_learner::loss::{l2_loss_2, predict_quadratic};
+use little_learner::loss::{l2_loss_2, predict_plane};
 use little_learner::scalar::Scalar;
 use little_learner::traits::{Exp, One, Zero};
 use ordered_float::NotNan;
@@ -24,16 +24,16 @@ where
     v
 }
 
-struct GradientDescentHyper<A> {
+struct GradientDescentHyper<A> {
     learning_rate: A,
     iterations: u32,
 }
 
-fn gradient_descent_step<A, F>(
+fn gradient_descent_step<A, F, const RANK: usize, const PARAM_NUM: usize>(
     f: &F,
-    theta: Differentiable<A>,
-    params: &GradientDescentHyper<A>,
-) -> Differentiable<A>
+    theta: [Differentiable<A>; PARAM_NUM],
+    params: &GradientDescentHyper<A>,
+) -> [Differentiable<A>; PARAM_NUM]
 where
     A: Clone
         + Mul<Output = A>
        + Div<Output = A>
        + Add<Output = A>
        + AddAssign
        + Neg<Output = A>
        + Hash
        + Zero
        + One
        + Eq
        + Exp,
-    F: Fn(Differentiable<A>) -> Differentiable<A>,
+    F: Fn(&[Differentiable<A>; PARAM_NUM]) -> RankedDifferentiable<A, RANK>,
 {
-    let delta = Differentiable::grad(f, &theta);
-    Differentiable::map2(&theta, &delta, &|theta, delta| {
-        (*theta).clone() - (Scalar::make((params.learning_rate).clone()) * (*delta).clone())
+    let delta = grad(f, &theta);
+    let mut i = 0;
+    theta.map(|theta| {
+        let delta = &delta[i];
+        i += 1;
+        // For speed, you might want to truncate_dual this.
+        let learning_rate = Scalar::make((params.learning_rate).clone());
+        Differentiable::map2(
+            &theta,
+            &delta.map(&mut |s| s * learning_rate.clone()),
+            &|theta, delta| (*theta).clone() - (*delta).clone(),
+        )
     })
 }
 
 fn main() {
-    let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
-    let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
+    let plane_xs = [
+        [1.0, 2.05],
+        [1.0, 3.0],
+        [2.0, 2.0],
+        [2.0, 3.91],
+        [3.0, 6.13],
+        [4.0, 8.09],
+    ];
+    let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
 
     let hyper = GradientDescentHyper {
         learning_rate: NotNan::new(0.001).expect("not nan"),
@@ -64,48 +80,63 @@ fn main() {
     };
 
     let iterated = {
-        let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
-        let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+        let xs = plane_xs.map(|x| {
+            [
+                NotNan::new(x[0]).expect("not nan"),
+                NotNan::new(x[1]).expect("not nan"),
+            ]
+        });
+        let ys = plane_ys.map(|x| NotNan::new(x).expect("not nan"));
         iterate(
             &|theta| {
                 gradient_descent_step(
                     &|x| {
-                        Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                            predict_quadratic,
-                            of_slice(&xs),
-                            of_slice(&ys),
-                            x,
-                        ))])
+                        RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                            l2_loss_2(
+                                predict_plane,
+                                RankedDifferentiable::of_slice_2::<_, 2>(&xs),
+                                RankedDifferentiable::of_slice(ys),
+                                x,
+                            ),
+                        )])
                     },
                     theta,
                     &hyper,
                 )
             },
-            of_slice(&[
-                NotNan::<f64>::zero(),
-                NotNan::<f64>::zero(),
-                NotNan::<f64>::zero(),
-            ]),
+            [
+                RankedDifferentiable::of_slice([NotNan::zero(), NotNan::zero()]).to_unranked(),
+                Differentiable::Scalar(Scalar::zero()),
+            ],
             hyper.iterations,
         )
     };
 
-    println!(
-        "After iteration: {:?}",
-        Differentiable::to_vector(iterated)
+    let [theta0, theta1] = iterated;
+
+    let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+    let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+
+    assert_eq!(
+        theta0
+            .to_vector()
             .into_iter()
-            .map(|x| to_scalar(x).real_part().into_inner())
-            .collect::<Vec<_>>()
+            .map(|x| x.to_scalar().real_part().into_inner())
+            .collect::<Vec<_>>(),
+        [3.97757644609063, 2.0496557321494446]
+    );
+    assert_eq!(
+        theta1.to_scalar().real_part().into_inner(),
+        5.786758464448078
     );
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrayvec::ArrayVec;
     use little_learner::{
-        auto_diff::to_scalar,
-        loss::{predict_line_2, square},
+        auto_diff::grad,
+        loss::{l2_loss_2, predict_line_2, predict_line_2_unranked, predict_quadratic_unranked},
     };
 
     use crate::with_tensor::{l2_loss, predict_line};
@@ -116,9 +147,12 @@ mod tests {
         let xs = [2.0, 1.0, 4.0, 3.0];
         let ys = [1.8, 1.2, 4.2, 3.3];
         let loss = l2_loss_2(
             predict_line_2,
-            of_slice(&xs),
-            of_slice(&ys),
-            of_slice(&[0.0, 0.0]),
+            RankedDifferentiable::of_slice(&xs),
+            RankedDifferentiable::of_slice(&ys),
+            &[
+                RankedDifferentiable::of_scalar(Scalar::zero()),
+                RankedDifferentiable::of_scalar(Scalar::zero()),
+            ],
         );
         assert_eq!(*loss.real_part(), 33.21);
@@ -134,29 +168,39 @@ mod tests {
 
     #[test]
     fn grad_example() {
-        let input_vec = of_slice(&[NotNan::new(27.0).expect("not nan")]);
+        let input_vec = [Differentiable::Scalar(Scalar::make(
+            NotNan::new(27.0).expect("not nan"),
+        ))];
 
-        let grad: Vec<_> = Differentiable::to_vector(Differentiable::grad(
-            |x| Differentiable::map(x, &mut |x| square(&x)),
+        let grad: Vec<_> = grad(
+            |x| {
+                RankedDifferentiable::of_scalar(
+                    x[0].borrow_scalar().clone() * x[0].borrow_scalar().clone(),
+                )
+            },
             &input_vec,
-        ))
+        )
         .into_iter()
-        .map(|x| to_scalar(x).real_part().into_inner())
+        .map(|x| x.into_scalar().real_part().into_inner())
         .collect();
 
         assert_eq!(grad, [54.0]);
     }
 
     #[test]
     fn loss_gradient() {
-        let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
+        let zero = Scalar::<NotNan<f64>>::zero();
+        let input_vec = [
+            RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+            RankedDifferentiable::of_scalar(zero).to_unranked(),
+        ];
         let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
         let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
-        let grad = Differentiable::grad(
+        let grad = grad(
             |x| {
-                Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                    predict_line_2,
-                    of_slice(&xs),
-                    of_slice(&ys),
+                RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(l2_loss_2(
+                    predict_line_2_unranked,
+                    RankedDifferentiable::of_slice(&xs),
+                    RankedDifferentiable::of_slice(&ys),
                     x,
                 ))])
             },
             &input_vec,
         );
 
         assert_eq!(
-            Differentiable::to_vector(grad)
-                .into_iter()
-                .map(|x| *(to_scalar(x).real_part()))
+            grad.into_iter()
+                .map(|x| *(x.into_scalar().real_part()))
                 .collect::<Vec<_>>(),
             [-63.0, -21.0]
         );
     }
 
     #[test]
     fn test_iterate() {
-        let f = |t: [i32; 3]| {
-            let mut vec = ArrayVec::<i32, 3>::new();
-            for i in t {
-                vec.push(i - 3);
-            }
-            vec.into_inner().unwrap()
-        };
+        let f = |t: [i32; 3]| t.map(|i| i - 3);
         assert_eq!(iterate(&f, [1, 2, 3], 5u32), [-14, -13, -12]);
     }
 
@@ -189,6 +226,8 @@ mod tests {
         let xs = [2.0, 1.0, 4.0, 3.0];
         let ys = [1.8, 1.2, 4.2, 3.3];
 
+        let zero = Scalar::<NotNan<f64>>::zero();
+
         let hyper = GradientDescentHyper {
             learning_rate: NotNan::new(0.01).expect("not nan"),
             iterations: 1000,
         };
@@ -200,24 +239,29 @@ mod tests {
             iterate(
                 &|theta| {
                     gradient_descent_step(
                         &|x| {
-                            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                                predict_line_2,
-                                of_slice(&xs),
-                                of_slice(&ys),
-                                x,
-                            ))])
+                            RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                                l2_loss_2(
+                                    predict_line_2_unranked,
+                                    RankedDifferentiable::of_slice(&xs),
+                                    RankedDifferentiable::of_slice(&ys),
+                                    x,
+                                ),
+                            )])
                         },
                         theta,
                         &hyper,
                    )
                },
-                of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
+                [
+                    RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                    RankedDifferentiable::of_scalar(zero).to_unranked(),
+                ],
                 hyper.iterations,
            )
        };
 
-        let iterated = Differentiable::to_vector(iterated)
+        let iterated = iterated
             .into_iter()
-            .map(|x| to_scalar(x).real_part().into_inner())
+            .map(|x| x.into_scalar().real_part().into_inner())
             .collect::<Vec<_>>();
 
         assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
     }
 
@@ -228,6 +272,8 @@ mod tests {
         let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
         let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
 
+        let zero = Scalar::<NotNan<f64>>::zero();
+
         let hyper = GradientDescentHyper {
             learning_rate: NotNan::new(0.001).expect("not nan"),
             iterations: 1000,
         };
@@ -240,35 +286,104 @@ mod tests {
             iterate(
                 &|theta| {
                     gradient_descent_step(
                         &|x| {
-                            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                                predict_quadratic,
-                                of_slice(&xs),
-                                of_slice(&ys),
-                                x,
-                            ))])
+                            RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                                l2_loss_2(
+                                    predict_quadratic_unranked,
+                                    RankedDifferentiable::of_slice(&xs),
+                                    RankedDifferentiable::of_slice(&ys),
+                                    x,
+                                ),
+                            )])
                         },
                         theta,
                         &hyper,
                    )
                },
-                of_slice(&[
-                    NotNan::<f64>::zero(),
-                    NotNan::<f64>::zero(),
-                    NotNan::<f64>::zero(),
-                ]),
+                [
+                    RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                    RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                    RankedDifferentiable::of_scalar(zero).to_unranked(),
+                ],
                 hyper.iterations,
            )
        };
 
-        let iterated = Differentiable::to_vector(iterated)
+        let iterated = iterated
             .into_iter()
-            .map(|x| to_scalar(x).real_part().into_inner())
+            .map(|x| x.into_scalar().real_part().into_inner())
             .collect::<Vec<_>>();
 
-        println!("{:?}", iterated);
-
         assert_eq!(
             iterated,
             [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
         );
     }
+
+    #[test]
+    fn optimise_plane() {
+        let plane_xs = [
+            [1.0, 2.05],
+            [1.0, 3.0],
+            [2.0, 2.0],
+            [2.0, 3.91],
+            [3.0, 6.13],
+            [4.0, 8.09],
+        ];
+        let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
+
+        let hyper = GradientDescentHyper {
+            learning_rate: NotNan::new(0.001).expect("not nan"),
+            iterations: 1000,
+        };
+
+        let iterated = {
+            let xs = plane_xs.map(|x| {
+                [
+                    NotNan::new(x[0]).expect("not nan"),
+                    NotNan::new(x[1]).expect("not nan"),
+                ]
+            });
+            let ys = plane_ys.map(|x| NotNan::new(x).expect("not nan"));
+            iterate(
+                &|theta| {
+                    gradient_descent_step(
+                        &|x| {
+                            RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                                l2_loss_2(
+                                    predict_plane,
+                                    RankedDifferentiable::of_slice_2::<_, 2>(&xs),
+                                    RankedDifferentiable::of_slice(ys),
+                                    x,
+                                ),
+                            )])
+                        },
+                        theta,
+                        &hyper,
+                    )
+                },
+                [
+                    RankedDifferentiable::of_slice([NotNan::zero(), NotNan::zero()]).to_unranked(),
+                    Differentiable::Scalar(Scalar::zero()),
+                ],
+                hyper.iterations,
+            )
+        };
+
+        let [theta0, theta1] = iterated;
+
+        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+
+        assert_eq!(
+            theta0
+                .to_vector()
+                .into_iter()
+                .map(|x| x.to_scalar().real_part().into_inner())
+                .collect::<Vec<_>>(),
+            [3.97757644609063, 2.0496557321494446]
+        );
+        assert_eq!(
+            theta1.to_scalar().real_part().into_inner(),
+            5.786758464448078
+        );
+    }
 }
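
Not part of the diff above: a minimal usage sketch of the rank-attaching API this change introduces (the `RankedDifferentiable` wrapper plus `Differentiable::attach_rank`), assuming the crate layout shown in the diff; the binary and variable names here are illustrative only.

    use little_learner::auto_diff::{Differentiable, RankedDifferentiable};
    use little_learner::traits::Zero;
    use ordered_float::NotNan;

    fn main() {
        // Build a rank-1 tensor through the ranked wrapper, then forget the rank.
        let unranked: Differentiable<NotNan<f64>> =
            RankedDifferentiable::of_slice([NotNan::zero(), NotNan::zero()]).to_unranked();
        assert_eq!(unranked.rank(), 1);

        // attach_rank re-checks the runtime rank against the const-generic RANK:
        // Some(_) when they agree, None otherwise.
        let ranked = unranked.attach_rank::<1>().expect("rank 1 tensor");
        assert_eq!(ranked.to_unranked_borrow().rank(), 1);
    }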