Implement plane prediction (#11)

Patrick Stevens
2023-04-07 20:41:49 +01:00
committed by GitHub
parent 3c964bc132
commit 753722d7ca
7 changed files with 574 additions and 242 deletions

Cargo.lock (generated)

@@ -55,7 +55,6 @@ dependencies = [
 name = "little_learner_app"
 version = "0.1.0"
 dependencies = [
- "arrayvec",
  "immutable-chunkmap",
  "little_learner",
  "ordered-float",


@@ -7,12 +7,12 @@ use std::{
     ops::{AddAssign, Div, Mul, Neg},
 };
-impl<A> Zero for DifferentiableHidden<A>
+impl<A> Zero for Differentiable<A>
 where
     A: Zero,
 {
-    fn zero() -> DifferentiableHidden<A> {
-        DifferentiableHidden::Scalar(Scalar::Number(A::zero(), None))
+    fn zero() -> Differentiable<A> {
+        Differentiable::Scalar(Scalar::Number(A::zero(), None))
     }
 }
@@ -25,16 +25,16 @@ where
     }
 }
-impl<A> One for DifferentiableHidden<A>
+impl<A> One for Differentiable<A>
 where
     A: One,
 {
-    fn one() -> DifferentiableHidden<A> {
-        DifferentiableHidden::Scalar(Scalar::one())
+    fn one() -> Differentiable<A> {
+        Differentiable::Scalar(Scalar::one())
     }
 }
-impl<A> Clone for DifferentiableHidden<A>
+impl<A> Clone for Differentiable<A>
 where
     A: Clone,
 {
@@ -47,19 +47,19 @@ where
     }
 }
 #[derive(Debug)]
-enum DifferentiableHidden<A> {
+pub enum Differentiable<A> {
     Scalar(Scalar<A>),
-    Vector(Vec<DifferentiableHidden<A>>),
+    Vector(Vec<Differentiable<A>>),
 }
-impl<A> Display for DifferentiableHidden<A>
+impl<A> Display for Differentiable<A>
 where
     A: Display,
 {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            DifferentiableHidden::Scalar(s) => f.write_fmt(format_args!("{}", s)),
-            DifferentiableHidden::Vector(v) => {
+            Differentiable::Scalar(s) => f.write_fmt(format_args!("{}", s)),
+            Differentiable::Vector(v) => {
                 f.write_char('[')?;
                 for v in v.iter() {
                     f.write_fmt(format_args!("{}", v))?;
@@ -71,32 +71,32 @@ where
     }
 }
-impl<A> DifferentiableHidden<A> {
-    fn map<B, F>(&self, f: &mut F) -> DifferentiableHidden<B>
+impl<A> Differentiable<A> {
+    pub fn map<B, F>(&self, f: &mut F) -> Differentiable<B>
     where
         F: FnMut(Scalar<A>) -> Scalar<B>,
         A: Clone,
     {
         match self {
-            DifferentiableHidden::Scalar(a) => DifferentiableHidden::Scalar(f(a.clone())),
-            DifferentiableHidden::Vector(slice) => {
-                DifferentiableHidden::Vector(slice.iter().map(|x| x.map(f)).collect())
+            Differentiable::Scalar(a) => Differentiable::Scalar(f(a.clone())),
+            Differentiable::Vector(slice) => {
+                Differentiable::Vector(slice.iter().map(|x| x.map(f)).collect())
             }
         }
     }
-    fn map2<B, C, F>(&self, other: &DifferentiableHidden<B>, f: &F) -> DifferentiableHidden<C>
+    pub fn map2<B, C, F>(&self, other: &Differentiable<B>, f: &F) -> Differentiable<C>
     where
         F: Fn(&Scalar<A>, &Scalar<B>) -> Scalar<C>,
         A: Clone,
         B: Clone,
     {
         match (self, other) {
-            (DifferentiableHidden::Scalar(a), DifferentiableHidden::Scalar(b)) => {
-                DifferentiableHidden::Scalar(f(a, b))
+            (Differentiable::Scalar(a), Differentiable::Scalar(b)) => {
+                Differentiable::Scalar(f(a, b))
             }
-            (DifferentiableHidden::Vector(slice_a), DifferentiableHidden::Vector(slice_b)) => {
-                DifferentiableHidden::Vector(
+            (Differentiable::Vector(slice_a), Differentiable::Vector(slice_b)) => {
+                Differentiable::Vector(
                     slice_a
                         .iter()
                         .zip(slice_b.iter())
@@ -108,20 +108,69 @@ impl<A> DifferentiableHidden<A> {
         }
     }
-    fn of_slice(input: &[A]) -> DifferentiableHidden<A>
+    fn of_slice<T>(input: T) -> Differentiable<A>
     where
         A: Clone,
+        T: AsRef<[A]>,
     {
-        DifferentiableHidden::Vector(
+        Differentiable::Vector(
             input
+                .as_ref()
                 .iter()
-                .map(|v| DifferentiableHidden::Scalar(Scalar::Number((*v).clone(), None)))
+                .map(|v| Differentiable::Scalar(Scalar::Number((*v).clone(), None)))
                 .collect(),
         )
     }
+    pub fn rank(&self) -> usize {
+        match self {
+            Differentiable::Scalar(_) => 0,
+            Differentiable::Vector(v) => v[0].rank() + 1,
+        }
+    }
+    pub fn attach_rank<const RANK: usize>(
+        self: Differentiable<A>,
+    ) -> Option<RankedDifferentiable<A, RANK>> {
+        if self.rank() == RANK {
+            Some(RankedDifferentiable { contents: self })
+        } else {
+            None
+        }
+    }
 }
-impl<A> DifferentiableHidden<A>
+impl<A> Differentiable<A> {
+    pub fn into_scalar(self) -> Scalar<A> {
+        match self {
+            Differentiable::Scalar(s) => s,
+            Differentiable::Vector(_) => panic!("not a scalar"),
+        }
+    }
+    pub fn into_vector(self) -> Vec<Differentiable<A>> {
+        match self {
+            Differentiable::Scalar(_) => panic!("not a vector"),
+            Differentiable::Vector(v) => v,
+        }
+    }
+    pub fn borrow_scalar(&self) -> &Scalar<A> {
+        match self {
+            Differentiable::Scalar(s) => s,
+            Differentiable::Vector(_) => panic!("not a scalar"),
+        }
+    }
+    pub fn borrow_vector(&self) -> &Vec<Differentiable<A>> {
+        match self {
+            Differentiable::Scalar(_) => panic!("not a vector"),
+            Differentiable::Vector(v) => v,
+        }
+    }
+}
+impl<A> Differentiable<A>
 where
     A: Clone
         + Eq
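
(Aside, not part of the commit: a minimal sketch of the rank machinery added in the hunk above. rank reports the runtime nesting depth, and attach_rank only upgrades an untyped Differentiable to a RankedDifferentiable when that depth matches the requested const rank. The values and the function name rank_demo are illustrative only.)

use little_learner::auto_diff::Differentiable;
use little_learner::scalar::Scalar;
use ordered_float::NotNan;

fn rank_demo() {
    // A vector containing one scalar has rank 1.
    let v = Differentiable::Vector(vec![Differentiable::Scalar(Scalar::make(
        NotNan::new(1.0).expect("not nan"),
    ))]);
    assert_eq!(v.rank(), 1);
    // attach_rank succeeds only when the runtime rank matches the const rank.
    assert!(v.clone().attach_rank::<1>().is_some());
    assert!(v.attach_rank::<2>().is_none());
}
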
@@ -134,7 +183,7 @@ where
         + One
         + Neg<Output = A>,
 {
-    fn accumulate_gradients_vec(v: &[DifferentiableHidden<A>], acc: &mut HashMap<Scalar<A>, A>) {
+    fn accumulate_gradients_vec(v: &[Differentiable<A>], acc: &mut HashMap<Scalar<A>, A>) {
         for v in v.iter().rev() {
             v.accumulate_gradients(acc);
         }
@@ -142,33 +191,36 @@ where
     fn accumulate_gradients(&self, acc: &mut HashMap<Scalar<A>, A>) {
         match self {
-            DifferentiableHidden::Scalar(y) => {
+            Differentiable::Scalar(y) => {
                 let k = y.clone_link();
                 k.invoke(y, A::one(), acc);
             }
-            DifferentiableHidden::Vector(y) => {
-                DifferentiableHidden::accumulate_gradients_vec(y, acc)
-            }
+            Differentiable::Vector(y) => Differentiable::accumulate_gradients_vec(y, acc),
         }
     }
-    fn grad_once(self, wrt: &DifferentiableHidden<A>) -> DifferentiableHidden<A> {
+    fn grad_once<const PARAM_NUM: usize>(
+        self,
+        wrt: [Differentiable<A>; PARAM_NUM],
+    ) -> [Differentiable<A>; PARAM_NUM] {
         let mut acc = HashMap::new();
         self.accumulate_gradients(&mut acc);
-        wrt.map(&mut |d| match acc.get(&d) {
-            None => Scalar::Number(A::zero(), None),
-            Some(x) => Scalar::Number(x.clone(), None),
+        wrt.map(|wrt| {
+            wrt.map(&mut |d| match acc.get(&d) {
+                None => Scalar::Number(A::zero(), None),
+                Some(x) => Scalar::Number(x.clone(), None),
+            })
         })
     }
 }
 #[derive(Clone, Debug)]
-pub struct Differentiable<A, const RANK: usize> {
-    contents: DifferentiableHidden<A>,
+pub struct RankedDifferentiable<A, const RANK: usize> {
+    contents: Differentiable<A>,
 }
-impl<A, const RANK: usize> Display for Differentiable<A, RANK>
+impl<A, const RANK: usize> Display for RankedDifferentiable<A, RANK>
 where
     A: Display,
 {
@@ -177,123 +229,161 @@ where
     }
 }
-pub fn of_scalar<A>(s: Scalar<A>) -> Differentiable<A, 0> {
-    Differentiable {
-        contents: DifferentiableHidden::Scalar(s),
-    }
-}
-pub fn to_scalar<A>(s: Differentiable<A, 0>) -> Scalar<A> {
-    match s.contents {
-        DifferentiableHidden::Scalar(s) => s,
-        DifferentiableHidden::Vector(_) => panic!("not a vector"),
-    }
-}
-pub fn of_slice<A>(input: &[A]) -> Differentiable<A, 1>
-where
-    A: Clone,
-{
-    Differentiable {
-        contents: DifferentiableHidden::of_slice(input),
-    }
-}
-impl<A, const RANK: usize> Differentiable<A, RANK> {
-    pub fn of_vector(s: Vec<Differentiable<A, RANK>>) -> Differentiable<A, { RANK + 1 }> {
-        Differentiable {
-            contents: DifferentiableHidden::Vector(s.into_iter().map(|v| v.contents).collect()),
-        }
-    }
-    pub fn map<B, F>(s: Differentiable<A, RANK>, f: &mut F) -> Differentiable<B, RANK>
-    where
-        F: FnMut(Scalar<A>) -> Scalar<B>,
-        A: Clone,
-    {
-        Differentiable {
-            contents: DifferentiableHidden::map(&s.contents, f),
-        }
-    }
-    pub fn map2<B, C, F>(
-        self: &Differentiable<A, RANK>,
-        other: &Differentiable<B, RANK>,
-        f: &F,
-    ) -> Differentiable<C, RANK>
-    where
-        F: Fn(&Scalar<A>, &Scalar<B>) -> Scalar<C>,
-        A: Clone,
-        B: Clone,
-    {
-        Differentiable {
-            contents: DifferentiableHidden::map2(&self.contents, &other.contents, f),
-        }
-    }
-    pub fn to_vector(s: Differentiable<A, { RANK + 1 }>) -> Vec<Differentiable<A, RANK>> {
-        match s.contents {
-            DifferentiableHidden::Scalar(_) => panic!("not a scalar"),
-            DifferentiableHidden::Vector(v) => v
-                .into_iter()
-                .map(|v| Differentiable { contents: v })
-                .collect(),
-        }
-    }
-    pub fn grad<F>(f: F, theta: &Differentiable<A, RANK>) -> Differentiable<A, RANK>
-    where
-        F: Fn(Differentiable<A, RANK>) -> Differentiable<A, RANK>,
-        A: Clone
-            + Hash
-            + AddAssign
-            + Mul<Output = A>
-            + Exp
-            + Div<Output = A>
-            + Zero
-            + One
-            + Neg<Output = A>
-            + Eq,
-    {
-        let mut i = 0usize;
-        let wrt = theta.contents.map(&mut |x| {
-            let result = Scalar::truncate_dual(x, Some(i));
-            i += 1;
-            result
-        });
-        let after_f = f(Differentiable {
-            contents: wrt.clone(),
-        });
-        Differentiable {
-            contents: DifferentiableHidden::grad_once(after_f.contents, &wrt),
-        }
-    }
+impl<A> RankedDifferentiable<A, 0> {
+    pub fn to_scalar(self) -> Scalar<A> {
+        match self.contents {
+            Differentiable::Scalar(s) => s,
+            Differentiable::Vector(_) => panic!("not a scalar despite teq that we're a scalar"),
+        }
+    }
+    pub fn of_scalar(s: Scalar<A>) -> RankedDifferentiable<A, 0> {
+        RankedDifferentiable {
+            contents: Differentiable::Scalar(s),
+        }
+    }
+}
+impl<A> RankedDifferentiable<A, 1> {
+    pub fn of_slice<T>(input: T) -> RankedDifferentiable<A, 1>
+    where
+        A: Clone,
+        T: AsRef<[A]>,
+    {
+        RankedDifferentiable {
+            contents: Differentiable::of_slice(input),
+        }
+    }
+}
+impl<A> RankedDifferentiable<A, 2> {
+    pub fn of_slice_2<T, const N: usize>(input: &[T]) -> RankedDifferentiable<A, 2>
+    where
+        A: Clone,
+        T: AsRef<[A]>,
+    {
+        let v = input
+            .iter()
+            .map(|x| Differentiable::of_slice(x))
+            .collect::<Vec<_>>();
+        RankedDifferentiable {
+            contents: Differentiable::Vector(v),
+        }
+    }
+}
+impl<A, const RANK: usize> RankedDifferentiable<A, RANK> {
+    pub fn to_unranked(self) -> Differentiable<A> {
+        self.contents
+    }
+    pub fn to_unranked_borrow(&self) -> &Differentiable<A> {
+        &self.contents
+    }
+    pub fn of_vector(
+        s: Vec<RankedDifferentiable<A, RANK>>,
+    ) -> RankedDifferentiable<A, { RANK + 1 }> {
+        RankedDifferentiable {
+            contents: Differentiable::Vector(s.into_iter().map(|v| v.contents).collect()),
+        }
+    }
+    pub fn map<B, F>(
+        self: RankedDifferentiable<A, RANK>,
+        f: &mut F,
+    ) -> RankedDifferentiable<B, RANK>
+    where
+        F: FnMut(Scalar<A>) -> Scalar<B>,
+        A: Clone,
+    {
+        RankedDifferentiable {
+            contents: Differentiable::map(&self.contents, f),
+        }
+    }
+    pub fn map2<B, C, F>(
+        self: &RankedDifferentiable<A, RANK>,
+        other: &RankedDifferentiable<B, RANK>,
+        f: &F,
+    ) -> RankedDifferentiable<C, RANK>
+    where
+        F: Fn(&Scalar<A>, &Scalar<B>) -> Scalar<C>,
+        A: Clone,
+        B: Clone,
+    {
+        RankedDifferentiable {
+            contents: Differentiable::map2(&self.contents, &other.contents, f),
+        }
+    }
+    pub fn to_vector(
+        self: RankedDifferentiable<A, RANK>,
+    ) -> Vec<RankedDifferentiable<A, { RANK - 1 }>> {
+        match self.contents {
+            Differentiable::Scalar(_) => panic!("not a scalar"),
+            Differentiable::Vector(v) => v
+                .into_iter()
+                .map(|v| RankedDifferentiable { contents: v })
+                .collect(),
+        }
+    }
+}
+pub fn grad<A, F, const RANK: usize, const PARAM_RANK: usize>(
+    f: F,
+    theta: &[Differentiable<A>; PARAM_RANK],
+) -> [Differentiable<A>; PARAM_RANK]
+where
+    F: Fn(&[Differentiable<A>; PARAM_RANK]) -> RankedDifferentiable<A, RANK>,
+    A: ?Sized
+        + Clone
+        + Hash
+        + AddAssign
+        + Mul<Output = A>
+        + Exp
+        + Div<Output = A>
+        + Zero
+        + One
+        + Neg<Output = A>
+        + Eq,
+{
+    let mut i = 0usize;
+    let wrt = theta.each_ref().map(|theta| {
+        theta.map(&mut |x| {
+            let result = Scalar::truncate_dual(x, Some(i));
+            i += 1;
+            result
+        })
+    });
+    let after_f = f(&wrt);
+    Differentiable::grad_once(after_f.contents, wrt)
 }
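
(Aside, not part of the commit: a sketch of the reworked grad API, which now takes an array of parameters and returns one gradient per entry. It mirrors the grad_example test further down; the value 3.0 and the name grad_sketch are illustrative only.)

use little_learner::auto_diff::{grad, Differentiable, RankedDifferentiable};
use little_learner::scalar::Scalar;
use ordered_float::NotNan;

fn grad_sketch() {
    let theta = [Differentiable::Scalar(Scalar::make(
        NotNan::new(3.0).expect("not nan"),
    ))];
    // Differentiate x * x with respect to its single parameter: d/dx is 2x, so 6 at x = 3.
    let gradient = grad(
        |x| {
            RankedDifferentiable::of_scalar(
                x[0].borrow_scalar().clone() * x[0].borrow_scalar().clone(),
            )
        },
        &theta,
    );
    assert_eq!(
        gradient.map(|g| g.into_scalar().real_part().into_inner()),
        [6.0]
    );
}
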
 #[cfg(test)]
 mod tests {
     use ordered_float::NotNan;
-    use crate::loss::{l2_loss_2, predict_line_2};
+    use crate::loss::{l2_loss_2, predict_line_2_unranked};
     use super::*;
-    fn extract_scalar<'a, A>(d: &'a DifferentiableHidden<A>) -> &'a A {
+    fn extract_scalar<'a, A>(d: &'a Differentiable<A>) -> &'a A {
         match d {
-            DifferentiableHidden::Scalar(a) => &(a.real_part()),
-            DifferentiableHidden::Vector(_) => panic!("not a scalar"),
+            Differentiable::Scalar(a) => &(a.real_part()),
+            Differentiable::Vector(_) => panic!("not a scalar"),
         }
     }
     #[test]
     fn test_map() {
-        let v = DifferentiableHidden::Vector(
+        let v = Differentiable::Vector(
             vec![
-                DifferentiableHidden::Scalar(Scalar::Number(
+                Differentiable::Scalar(Scalar::Number(
                     NotNan::new(3.0).expect("3 is not NaN"),
                     Some(0usize),
                 )),
-                DifferentiableHidden::Scalar(Scalar::Number(
+                Differentiable::Scalar(Scalar::Number(
                     NotNan::new(4.0).expect("4 is not NaN"),
                     Some(1usize),
                 )),
@@ -306,8 +396,8 @@ mod tests {
         });
         let v = match mapped {
-            DifferentiableHidden::Scalar(_) => panic!("Not a scalar"),
-            DifferentiableHidden::Vector(v) => v
+            Differentiable::Scalar(_) => panic!("Not a scalar"),
+            Differentiable::Vector(v) => v
                 .iter()
                 .map(|d| extract_scalar(d).clone())
                 .collect::<Vec<_>>(),
@@ -318,26 +408,27 @@ mod tests {
     #[test]
     fn test_autodiff() {
-        let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
+        let input_vec = [
+            RankedDifferentiable::of_scalar(Scalar::<NotNan<f64>>::zero()).contents,
+            RankedDifferentiable::of_scalar(Scalar::<NotNan<f64>>::zero()).contents,
+        ];
         let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
         let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
-        let grad = Differentiable::grad(
+        let grad = grad(
             |x| {
-                Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                    predict_line_2,
-                    of_slice(&xs),
-                    of_slice(&ys),
+                RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(l2_loss_2(
+                    predict_line_2_unranked,
+                    RankedDifferentiable::of_slice(&xs),
+                    RankedDifferentiable::of_slice(&ys),
                     x,
                 ))])
             },
             &input_vec,
         );
-        let grad_vec: Vec<f64> = Differentiable::to_vector(grad)
-            .into_iter()
-            .map(to_scalar)
-            .map(|x| f64::from(*x.real_part()))
-            .collect();
-        assert_eq!(grad_vec, vec![-63.0, -21.0]);
+        let grad_vec = grad
+            .map(Differentiable::into_scalar)
+            .map(|x| f64::from(*x.real_part()));
+        assert_eq!(grad_vec, [-63.0, -21.0]);
     }
 }


@@ -0,0 +1,13 @@
use std::marker::PhantomData;
pub struct ConstTeq<const A: usize, const B: usize> {
phantom_a: PhantomData<[(); A]>,
phantom_b: PhantomData<[(); B]>,
}
pub fn make<const A: usize>() -> ConstTeq<A, A> {
ConstTeq {
phantom_a: Default::default(),
phantom_b: Default::default(),
}
}
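
(Aside, not part of the commit: ConstTeq is a zero-sized witness that two const generics are equal, since make can only produce a ConstTeq<A, A>. A minimal sketch of how such a witness could be consumed; require_equal and teq_sketch are hypothetical helpers, not part of the crate.)

use little_learner::const_teq::{make, ConstTeq};

// Only callable when the caller can produce a witness that A == B.
fn require_equal<const A: usize, const B: usize>(_witness: ConstTeq<A, B>) {}

fn teq_sketch() {
    // make::<3>() has type ConstTeq<3, 3>, so this compiles;
    // there is no way to conjure a ConstTeq<3, 4>, so a mismatched call
    // would be rejected at compile time.
    require_equal(make::<3>());
}
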


@@ -1,7 +1,9 @@
 #![allow(incomplete_features)]
 #![feature(generic_const_exprs)]
+#![feature(array_methods)]
 pub mod auto_diff;
+pub mod const_teq;
 pub mod expr_syntax_tree;
 pub mod loss;
 pub mod scalar;


@@ -4,7 +4,7 @@ use std::{
 };
 use crate::{
-    auto_diff::{of_scalar, to_scalar, Differentiable},
+    auto_diff::{Differentiable, RankedDifferentiable},
     scalar::Scalar,
     traits::{One, Zero},
 };
@@ -16,49 +16,61 @@ where
     x.clone() * x.clone()
 }
-pub fn dot_2<A, const RANK: usize>(
-    x: &Differentiable<A, RANK>,
-    y: &Differentiable<A, RANK>,
-) -> Differentiable<A, RANK>
+pub fn elementwise_mul<A, const RANK: usize>(
+    x: &RankedDifferentiable<A, RANK>,
+    y: &RankedDifferentiable<A, RANK>,
+) -> RankedDifferentiable<A, RANK>
+where
+    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
+{
+    RankedDifferentiable::map2(x, y, &|x, y| x.clone() * y.clone())
+}
+pub fn dot_unranked<A>(x: &Differentiable<A>, y: &Differentiable<A>) -> Differentiable<A>
 where
     A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
 {
     Differentiable::map2(x, y, &|x, y| x.clone() * y.clone())
 }
-fn squared_2<A, const RANK: usize>(x: &Differentiable<A, RANK>) -> Differentiable<A, RANK>
+fn squared_2<A, const RANK: usize>(
+    x: &RankedDifferentiable<A, RANK>,
+) -> RankedDifferentiable<A, RANK>
 where
     A: Mul<Output = A> + Copy + Default,
 {
-    Differentiable::map2(x, x, &|x, y| x.clone() * y.clone())
+    RankedDifferentiable::map2(x, x, &|x, y| x.clone() * y.clone())
 }
-fn sum_2<A>(x: Differentiable<A, 1>) -> Scalar<A>
+fn sum_2<A>(x: RankedDifferentiable<A, 1>) -> Scalar<A>
 where
     A: Sum<A> + Clone + Add<Output = A> + Zero,
 {
-    Differentiable::to_vector(x)
+    RankedDifferentiable::to_vector(x)
         .into_iter()
-        .map(to_scalar)
+        .map(|x| x.to_scalar())
         .sum()
 }
-fn l2_norm_2<A>(prediction: &Differentiable<A, 1>, data: &Differentiable<A, 1>) -> Scalar<A>
+fn l2_norm_2<A>(
+    prediction: &RankedDifferentiable<A, 1>,
+    data: &RankedDifferentiable<A, 1>,
+) -> Scalar<A>
 where
     A: Sum<A> + Mul<Output = A> + Copy + Default + Neg<Output = A> + Add<Output = A> + Zero + Neg,
 {
-    let diff = Differentiable::map2(prediction, data, &|x, y| x.clone() - y.clone());
+    let diff = RankedDifferentiable::map2(prediction, data, &|x, y| x.clone() - y.clone());
     sum_2(squared_2(&diff))
 }
-pub fn l2_loss_2<A, F, Params>(
+pub fn l2_loss_2<A, F, Params, const N: usize>(
     target: F,
-    data_xs: Differentiable<A, 1>,
-    data_ys: Differentiable<A, 1>,
+    data_xs: RankedDifferentiable<A, N>,
+    data_ys: RankedDifferentiable<A, 1>,
     params: Params,
 ) -> Scalar<A>
 where
-    F: Fn(Differentiable<A, 1>, Params) -> Differentiable<A, 1>,
+    F: Fn(RankedDifferentiable<A, N>, Params) -> RankedDifferentiable<A, 1>,
     A: Sum<A> + Mul<Output = A> + Copy + Default + Neg<Output = A> + Add<Output = A> + Zero,
 {
     let pred_ys = target(data_xs, params);
@@ -66,42 +78,143 @@ where
 }
 pub fn predict_line_2<A>(
-    xs: Differentiable<A, 1>,
-    theta: Differentiable<A, 1>,
-) -> Differentiable<A, 1>
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[RankedDifferentiable<A, 0>; 2],
+) -> RankedDifferentiable<A, 1>
 where
     A: Mul<Output = A> + Add<Output = A> + Sum<<A as Mul>::Output> + Copy + Default + One + Zero,
 {
-    let xs = Differentiable::to_vector(xs)
+    let xs = RankedDifferentiable::to_vector(xs)
         .into_iter()
-        .map(|v| to_scalar(v));
+        .map(|v| v.to_scalar());
     let mut result = vec![];
     for x in xs {
-        let left_arg = Differentiable::of_vector(vec![
-            of_scalar(x.clone()),
-            of_scalar(<Scalar<A> as One>::one()),
+        let left_arg = RankedDifferentiable::of_vector(vec![
+            RankedDifferentiable::of_scalar(x.clone()),
+            RankedDifferentiable::of_scalar(<Scalar<A> as One>::one()),
         ]);
-        let dotted = of_scalar(
-            Differentiable::to_vector(dot_2(&left_arg, &theta))
-                .iter()
-                .map(|x| to_scalar((*x).clone()))
-                .sum(),
+        let dotted = RankedDifferentiable::of_scalar(
+            RankedDifferentiable::to_vector(elementwise_mul(
+                &left_arg,
+                &RankedDifferentiable::of_vector(theta.to_vec()),
+            ))
+            .iter()
+            .map(|x| (*x).clone().to_scalar())
+            .sum(),
         );
         result.push(dotted);
     }
-    Differentiable::of_vector(result)
+    RankedDifferentiable::of_vector(result)
+}
+pub fn predict_line_2_unranked<A>(
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[Differentiable<A>; 2],
+) -> RankedDifferentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum<<A as Mul>::Output> + Copy + Default + One + Zero,
+{
+    let xs = RankedDifferentiable::to_vector(xs)
+        .into_iter()
+        .map(|v| v.to_scalar());
+    let mut result = vec![];
+    for x in xs {
+        let left_arg = RankedDifferentiable::of_vector(vec![
+            RankedDifferentiable::of_scalar(x.clone()),
+            RankedDifferentiable::of_scalar(<Scalar<A> as One>::one()),
+        ]);
+        let dotted = RankedDifferentiable::of_scalar(
+            dot_unranked(
+                left_arg.to_unranked_borrow(),
+                &Differentiable::Vector(theta.to_vec()),
+            )
+            .into_vector()
+            .into_iter()
+            .map(|x| x.into_scalar())
+            .sum(),
+        );
+        result.push(dotted);
+    }
+    RankedDifferentiable::of_vector(result)
 }
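
(Aside, not part of the commit: a small usage sketch of the ranked predict_line_2, whose parameters are now an array of two rank-0 tensors rather than a single rank-1 tensor. With theta = [2, 1] it computes y = 2x + 1; the numbers and the name line_sketch are illustrative only.)

use little_learner::auto_diff::RankedDifferentiable;
use little_learner::loss::predict_line_2;
use little_learner::scalar::Scalar;

fn line_sketch() {
    let xs = RankedDifferentiable::of_slice([1.0, 2.0]);
    let theta = [
        RankedDifferentiable::of_scalar(Scalar::make(2.0)),
        RankedDifferentiable::of_scalar(Scalar::make(1.0)),
    ];
    let ys = predict_line_2(xs, &theta)
        .to_vector()
        .into_iter()
        .map(|y| *y.to_scalar().real_part())
        .collect::<Vec<_>>();
    // 2*1 + 1 = 3 and 2*2 + 1 = 5.
    assert_eq!(ys, [3.0, 5.0]);
}
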
 pub fn predict_quadratic<A>(
-    xs: Differentiable<A, 1>,
-    theta: Differentiable<A, 1>,
-) -> Differentiable<A, 1>
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[RankedDifferentiable<A, 0>; 3],
+) -> RankedDifferentiable<A, 1>
 where
     A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
 {
-    Differentiable::map(xs, &mut |x| {
+    RankedDifferentiable::map(xs, &mut |x| {
         let x_powers = vec![Scalar::make(A::one()), x.clone(), square(&x)];
-        let x_powers = Differentiable::of_vector(x_powers.into_iter().map(of_scalar).collect());
-        sum_2(dot_2(&x_powers, &theta))
+        let x_powers = RankedDifferentiable::of_vector(
+            x_powers
+                .into_iter()
+                .map(RankedDifferentiable::of_scalar)
+                .collect(),
+        );
+        RankedDifferentiable::to_vector(elementwise_mul(
+            &x_powers,
+            &RankedDifferentiable::of_vector(theta.to_vec()),
+        ))
+        .into_iter()
+        .map(|x| x.to_scalar())
+        .sum()
     })
 }
+pub fn predict_quadratic_unranked<A>(
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[Differentiable<A>; 3],
+) -> RankedDifferentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
+{
+    RankedDifferentiable::map(xs, &mut |x| {
+        let x_powers = vec![Scalar::make(A::one()), x.clone(), square(&x)];
+        let x_powers = RankedDifferentiable::of_vector(
+            x_powers
+                .into_iter()
+                .map(RankedDifferentiable::of_scalar)
+                .collect(),
+        );
+        dot_unranked(
+            x_powers.to_unranked_borrow(),
+            &Differentiable::Vector(theta.to_vec()),
+        )
+        .attach_rank::<1>()
+        .expect("wanted a tensor1")
+        .to_vector()
+        .into_iter()
+        .map(|x| x.to_scalar())
+        .sum()
+    })
+}
+// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate).
+pub fn predict_plane<A>(
+    xs: RankedDifferentiable<A, 2>,
+    theta: &[Differentiable<A>; 2],
+) -> RankedDifferentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
+{
+    if theta[0].rank() != 1 {
+        panic!("theta0 must be of rank 1, got: {}", theta[0].rank())
+    }
+    let theta0 = RankedDifferentiable::of_vector(
+        theta[0]
+            .borrow_vector()
+            .iter()
+            .map(|v| RankedDifferentiable::of_scalar(v.borrow_scalar().clone()))
+            .collect::<Vec<_>>(),
+    );
+    let theta1 = theta[1].borrow_scalar().clone();
+    let dotted: Vec<_> = xs
+        .to_vector()
+        .into_iter()
+        .map(|point| sum_2(elementwise_mul(&theta0, &point)))
+        .map(|x| RankedDifferentiable::of_scalar(x + theta1.clone()))
+        .collect();
+    RankedDifferentiable::of_vector(dotted)
+}
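
(Aside, not part of the commit: what predict_plane computes. For every rank-1 point x in the rank-2 input it returns theta0 . x + theta1. A sketch with made-up numbers; plane_sketch is a hypothetical helper.)

use little_learner::auto_diff::{Differentiable, RankedDifferentiable};
use little_learner::loss::predict_plane;
use little_learner::scalar::Scalar;

fn plane_sketch() {
    let xs = RankedDifferentiable::of_slice_2::<_, 2>(&[[1.0, 2.0], [3.0, 4.0]]);
    let theta = [
        // theta0: rank-1 tensor of length 2, dotted with each input point.
        RankedDifferentiable::of_slice([10.0, 1.0]).to_unranked(),
        // theta1: scalar offset added to every prediction.
        Differentiable::Scalar(Scalar::make(5.0)),
    ];
    let ys = predict_plane(xs, &theta)
        .to_vector()
        .into_iter()
        .map(|y| *y.to_scalar().real_part())
        .collect::<Vec<_>>();
    // 10*1 + 1*2 + 5 = 17 and 10*3 + 1*4 + 5 = 39.
    assert_eq!(ys, [17.0, 39.0]);
}
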


@@ -9,4 +9,3 @@ edition = "2021"
 immutable-chunkmap = "1.0.5"
 ordered-float = "3.6.0"
 little_learner = { path = "../little_learner" }
-arrayvec = "0.7.2"


@@ -6,9 +6,9 @@ mod with_tensor;
 use core::hash::Hash;
 use std::ops::{Add, AddAssign, Div, Mul, Neg};
-use little_learner::auto_diff::{of_scalar, of_slice, to_scalar, Differentiable};
-use little_learner::loss::{l2_loss_2, predict_quadratic};
+use little_learner::auto_diff::{grad, Differentiable, RankedDifferentiable};
+use little_learner::loss::{l2_loss_2, predict_plane};
 use little_learner::scalar::Scalar;
 use little_learner::traits::{Exp, One, Zero};
 use ordered_float::NotNan;
@@ -24,16 +24,16 @@ where
     v
 }
-struct GradientDescentHyper<A, const RANK: usize> {
+struct GradientDescentHyper<A> {
     learning_rate: A,
     iterations: u32,
 }
-fn gradient_descent_step<A, F, const RANK: usize>(
+fn gradient_descent_step<A, F, const RANK: usize, const PARAM_NUM: usize>(
     f: &F,
-    theta: Differentiable<A, RANK>,
-    params: &GradientDescentHyper<A, RANK>,
-) -> Differentiable<A, RANK>
+    theta: [Differentiable<A>; PARAM_NUM],
+    params: &GradientDescentHyper<A>,
+) -> [Differentiable<A>; PARAM_NUM]
 where
     A: Clone
         + Mul<Output = A>
@@ -46,17 +46,33 @@ where
         + One
         + Eq
         + Exp,
-    F: Fn(Differentiable<A, RANK>) -> Differentiable<A, RANK>,
+    F: Fn(&[Differentiable<A>; PARAM_NUM]) -> RankedDifferentiable<A, RANK>,
 {
-    let delta = Differentiable::grad(f, &theta);
-    Differentiable::map2(&theta, &delta, &|theta, delta| {
-        (*theta).clone() - (Scalar::make((params.learning_rate).clone()) * (*delta).clone())
+    let delta = grad(f, &theta);
+    let mut i = 0;
+    theta.map(|theta| {
+        let delta = &delta[i];
+        i += 1;
+        // For speed, you might want to truncate_dual this.
+        let learning_rate = Scalar::make((params.learning_rate).clone());
+        Differentiable::map2(
+            &theta,
+            &delta.map(&mut |s| s * learning_rate.clone()),
+            &|theta, delta| (*theta).clone() - (*delta).clone(),
+        )
     })
 }
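
(Aside, not part of the commit: the per-parameter update above is plain gradient descent, theta_new = theta - learning_rate * gradient. The same rule on bare floats, under a hypothetical helper name, for intuition:)

fn gradient_descent_step_f64<const N: usize>(
    theta: [f64; N],
    gradient: [f64; N],
    learning_rate: f64,
) -> [f64; N] {
    let mut i = 0;
    theta.map(|theta| {
        // Move each parameter against its gradient, scaled by the learning rate.
        let updated = theta - learning_rate * gradient[i];
        i += 1;
        updated
    })
}
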
 fn main() {
-    let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
-    let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
+    let plane_xs = [
+        [1.0, 2.05],
+        [1.0, 3.0],
+        [2.0, 2.0],
+        [2.0, 3.91],
+        [3.0, 6.13],
+        [4.0, 8.09],
+    ];
+    let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
     let hyper = GradientDescentHyper {
         learning_rate: NotNan::new(0.001).expect("not nan"),
@@ -64,48 +80,63 @@ fn main() {
     };
     let iterated = {
-        let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
-        let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+        let xs = plane_xs.map(|x| {
+            [
+                NotNan::new(x[0]).expect("not nan"),
+                NotNan::new(x[1]).expect("not nan"),
+            ]
+        });
+        let ys = plane_ys.map(|x| NotNan::new(x).expect("not nan"));
         iterate(
             &|theta| {
                 gradient_descent_step(
                     &|x| {
-                        Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                            predict_quadratic,
-                            of_slice(&xs),
-                            of_slice(&ys),
-                            x,
-                        ))])
+                        RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                            l2_loss_2(
+                                predict_plane,
+                                RankedDifferentiable::of_slice_2::<_, 2>(&xs),
+                                RankedDifferentiable::of_slice(ys),
+                                x,
+                            ),
+                        )])
                     },
                     theta,
                     &hyper,
                 )
             },
-            of_slice(&[
-                NotNan::<f64>::zero(),
-                NotNan::<f64>::zero(),
-                NotNan::<f64>::zero(),
-            ]),
+            [
+                RankedDifferentiable::of_slice([NotNan::zero(), NotNan::zero()]).to_unranked(),
+                Differentiable::Scalar(Scalar::zero()),
+            ],
            hyper.iterations,
        )
    };
-    println!(
-        "After iteration: {:?}",
-        Differentiable::to_vector(iterated)
-            .into_iter()
-            .map(|x| to_scalar(x).real_part().into_inner())
-            .collect::<Vec<_>>()
-    );
+    let [theta0, theta1] = iterated;
+    let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+    let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+    assert_eq!(
+        theta0
+            .to_vector()
+            .into_iter()
+            .map(|x| x.to_scalar().real_part().into_inner())
+            .collect::<Vec<_>>(),
+        [3.97757644609063, 2.0496557321494446]
+    );
+    assert_eq!(
+        theta1.to_scalar().real_part().into_inner(),
+        5.786758464448078
+    );
 }
 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrayvec::ArrayVec;
     use little_learner::{
-        auto_diff::to_scalar,
-        loss::{predict_line_2, square},
+        auto_diff::grad,
+        loss::{l2_loss_2, predict_line_2, predict_line_2_unranked, predict_quadratic_unranked},
     };
     use crate::with_tensor::{l2_loss, predict_line};
@@ -116,9 +147,12 @@ mod tests {
         let ys = [1.8, 1.2, 4.2, 3.3];
         let loss = l2_loss_2(
             predict_line_2,
-            of_slice(&xs),
-            of_slice(&ys),
-            of_slice(&[0.0, 0.0]),
+            RankedDifferentiable::of_slice(&xs),
+            RankedDifferentiable::of_slice(&ys),
+            &[
+                RankedDifferentiable::of_scalar(Scalar::zero()),
+                RankedDifferentiable::of_scalar(Scalar::zero()),
+            ],
         );
         assert_eq!(*loss.real_part(), 33.21);
@@ -134,29 +168,39 @@ mod tests {
     #[test]
     fn grad_example() {
-        let input_vec = of_slice(&[NotNan::new(27.0).expect("not nan")]);
-        let grad: Vec<_> = Differentiable::to_vector(Differentiable::grad(
-            |x| Differentiable::map(x, &mut |x| square(&x)),
+        let input_vec = [Differentiable::Scalar(Scalar::make(
+            NotNan::new(27.0).expect("not nan"),
+        ))];
+        let grad: Vec<_> = grad(
+            |x| {
+                RankedDifferentiable::of_scalar(
+                    x[0].borrow_scalar().clone() * x[0].borrow_scalar().clone(),
+                )
+            },
             &input_vec,
-        ))
+        )
         .into_iter()
-        .map(|x| to_scalar(x).real_part().into_inner())
+        .map(|x| x.into_scalar().real_part().into_inner())
         .collect();
         assert_eq!(grad, [54.0]);
     }
     #[test]
     fn loss_gradient() {
-        let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
+        let zero = Scalar::<NotNan<f64>>::zero();
+        let input_vec = [
+            RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+            RankedDifferentiable::of_scalar(zero).to_unranked(),
+        ];
         let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
         let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
-        let grad = Differentiable::grad(
+        let grad = grad(
             |x| {
-                Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                    predict_line_2,
-                    of_slice(&xs),
-                    of_slice(&ys),
+                RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(l2_loss_2(
+                    predict_line_2_unranked,
+                    RankedDifferentiable::of_slice(&xs),
+                    RankedDifferentiable::of_slice(&ys),
                     x,
                 ))])
             },
@@ -164,9 +208,8 @@ mod tests {
         );
         assert_eq!(
-            Differentiable::to_vector(grad)
-                .into_iter()
-                .map(|x| *(to_scalar(x).real_part()))
+            grad.into_iter()
+                .map(|x| *(x.into_scalar().real_part()))
                 .collect::<Vec<_>>(),
             [-63.0, -21.0]
         );
@@ -174,13 +217,7 @@ mod tests {
     #[test]
     fn test_iterate() {
-        let f = |t: [i32; 3]| {
-            let mut vec = ArrayVec::<i32, 3>::new();
-            for i in t {
-                vec.push(i - 3);
-            }
-            vec.into_inner().unwrap()
-        };
+        let f = |t: [i32; 3]| t.map(|i| i - 3);
         assert_eq!(iterate(&f, [1, 2, 3], 5u32), [-14, -13, -12]);
     }
@@ -189,6 +226,8 @@ mod tests {
         let xs = [2.0, 1.0, 4.0, 3.0];
         let ys = [1.8, 1.2, 4.2, 3.3];
+        let zero = Scalar::<NotNan<f64>>::zero();
         let hyper = GradientDescentHyper {
             learning_rate: NotNan::new(0.01).expect("not nan"),
             iterations: 1000,
@@ -200,24 +239,29 @@ mod tests {
                 &|theta| {
                     gradient_descent_step(
                         &|x| {
-                            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                                predict_line_2,
-                                of_slice(&xs),
-                                of_slice(&ys),
-                                x,
-                            ))])
+                            RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                                l2_loss_2(
+                                    predict_line_2_unranked,
+                                    RankedDifferentiable::of_slice(&xs),
+                                    RankedDifferentiable::of_slice(&ys),
+                                    x,
+                                ),
+                            )])
                         },
                         theta,
                         &hyper,
                     )
                 },
-                of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
+                [
+                    RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                    RankedDifferentiable::of_scalar(zero).to_unranked(),
+                ],
                 hyper.iterations,
             )
         };
-        let iterated = Differentiable::to_vector(iterated)
+        let iterated = iterated
             .into_iter()
-            .map(|x| to_scalar(x).real_part().into_inner())
+            .map(|x| x.into_scalar().real_part().into_inner())
             .collect::<Vec<_>>();
         assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
@@ -228,6 +272,8 @@ mod tests {
         let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
         let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
+        let zero = Scalar::<NotNan<f64>>::zero();
         let hyper = GradientDescentHyper {
             learning_rate: NotNan::new(0.001).expect("not nan"),
             iterations: 1000,
@@ -240,35 +286,104 @@ mod tests {
                 &|theta| {
                     gradient_descent_step(
                         &|x| {
-                            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                                predict_quadratic,
-                                of_slice(&xs),
-                                of_slice(&ys),
-                                x,
-                            ))])
+                            RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                                l2_loss_2(
+                                    predict_quadratic_unranked,
+                                    RankedDifferentiable::of_slice(&xs),
+                                    RankedDifferentiable::of_slice(&ys),
+                                    x,
+                                ),
+                            )])
                         },
                         theta,
                         &hyper,
                     )
                 },
-                of_slice(&[
-                    NotNan::<f64>::zero(),
-                    NotNan::<f64>::zero(),
-                    NotNan::<f64>::zero(),
-                ]),
+                [
+                    RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                    RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                    RankedDifferentiable::of_scalar(zero).to_unranked(),
+                ],
                 hyper.iterations,
             )
         };
-        let iterated = Differentiable::to_vector(iterated)
+        let iterated = iterated
             .into_iter()
-            .map(|x| to_scalar(x).real_part().into_inner())
+            .map(|x| x.into_scalar().real_part().into_inner())
             .collect::<Vec<_>>();
-        println!("{:?}", iterated);
         assert_eq!(
             iterated,
             [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
         );
     }
+    #[test]
+    fn optimise_plane() {
+        let plane_xs = [
+            [1.0, 2.05],
+            [1.0, 3.0],
+            [2.0, 2.0],
+            [2.0, 3.91],
+            [3.0, 6.13],
+            [4.0, 8.09],
+        ];
+        let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
+        let hyper = GradientDescentHyper {
+            learning_rate: NotNan::new(0.001).expect("not nan"),
+            iterations: 1000,
+        };
+        let iterated = {
+            let xs = plane_xs.map(|x| {
+                [
+                    NotNan::new(x[0]).expect("not nan"),
+                    NotNan::new(x[1]).expect("not nan"),
+                ]
+            });
+            let ys = plane_ys.map(|x| NotNan::new(x).expect("not nan"));
+            iterate(
+                &|theta| {
+                    gradient_descent_step(
+                        &|x| {
+                            RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                                l2_loss_2(
+                                    predict_plane,
+                                    RankedDifferentiable::of_slice_2::<_, 2>(&xs),
+                                    RankedDifferentiable::of_slice(ys),
+                                    x,
+                                ),
+                            )])
+                        },
+                        theta,
+                        &hyper,
+                    )
+                },
+                [
+                    RankedDifferentiable::of_slice([NotNan::zero(), NotNan::zero()]).to_unranked(),
+                    Differentiable::Scalar(Scalar::zero()),
+                ],
+                hyper.iterations,
+            )
+        };
+        let [theta0, theta1] = iterated;
+        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+        assert_eq!(
+            theta0
+                .to_vector()
+                .into_iter()
+                .map(|x| x.to_scalar().real_part().into_inner())
+                .collect::<Vec<_>>(),
+            [3.97757644609063, 2.0496557321494446]
+        );
+        assert_eq!(
+            theta1.to_scalar().real_part().into_inner(),
+            5.786758464448078
+        );
+    }
 }
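
(Aside, not part of the commit: a quick sanity check of the fitted plane against the first training point, using the parameter values asserted above. check_first_point is a hypothetical helper on bare floats.)

fn check_first_point() {
    let theta0 = [3.97757644609063, 2.0496557321494446];
    let theta1 = 5.786758464448078;
    let x = [1.0, 2.05];
    let predicted: f64 =
        theta0.iter().zip(x.iter()).map(|(t, x)| t * x).sum::<f64>() + theta1;
    // The observed target was 13.99; the fitted plane predicts about 13.97.
    assert!((predicted - 13.99_f64).abs() < 0.05);
}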