Add docs, delete old code, truncate scalars where possible (#21)

Patrick Stevens
2023-05-07 23:57:58 +01:00
committed by GitHub
parent e42cfa22db
commit deb0ec67ca
15 changed files with 349 additions and 719 deletions

View File

@@ -84,11 +84,11 @@ where
 {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            DifferentiableContents::Scalar(s, _) => f.write_fmt(format_args!("{}", s)),
+            DifferentiableContents::Scalar(s, _) => f.write_fmt(format_args!("{s}")),
             DifferentiableContents::Vector(v, _rank) => {
                 f.write_char('[')?;
                 for v in v.iter() {
-                    f.write_fmt(format_args!("{}", v))?;
+                    f.write_fmt(format_args!("{v}"))?;
                     f.write_char(',')?;
                 }
                 f.write_char(']')
@@ -159,6 +159,12 @@ impl<A, Tag> DifferentiableContents<A, Tag> {
         }
     }

+    /// This function does *not* check that its inputs are of exactly the same shape, though it
+    /// does check ranks. If you have two vectors of different lengths, you will silently get the
+    /// shorter one.
+    ///
+    /// # Panics
+    /// Panics if the two inputs have different shapes (e.g. if they have different ranks).
     fn map2<B, C, Tag2, Tag3, F>(
         &self,
         other: &DifferentiableContents<B, Tag2>,
@@ -180,9 +186,7 @@ impl<A, Tag> DifferentiableContents<A, Tag> {
             (
                 DifferentiableContents::Vector(slice_a, rank_a),
                 DifferentiableContents::Vector(slice_b, rank_b),
             ) => {
-                if rank_a != rank_b {
-                    panic!("Unexpectedly different ranks in map2");
-                }
+                assert_eq!(rank_a, rank_b, "Unexpectedly different ranks in map2");
                 DifferentiableContents::Vector(
                     slice_a
                         .iter()
@@ -367,10 +371,11 @@ impl<A, Tag> DifferentiableTagged<A, Tag> {
         }
     }

+    /// # Panics
+    /// Panics if the input is empty (otherwise we can't determine a rank).
+    #[must_use]
     pub fn of_vec(input: Vec<DifferentiableTagged<A, Tag>>) -> DifferentiableTagged<A, Tag> {
-        if input.is_empty() {
-            panic!("Can't make an empty tensor");
-        }
+        assert!(!input.is_empty(), "Can't make an empty tensor");
         let rank = input[0].rank();
         DifferentiableTagged {
             contents: DifferentiableContents::Vector(input, 1 + rank),
@@ -413,7 +418,7 @@ where
                 k.invoke(y, A::one(), acc);
             }
             DifferentiableContents::Vector(y, _rank) => {
-                DifferentiableContents::accumulate_gradients_vec(y, acc)
+                DifferentiableContents::accumulate_gradients_vec(y, acc);
             }
         }
     }
@@ -543,6 +548,7 @@ impl<A, Tag, const RANK: usize> RankedDifferentiableTagged<A, Tag, RANK> {
         &self.contents
     }

+    #[must_use]
     pub fn of_vector(
         s: Vec<RankedDifferentiableTagged<A, Tag, RANK>>,
     ) -> RankedDifferentiableTagged<A, Tag, { RANK + 1 }> {
@@ -683,8 +689,7 @@ mod tests {
     #[test]
     fn test_map() {
-        let v = DifferentiableTagged::of_vec(
-            vec![
+        let v = DifferentiableTagged::of_vec(vec![
             Differentiable::of_scalar(Scalar::Number(
                 NotNan::new(3.0).expect("3 is not NaN"),
                 Some(0usize),
@@ -693,9 +698,7 @@ mod tests {
                 NotNan::new(4.0).expect("4 is not NaN"),
                 Some(1usize),
             )),
-        ]
-        .into(),
-        );
+        ]);
         let mapped = v.map(&mut |x: Scalar<NotNan<f64>>| match x {
             Scalar::Number(i, n) => Scalar::Number(i + NotNan::new(1.0).expect("1 is not NaN"), n),
             Scalar::Dual(_, _) => panic!("Not hit"),
@@ -704,7 +707,7 @@ mod tests {
         let v = mapped
             .into_vector()
             .iter()
-            .map(|d| extract_scalar(d).clone())
+            .map(|d| *extract_scalar(d))
             .collect::<Vec<_>>();
         assert_eq!(v, [4.0, 5.0]);

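A note on the `assert!`/`assert_eq!` rewrites in this file: they are behaviour-preserving. Below is a minimal, self-contained sketch of the transformation in plain Rust with no crate types; `check_rank_old`/`check_rank_new` are hypothetical stand-ins for `map2`'s rank check.

    // Before: an explicit guard that panics by hand.
    fn check_rank_old(rank_a: usize, rank_b: usize) {
        if rank_a != rank_b {
            panic!("Unexpectedly different ranks in map2");
        }
    }

    // After: assert_eq! panics with the same message text, and additionally
    // reports the two offending values in the panic output.
    fn check_rank_new(rank_a: usize, rank_b: usize) {
        assert_eq!(rank_a, rank_b, "Unexpectedly different ranks in map2");
    }

    fn main() {
        check_rank_old(2, 2);
        check_rank_new(2, 2); // both succeed; both panic on e.g. (2, 3)
    }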
View File

@@ -1,13 +0,0 @@
use std::marker::PhantomData;
pub struct ConstTeq<const A: usize, const B: usize> {
phantom_a: PhantomData<[(); A]>,
phantom_b: PhantomData<[(); B]>,
}
pub fn make<const A: usize>() -> ConstTeq<A, A> {
ConstTeq {
phantom_a: Default::default(),
phantom_b: Default::default(),
}
}

View File

@@ -1,155 +0,0 @@
use immutable_chunkmap::map;
use std::ops::{Add, Mul};
/*
An untyped syntax tree for an expression whose constants are all of type `A`.
*/
#[derive(Clone, Debug)]
pub enum Expr<A> {
Const(A),
Sum(Box<Expr<A>>, Box<Expr<A>>),
Variable(u32),
// The first `Expr` here is a function, which may reference the input variable `Variable(i)`.
// For example, `(fun x y -> x + y) 3 4` is expressed as:
// Apply(0, Apply(1, Sum(Variable(0), Variable(1)), Const(4)), Const(3))
Apply(u32, Box<Expr<A>>, Box<Expr<A>>),
Mul(Box<Expr<A>>, Box<Expr<A>>),
}
impl<A> Expr<A> {
fn eval_inner<const SIZE: usize>(e: &Expr<A>, ctx: &map::Map<u32, A, SIZE>) -> A
where
A: Clone + Add<Output = A> + Mul<Output = A>,
{
match &e {
Expr::Const(x) => x.clone(),
Expr::Sum(x, y) => Expr::eval_inner(x, ctx) + Expr::eval_inner(y, ctx),
Expr::Variable(id) => ctx
.get(id)
.unwrap_or_else(|| panic!("No binding found for free variable {}", id))
.clone(),
Expr::Apply(variable, func, arg) => {
let arg = Expr::eval_inner(arg, ctx);
let (updated_context, _) = ctx.insert(*variable, arg);
Expr::eval_inner(func, &updated_context)
}
Expr::Mul(x, y) => Expr::eval_inner(x, ctx) * Expr::eval_inner(y, ctx),
}
}
pub fn eval<const MAX_VAR_NUM: usize>(e: &Expr<A>) -> A
where
A: Clone + Add<Output = A> + Mul<Output = A>,
{
Expr::eval_inner(e, &map::Map::<u32, A, MAX_VAR_NUM>::new())
}
pub fn apply(var: u32, f: Expr<A>, arg: Expr<A>) -> Expr<A> {
Expr::Apply(var, Box::new(f), Box::new(arg))
}
pub fn differentiate(one: &A, zero: &A, var: u32, f: &Expr<A>) -> Expr<A>
where
A: Clone,
{
match f {
Expr::Const(_) => Expr::Const(zero.clone()),
Expr::Sum(x, y) => {
Expr::differentiate(one, zero, var, x) + Expr::differentiate(one, zero, var, y)
}
Expr::Variable(i) => {
if *i == var {
Expr::Const(one.clone())
} else {
Expr::Const(zero.clone())
}
}
Expr::Mul(x, y) => {
Expr::Mul(
Box::new(Expr::differentiate(one, zero, var, x.as_ref())),
(*y).clone(),
) + Expr::Mul(
Box::new(Expr::differentiate(one, zero, var, y.as_ref())),
(*x).clone(),
)
}
Expr::Apply(new_var, func, expr) => {
if *new_var == var {
panic!(
"cannot differentiate with respect to variable {} that's been assigned",
var
)
}
let expr_deriv = Expr::differentiate(one, zero, var, expr);
Expr::mul(
expr_deriv,
Expr::Apply(
*new_var,
Box::new(Expr::differentiate(one, zero, *new_var, func)),
(*expr).clone(),
),
)
}
}
}
}
impl<A> Add for Expr<A> {
type Output = Expr<A>;
fn add(self: Expr<A>, y: Expr<A>) -> Expr<A> {
Expr::Sum(Box::new(self), Box::new(y))
}
}
impl<A> Mul for Expr<A> {
type Output = Expr<A>;
fn mul(self: Expr<A>, y: Expr<A>) -> Expr<A> {
Expr::Mul(Box::new(self), Box::new(y))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_expr() {
let expr = Expr::apply(
0,
Expr::apply(1, Expr::Variable(0) + Expr::Variable(1), Expr::Const(4)),
Expr::Const(3),
);
assert_eq!(Expr::eval::<2>(&expr), 7);
}
#[test]
fn test_derivative() {
let add_four = Expr::Variable(0) + Expr::Const(4);
let mul_five = Expr::Variable(1) * Expr::Const(5);
{
let mul_five_then_add_four = Expr::apply(0, add_four.clone(), mul_five.clone());
let mul_then_add_diff = Expr::differentiate(&1, &0, 1, &mul_five_then_add_four);
for i in 3..10 {
// (5x + 4) differentiates to 5
assert_eq!(
Expr::eval::<2>(&Expr::apply(1, mul_then_add_diff.clone(), Expr::Const(i))),
5
);
}
}
{
let add_four_then_mul_five = Expr::apply(1, mul_five.clone(), add_four.clone());
let add_then_mul_diff = Expr::differentiate(&1, &0, 0, &add_four_then_mul_five);
for i in 3..10 {
// ((x + 4) * 5) differentiates to 5
assert_eq!(
Expr::eval::<2>(&Expr::apply(0, add_then_mul_diff.clone(), Expr::Const(i))),
5
);
}
}
}
}

View File

@@ -1,6 +1,7 @@
 use crate::auto_diff::{grad, Differentiable, RankedDifferentiable};
-use crate::hyper::BaseGradientDescentHyper;
-use crate::loss::{l2_loss_2, Predictor};
+use crate::hyper;
+use crate::loss::l2_loss_2;
+use crate::predictor::Predictor;
 use crate::sample::sample2;
 use crate::traits::NumLike;
 use rand::Rng;
@@ -84,12 +85,12 @@ where
     G: for<'b> Fn(&'b [Point]) -> RankedDifferentiable<T, IN_SIZE>,
     Inflated: Clone,
     ImmutableHyper: Clone,
-    Hyper: Into<BaseGradientDescentHyper<T, R>>,
+    Hyper: Into<hyper::BaseGradientDescent<R>>,
     H: FnOnce(&Hyper) -> ImmutableHyper,
     R: Rng,
 {
     let sub_hypers = to_immutable(&hyper);
-    let mut gradient_hyper: BaseGradientDescentHyper<T, R> = hyper.into();
+    let mut gradient_hyper: hyper::BaseGradientDescent<R> = hyper.into();
     let iterations = gradient_hyper.iterations;
     let out = iterate(
         |theta| {
@@ -131,12 +132,10 @@ where
 mod tests {
     use super::*;
     use crate::auto_diff::RankedDifferentiableTagged;
-    use crate::hyper::{RmsGradientDescentHyper, VelocityGradientDescentHyper};
-    use crate::loss::{
-        naked_predictor, predict_line_2_unranked, predict_plane, predict_quadratic_unranked,
-        rms_predictor, velocity_predictor,
-    };
+    use crate::hyper;
+    use crate::loss::{predict_line_2_unranked, predict_plane, predict_quadratic_unranked};
     use crate::not_nan::{to_not_nan_1, to_not_nan_2};
+    use crate::predictor;
     use crate::scalar::Scalar;
     use crate::traits::Zero;
     use ordered_float::NotNan;
@@ -156,7 +155,7 @@ mod tests {
         let zero = Scalar::<NotNan<f64>>::zero();

-        let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.01).expect("not nan"), 1000);
+        let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.01).expect("not nan"), 1000);
         let iterated = {
             let xs = to_not_nan_1(xs);
             let ys = to_not_nan_1(ys);
@@ -170,8 +169,8 @@ mod tests {
                 |b| RankedDifferentiable::of_slice(b),
                 &ys,
                 zero_params,
-                naked_predictor(predict_line_2_unranked),
-                BaseGradientDescentHyper::to_immutable,
+                predictor::naked(predict_line_2_unranked),
+                hyper::NakedGradientDescent::to_immutable,
             )
         };
         let iterated = iterated
@@ -189,7 +188,7 @@ mod tests {
         let zero = Scalar::<NotNan<f64>>::zero();

-        let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000);
+        let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000);
         let iterated = {
             let xs = to_not_nan_1(xs);
@@ -205,8 +204,8 @@ mod tests {
                 |b| RankedDifferentiable::of_slice(b),
                 &ys,
                 zero_params,
-                naked_predictor(predict_quadratic_unranked),
-                BaseGradientDescentHyper::to_immutable,
+                predictor::naked(predict_quadratic_unranked),
+                hyper::NakedGradientDescent::to_immutable,
             )
         };
         let iterated = iterated
@@ -232,7 +231,7 @@ mod tests {
     #[test]
     fn optimise_plane() {
-        let mut hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000);
+        let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000);

         let iterated = {
             let xs = to_not_nan_2(PLANE_XS);
@@ -247,8 +246,8 @@ mod tests {
                 RankedDifferentiable::of_slice_2::<_, 2>,
                 &ys,
                 zero_params,
-                naked_predictor(predict_plane),
-                BaseGradientDescentHyper::to_immutable,
+                predictor::naked(predict_plane),
+                hyper::NakedGradientDescent::to_immutable,
             )
         };
@@ -267,7 +266,7 @@ mod tests {
     #[test]
     fn optimise_plane_with_sampling() {
         let rng = StdRng::seed_from_u64(314159);
-        let hyper = BaseGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
+        let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000)
             .with_rng(rng, 4);

         let iterated = {
@@ -283,8 +282,8 @@ mod tests {
                 RankedDifferentiable::of_slice_2::<_, 2>,
                 &ys,
                 zero_params,
-                naked_predictor(predict_plane),
-                BaseGradientDescentHyper::to_immutable,
+                predictor::naked(predict_plane),
+                hyper::NakedGradientDescent::to_immutable,
             )
         };
@@ -322,13 +321,16 @@ mod tests {
        Show[points, withBatching]
         */

-        assert_eq!(theta0, [3.8581694055684781, 2.2166222673968554]);
-        assert_eq!(theta1, 5.2839863438547159);
+        assert_eq!(theta0, [3.858_169_405_568_478, 2.2166222673968554]);
+        assert_eq!(theta1, 5.283_986_343_854_716);
     }

     #[test]
     fn test_with_velocity() {
-        let hyper = VelocityGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
+        let hyper = hyper::VelocityGradientDescent::zero_momentum(
+            NotNan::new(0.001).expect("not nan"),
+            1000,
+        )
         .with_mu(NotNan::new(0.9).expect("not nan"));

         let iterated = {
@@ -346,8 +348,8 @@ mod tests {
                 RankedDifferentiableTagged::of_slice_2::<_, 2>,
                 &ys,
                 zero_params,
-                velocity_predictor(predict_plane),
-                VelocityGradientDescentHyper::to_immutable,
+                predictor::velocity(predict_plane),
+                hyper::VelocityGradientDescent::to_immutable,
             )
         };
@@ -367,7 +369,7 @@ mod tests {
     fn test_with_rms() {
         let beta = NotNan::new(0.9).expect("not nan");
         let stabilizer = NotNan::new(0.00000001).expect("not nan");
-        let hyper = RmsGradientDescentHyper::default(NotNan::new(0.001).expect("not nan"), 3000)
+        let hyper = hyper::RmsGradientDescent::default(NotNan::new(0.001).expect("not nan"), 3000)
             .with_stabilizer(stabilizer)
             .with_beta(beta);
@@ -386,8 +388,8 @@ mod tests {
                 RankedDifferentiableTagged::of_slice_2::<_, 2>,
                 &ys,
                 zero_params,
-                rms_predictor(predict_plane),
-                RmsGradientDescentHyper::to_immutable,
+                predictor::rms(predict_plane),
+                hyper::RmsGradientDescent::to_immutable,
             )
         };
@@ -402,7 +404,7 @@ mod tests {
             .map(|x| x.into_inner())
             .collect::<Vec<_>>();
         let fitted_theta1 = theta1.to_scalar().real_part().into_inner();
-        assert_eq!(fitted_theta0, [3.9853500993426492, 1.9745945728216352]);
-        assert_eq!(fitted_theta1, 6.1642229831811681);
+        assert_eq!(fitted_theta0, [3.985_350_099_342_649, 1.9745945728216352]);
+        assert_eq!(fitted_theta1, 6.164_222_983_181_168);
     }
 }

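The only change to the expected values in these tests is formatting: Rust ignores underscores in numeric literals, and the shortened literals still parse to the same `f64` values, which is why the assertions continue to hold. A quick self-contained check of the separator rule:

    fn main() {
        // Underscores in a float literal do not change its value.
        assert_eq!(3.858_169_405_568_478_f64, 3.858169405568478_f64);
        assert_eq!(6.164_222_983_181_168_f64, 6.164222983181168_f64);
    }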
View File

@@ -1,117 +1,135 @@
-use crate::loss::{NakedHypers, RmsHyper, VelocityHypers};
+use crate::predictor::{NakedHypers, RmsHyper, VelocityHypers};
 use crate::traits::{NumLike, Zero};
-use rand::{rngs::StdRng, Rng};
+use rand::rngs::StdRng;

-pub struct BaseGradientDescentHyper<A, R: Rng> {
-    pub sampling: Option<(R, usize)>,
+/// Hyperparameters which apply to any possible optimisation algorithm that uses gradient descent.
+pub struct BaseGradientDescent<Rng> {
+    pub sampling: Option<(Rng, usize)>,
     pub iterations: u32,
-    params: NakedHypers<A>,
 }

-impl<A> BaseGradientDescentHyper<A, StdRng>
-where
-    A: NumLike + NumLike,
-{
-    #[allow(dead_code)]
-    pub fn naked(learning_rate: A, iterations: u32) -> Self {
-        BaseGradientDescentHyper {
-            params: NakedHypers { learning_rate },
-            iterations,
-            sampling: None,
-        }
-    }
-
-    #[allow(dead_code)]
-    pub fn with_rng<S: Rng>(self, rng: S, size: usize) -> BaseGradientDescentHyper<A, S> {
-        BaseGradientDescentHyper {
-            params: self.params,
-            iterations: self.iterations,
-            sampling: Some((rng, size)),
-        }
-    }
-
-    #[allow(dead_code)]
-    pub fn with_iterations(self, n: u32) -> Self {
-        BaseGradientDescentHyper {
-            sampling: self.sampling,
-            iterations: n,
-            params: self.params,
-        }
-    }
-
-    #[allow(dead_code)]
-    pub fn to_immutable(&self) -> NakedHypers<A> {
-        self.params.clone()
-    }
-}
+impl BaseGradientDescent<StdRng> {
+    #[must_use]
+    pub fn new(iterations: u32) -> BaseGradientDescent<StdRng> {
+        BaseGradientDescent {
+            sampling: None,
+            iterations,
+        }
+    }
+}
+
+impl<Rng> BaseGradientDescent<Rng> {
+    #[must_use]
+    pub fn with_rng<Rng2>(self, rng: Rng2, size: usize) -> BaseGradientDescent<Rng2> {
+        BaseGradientDescent {
+            iterations: self.iterations,
+            sampling: Some((rng, size)),
+        }
+    }
+
+    #[must_use]
+    pub fn with_iterations(self, n: u32) -> Self {
+        BaseGradientDescent {
+            sampling: self.sampling,
+            iterations: n,
+        }
+    }
+}

-#[derive(Clone)]
-pub struct VelocityGradientDescentHyper<A, R: Rng> {
-    sampling: Option<(R, usize)>,
-    learning_rate: A,
-    iterations: u32,
-    mu: A,
-}
+pub struct NakedGradientDescent<A, Rng> {
+    base: BaseGradientDescent<Rng>,
+    naked: NakedHypers<A>,
+}

-impl<A> VelocityGradientDescentHyper<A, StdRng>
+impl<A> NakedGradientDescent<A, StdRng>
 where
     A: Zero,
 {
-    #[allow(dead_code)]
-    pub fn naked(learning_rate: A, iterations: u32) -> Self {
-        VelocityGradientDescentHyper {
-            sampling: None,
-            learning_rate,
-            iterations,
-            mu: A::zero(),
+    #[must_use]
+    pub fn new(learning_rate: A, iterations: u32) -> Self {
+        NakedGradientDescent {
+            base: BaseGradientDescent::new(iterations),
+            naked: NakedHypers { learning_rate },
         }
     }
 }

-impl<A, R: Rng> VelocityGradientDescentHyper<A, R> {
-    #[allow(dead_code)]
-    pub fn with_mu(self, mu: A) -> Self {
-        VelocityGradientDescentHyper {
-            sampling: self.sampling,
-            mu,
-            learning_rate: self.learning_rate,
-            iterations: self.iterations,
-        }
-    }
-
-    #[allow(dead_code)]
-    pub fn to_immutable(&self) -> VelocityHypers<A>
+impl<A, Rng> NakedGradientDescent<A, Rng> {
+    pub fn to_immutable(&self) -> NakedHypers<A>
     where
         A: Clone,
     {
-        VelocityHypers {
-            mu: self.mu.clone(),
-            learning_rate: self.learning_rate.clone(),
-        }
+        self.naked.clone()
+    }
+
+    #[must_use]
+    pub fn with_rng<Rng2>(self, rng: Rng2, size: usize) -> NakedGradientDescent<A, Rng2> {
+        NakedGradientDescent {
+            base: self.base.with_rng(rng, size),
+            naked: self.naked,
+        }
     }
 }

-impl<A, R: Rng> From<VelocityGradientDescentHyper<A, R>> for BaseGradientDescentHyper<A, R> {
-    fn from(val: VelocityGradientDescentHyper<A, R>) -> BaseGradientDescentHyper<A, R> {
-        BaseGradientDescentHyper {
-            sampling: val.sampling,
-            iterations: val.iterations,
-            params: NakedHypers {
-                learning_rate: val.learning_rate,
-            },
-        }
-    }
-}
+impl<A, Rng> From<NakedGradientDescent<A, Rng>> for BaseGradientDescent<Rng> {
+    fn from(val: NakedGradientDescent<A, Rng>) -> BaseGradientDescent<Rng> {
+        val.base
+    }
+}
+
+pub struct VelocityGradientDescent<A, Rng> {
+    base: BaseGradientDescent<Rng>,
+    velocity: VelocityHypers<A>,
+}
+
+impl<A> VelocityGradientDescent<A, StdRng>
+where
+    A: Zero,
+{
+    #[must_use]
+    pub fn zero_momentum(learning_rate: A, iterations: u32) -> Self {
+        VelocityGradientDescent {
+            base: BaseGradientDescent::new(iterations),
+            velocity: VelocityHypers {
+                learning_rate,
+                mu: A::zero(),
+            },
+        }
+    }
+}
+
+impl<A, Rng> VelocityGradientDescent<A, Rng> {
+    #[must_use]
+    pub fn with_mu(self, mu: A) -> Self {
+        VelocityGradientDescent {
+            base: self.base,
+            velocity: VelocityHypers {
+                learning_rate: self.velocity.learning_rate,
+                mu,
+            },
+        }
+    }
+
+    pub fn to_immutable(&self) -> VelocityHypers<A>
+    where
+        A: Clone,
+    {
+        self.velocity.clone()
+    }
+}
+
+impl<A, Rng> From<VelocityGradientDescent<A, Rng>> for BaseGradientDescent<Rng> {
+    fn from(val: VelocityGradientDescent<A, Rng>) -> BaseGradientDescent<Rng> {
+        val.base
+    }
+}

-#[derive(Clone)]
-pub struct RmsGradientDescentHyper<A, R: Rng> {
-    sampling: Option<(R, usize)>,
-    iterations: u32,
+pub struct RmsGradientDescent<A, Rng> {
+    base: BaseGradientDescent<Rng>,
     rms: RmsHyper<A>,
 }

-impl<A> RmsGradientDescentHyper<A, StdRng> {
-    #[allow(dead_code)]
+impl<A> RmsGradientDescent<A, StdRng> {
     pub fn default(learning_rate: A, iterations: u32) -> Self
     where
         A: NumLike,
@@ -122,9 +140,8 @@ impl<A> RmsGradientDescentHyper<A, StdRng> {
         let one_hundredth = one_tenth.clone() * one_tenth;
         let one_ten_k = one_hundredth.clone() * one_hundredth;

-        RmsGradientDescentHyper {
-            sampling: None,
-            iterations,
+        RmsGradientDescent {
+            base: BaseGradientDescent::new(iterations),
             rms: RmsHyper {
                 stabilizer: one_ten_k.clone() * one_ten_k,
                 beta: A::one() + -(A::one() / ten),
@@ -134,34 +151,31 @@ impl<A> RmsGradientDescentHyper<A, StdRng> {
     }
 }

-impl<A, R: Rng> RmsGradientDescentHyper<A, R> {
-    #[allow(dead_code)]
+impl<A, Rng> RmsGradientDescent<A, Rng> {
+    #[must_use]
     pub fn with_stabilizer(self, stabilizer: A) -> Self {
-        RmsGradientDescentHyper {
-            sampling: self.sampling,
+        RmsGradientDescent {
+            base: self.base,
             rms: RmsHyper {
                 stabilizer,
                 beta: self.rms.beta,
                 learning_rate: self.rms.learning_rate,
             },
-            iterations: self.iterations,
         }
     }

-    #[allow(dead_code)]
+    #[must_use]
     pub fn with_beta(self, beta: A) -> Self {
-        RmsGradientDescentHyper {
-            sampling: self.sampling,
+        RmsGradientDescent {
+            base: self.base,
             rms: RmsHyper {
                 stabilizer: self.rms.stabilizer,
                 beta,
                 learning_rate: self.rms.learning_rate,
             },
-            iterations: self.iterations,
         }
     }

-    #[allow(dead_code)]
     pub fn to_immutable(&self) -> RmsHyper<A>
     where
         A: Clone,
@@ -170,14 +184,8 @@ impl<A, R: Rng> RmsGradientDescentHyper<A, R> {
     }
 }

-impl<A, R: Rng> From<RmsGradientDescentHyper<A, R>> for BaseGradientDescentHyper<A, R> {
-    fn from(val: RmsGradientDescentHyper<A, R>) -> BaseGradientDescentHyper<A, R> {
-        BaseGradientDescentHyper {
-            sampling: val.sampling,
-            iterations: val.iterations,
-            params: NakedHypers {
-                learning_rate: val.rms.learning_rate,
-            },
-        }
+impl<A, Rng> From<RmsGradientDescent<A, Rng>> for BaseGradientDescent<Rng> {
+    fn from(val: RmsGradientDescent<A, Rng>) -> BaseGradientDescent<Rng> {
+        val.base
     }
 }

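Taken together, the new `hyper` module is a family of small builders layered over the shared `BaseGradientDescent` (sampling plus iteration count). Here is a usage sketch, assuming the `little_learner` crate at this commit together with its `rand`/`ordered_float` dependencies; the constructor and builder names are the ones appearing in the diff above.

    use little_learner::hyper;
    use ordered_float::NotNan;

    fn main() {
        // Plain gradient descent: just a learning rate and an iteration count.
        let naked = hyper::NakedGradientDescent::new(NotNan::new(0.01).expect("not nan"), 1000);
        let _naked_params = naked.to_immutable(); // NakedHypers { learning_rate }

        // Momentum: starts with mu = 0 and is tuned via the builder.
        let velocity = hyper::VelocityGradientDescent::zero_momentum(
            NotNan::new(0.001).expect("not nan"),
            1000,
        )
        .with_mu(NotNan::new(0.9).expect("not nan"));

        // RMS: default stabilizer and beta, both overridable.
        let rms = hyper::RmsGradientDescent::default(NotNan::new(0.001).expect("not nan"), 3000)
            .with_stabilizer(NotNan::new(0.000_000_01).expect("not nan"))
            .with_beta(NotNan::new(0.9).expect("not nan"));

        // Each specialised bundle can be downgraded to the shared base, which is what
        // `gradient_descent` itself consumes for sampling and the iteration count.
        let _base_velocity: hyper::BaseGradientDescent<_> = velocity.into();
        let _base_rms: hyper::BaseGradientDescent<_> = rms.into();
    }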
View File

@@ -3,14 +3,12 @@
 #![feature(array_methods)]

 pub mod auto_diff;
-pub mod const_teq;
-pub mod expr_syntax_tree;
 pub mod gradient_descent;
 pub mod hyper;
 pub mod loss;
 pub mod not_nan;
+pub mod predictor;
 pub mod sample;
 pub mod scalar;
 pub mod smooth;
-pub mod tensor;
 pub mod traits;

View File

@@ -4,8 +4,6 @@ use std::{
 };

 use crate::auto_diff::Differentiable;
-use crate::smooth::smooth;
-use crate::traits::{NumLike, Sqrt};
 use crate::{
     auto_diff::{DifferentiableTagged, RankedDifferentiable},
     scalar::Scalar,
@@ -210,7 +208,10 @@ where
     })
 }

-// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate).
+/// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate).
+///
+/// # Panics
+/// Panics if the input `theta` is not of rank 1 consisting of a tensor1 and a scalar.
 pub fn predict_plane<A>(
     xs: RankedDifferentiable<A, 2>,
     theta: &[Differentiable<A>; 2],
@@ -218,9 +219,12 @@ pub fn predict_plane<A>(
 where
     A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
 {
-    if theta[0].rank() != 1 {
-        panic!("theta0 must be of rank 1, got: {}", theta[0].rank())
-    }
+    assert_eq!(
+        theta[0].rank(),
+        1,
+        "theta0 must be of rank 1, got: {}",
+        theta[0].rank()
+    );
     let theta0 = RankedDifferentiable::of_vector(
         theta[0]
             .borrow_vector()
@@ -238,105 +242,6 @@ where
     RankedDifferentiable::of_vector(dotted)
 }

pub struct Predictor<F, Inflated, Deflated, Params> {
pub predict: F,
pub inflate: fn(Deflated) -> Inflated,
pub deflate: fn(Inflated) -> Deflated,
pub update: fn(Inflated, &Deflated, Params) -> Inflated,
}
#[derive(Clone)]
pub struct NakedHypers<A> {
pub learning_rate: A,
}
pub const fn naked_predictor<F, A>(
f: F,
) -> Predictor<F, Differentiable<A>, Differentiable<A>, NakedHypers<A>>
where
A: NumLike,
{
Predictor {
predict: f,
inflate: |x| x,
deflate: |x| x,
update: |theta, delta, hyper| {
let learning_rate = Scalar::make(hyper.learning_rate);
Differentiable::map2(&theta, delta, &mut |theta, delta| {
theta.clone() - delta.clone() * learning_rate.clone()
})
},
}
}
#[derive(Clone)]
pub struct RmsHyper<A> {
pub stabilizer: A,
pub beta: A,
pub learning_rate: A,
}
pub const fn rms_predictor<F, A>(
f: F,
) -> Predictor<F, DifferentiableTagged<A, A>, Differentiable<A>, RmsHyper<A>>
where
A: NumLike,
{
Predictor {
predict: f,
inflate: |x| x.map_tag(&mut |()| A::zero()),
deflate: |x| x.map_tag(&mut |_| ()),
update: |theta, delta, hyper| {
DifferentiableTagged::map2_tagged(
&theta,
delta,
&mut |theta, smoothed_grad, delta, ()| {
let r = smooth(
Scalar::make(hyper.beta.clone()),
&Differentiable::of_scalar(Scalar::make(smoothed_grad)),
&Differentiable::of_scalar(delta.clone() * delta.clone()),
)
.into_scalar();
let learning_rate = Scalar::make(hyper.learning_rate.clone())
/ (r.sqrt() + Scalar::make(hyper.stabilizer.clone()));
(
theta.clone()
+ -(delta.clone() * Scalar::make(hyper.learning_rate.clone())),
learning_rate.clone_real_part(),
)
},
)
},
}
}
#[derive(Clone)]
pub struct VelocityHypers<A> {
pub learning_rate: A,
pub mu: A,
}
pub const fn velocity_predictor<F, A>(
f: F,
) -> Predictor<F, DifferentiableTagged<A, A>, Differentiable<A>, VelocityHypers<A>>
where
A: NumLike,
{
Predictor {
predict: f,
inflate: |x| x.map_tag(&mut |()| A::zero()),
deflate: |x| x.map_tag(&mut |_| ()),
update: |theta, delta, hyper| {
DifferentiableTagged::map2_tagged(&theta, delta, &mut |theta, velocity, delta, ()| {
let velocity = hyper.mu.clone() * velocity
+ -(delta.clone_real_part() * hyper.learning_rate.clone());
(theta.clone() + Scalar::make(velocity.clone()), velocity)
})
},
}
}
 #[cfg(test)]
 mod test_loss {
     use crate::auto_diff::RankedDifferentiable;

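The new `# Panics` note on `predict_plane` is about the shape of `theta`: element 0 must be a rank-1 tensor of length 2 and element 1 a scalar. Below is a sketch of building a parameter array of that shape, assuming the `little_learner` crate at this commit; the first element mirrors the `zero_params` used in `main.rs` later in this diff, and the scalar element is an assumed-but-plausible use of `Scalar::zero()`.

    use little_learner::auto_diff::{Differentiable, RankedDifferentiable};
    use little_learner::scalar::Scalar;
    use little_learner::traits::Zero;
    use ordered_float::NotNan;

    fn main() {
        let theta: [Differentiable<NotNan<f64>>; 2] = [
            // theta[0]: a tensor1 of length 2, dotted with each input point.
            RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
                .to_unranked(),
            // theta[1]: the scalar translation term.
            Differentiable::of_scalar(Scalar::<NotNan<f64>>::zero()),
        ];
        // A theta[0] of any other rank trips the assert_eq! added above.
        let _ = theta;
    }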
View File

@@ -0,0 +1,111 @@
use crate::auto_diff::{Differentiable, DifferentiableTagged};
use crate::scalar::Scalar;
use crate::smooth::smooth;
use crate::traits::{NumLike, Sqrt};
/// A Predictor is a function (`predict`) we're optimising, an `inflate` which adds any metadata
/// that the prediction engine might require, a corresponding `deflate` which removes the metadata,
/// and an `update` which computes the next guess based on the previous guess.
pub struct Predictor<F, Inflated, Deflated, Params> {
/// The function we're trying to optimise.
pub predict: F,
/// Attach prediction metadata to an input to the function we're trying to optimise.
pub inflate: fn(Deflated) -> Inflated,
/// Remove prediction metadata.
pub deflate: fn(Inflated) -> Deflated,
/// Given a guess at an optimum, the gradient at that point, and any hyperparameters,
/// compute the next guess at the optimum.
pub update: fn(Inflated, &Deflated, Params) -> Inflated,
}
/// Hyperparameters applying to the most basic way to calculate the next step.
#[derive(Clone)]
pub struct NakedHypers<A> {
pub learning_rate: A,
}
pub const fn naked<F, A>(f: F) -> Predictor<F, Differentiable<A>, Differentiable<A>, NakedHypers<A>>
where
A: NumLike,
{
Predictor {
predict: f,
inflate: |x| x,
deflate: |x| x,
update: |theta, delta, hyper| {
let learning_rate = Scalar::make(hyper.learning_rate);
Differentiable::map2(&theta, delta, &mut |theta, delta| {
(theta.clone() - delta.clone() * learning_rate.clone()).truncate_dual(None)
})
},
}
}
#[derive(Clone)]
pub struct RmsHyper<A> {
pub stabilizer: A,
pub beta: A,
pub learning_rate: A,
}
pub const fn rms<F, A>(
f: F,
) -> Predictor<F, DifferentiableTagged<A, A>, Differentiable<A>, RmsHyper<A>>
where
A: NumLike,
{
Predictor {
predict: f,
inflate: |x| x.map_tag(&mut |()| A::zero()),
deflate: |x| x.map_tag(&mut |_| ()),
update: |theta, delta, hyper| {
DifferentiableTagged::map2_tagged(
&theta,
delta,
&mut |theta, smoothed_grad, delta, ()| {
let r = smooth(
Scalar::make(hyper.beta.clone()),
&Differentiable::of_scalar(Scalar::make(smoothed_grad)),
&Differentiable::of_scalar(delta.clone() * delta.clone()),
)
.into_scalar();
let learning_rate = Scalar::make(hyper.learning_rate.clone())
/ (r.sqrt() + Scalar::make(hyper.stabilizer.clone()));
(
(theta.clone()
+ -(delta.clone() * Scalar::make(hyper.learning_rate.clone())))
.truncate_dual(None),
learning_rate.clone_real_part(),
)
},
)
},
}
}
#[derive(Clone)]
pub struct VelocityHypers<A> {
pub learning_rate: A,
pub mu: A,
}
pub const fn velocity<F, A>(
f: F,
) -> Predictor<F, DifferentiableTagged<A, A>, Differentiable<A>, VelocityHypers<A>>
where
A: NumLike,
{
Predictor {
predict: f,
inflate: |x| x.map_tag(&mut |()| A::zero()),
deflate: |x| x.map_tag(&mut |_| ()),
update: |theta, delta, hyper| {
DifferentiableTagged::map2_tagged(&theta, delta, &mut |theta, velocity, delta, ()| {
let velocity = hyper.mu.clone() * velocity
+ -(delta.clone_real_part() * hyper.learning_rate.clone());
(theta.clone() + Scalar::make(velocity.clone()), velocity)
})
},
}
}

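The `update` functions above hold the actual optimisation arithmetic. Here is a minimal plain-`f64` sketch of the two simplest rules (the naked and velocity predictors), stripped of the `Differentiable`/`Scalar` machinery; in the real code the velocity is carried in the tag of a `DifferentiableTagged` rather than returned as a tuple.

    // `predictor::naked`'s update: step against the gradient, scaled by the learning rate.
    fn naked_update(theta: f64, delta: f64, learning_rate: f64) -> f64 {
        theta - delta * learning_rate
    }

    // `predictor::velocity`'s update: fold the gradient into a momentum term
    // (mu times the previous velocity) and move the parameter by that velocity.
    fn velocity_update(
        theta: f64,
        velocity: f64,
        delta: f64,
        learning_rate: f64,
        mu: f64,
    ) -> (f64, f64) {
        let velocity = mu * velocity - delta * learning_rate;
        (theta + velocity, velocity)
    }

    fn main() {
        assert_eq!(naked_update(1.0, 2.0, 0.1), 0.8);
        assert_eq!(velocity_update(1.0, 0.0, 2.0, 0.1, 0.9), (0.8, -0.2));
    }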
View File

@@ -1,5 +1,6 @@
 use rand::Rng;

+/// Grab `n` random samples from `from_x` and `from_y`, collecting them into a vector.
 pub fn sample2<R: Rng, T, U, I, J>(rng: &mut R, n: usize, from_x: I, from_y: J) -> (Vec<T>, Vec<U>)
 where
     T: Copy,

View File

@@ -117,7 +117,7 @@ impl<A> Link<A> {
                     -left.clone_real_part() * z
                         / (right.clone_real_part() * right.clone_real_part()),
                     acc,
-                )
+                );
             }
             LinkData::Log(arg) => {
                 // d/dx(log y) = 1/y dy/dx
@@ -181,7 +181,7 @@ where
     A: Add<Output = A> + Clone,
 {
     fn add_assign(&mut self, rhs: Self) {
-        *self = self.clone() + rhs
+        *self = self.clone() + rhs;
     }
 }
@@ -287,8 +287,7 @@ where
 impl<A> Scalar<A> {
     pub fn real_part(&self) -> &A {
         match self {
-            Scalar::Number(a, _) => a,
-            Scalar::Dual(a, _) => a,
+            Scalar::Number(a, _) | Scalar::Dual(a, _) => a,
         }
     }
@@ -297,8 +296,7 @@ impl<A> Scalar<A> {
         A: Clone,
     {
         match self {
-            Scalar::Number(a, _) => (*a).clone(),
-            Scalar::Dual(a, _) => (*a).clone(),
+            Scalar::Number(a, _) | Scalar::Dual(a, _) => (*a).clone(),
         }
     }
@@ -319,6 +317,7 @@ impl<A> Scalar<A> {
         }
     }

+    #[must_use]
     pub fn truncate_dual(self, index: Option<usize>) -> Scalar<A>
     where
         A: Clone,
@@ -326,6 +325,7 @@ impl<A> Scalar<A> {
         Scalar::Dual(self.clone_real_part(), Link::EndOfLink(index))
     }

+    #[must_use]
     pub fn make(x: A) -> Scalar<A> {
         Scalar::Number(x, None)
     }
@@ -337,9 +337,9 @@ where
 {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            Scalar::Number(n, Some(index)) => f.write_fmt(format_args!("{}_{}", n, index)),
-            Scalar::Number(n, None) => f.write_fmt(format_args!("{}", n)),
-            Scalar::Dual(n, link) => f.write_fmt(format_args!("<{}, link: {}>", n, link)),
+            Scalar::Number(n, Some(index)) => f.write_fmt(format_args!("{n}_{index}")),
+            Scalar::Number(n, None) => f.write_fmt(format_args!("{n}")),
+            Scalar::Dual(n, link) => f.write_fmt(format_args!("<{n}, link: {link}>")),
         }
     }
 }
@@ -385,7 +385,7 @@ mod test_loss {
     fn sqrt_gradient() {
         let nine = Differentiable::of_scalar(Scalar::make(NotNan::new(9.0).expect("not nan")));
         let graded: [Differentiable<NotNan<f64>>; 1] = grad(
-            |x| RankedDifferentiable::of_scalar(x[0].clone().into_scalar().clone().sqrt()),
+            |x| RankedDifferentiable::of_scalar(x[0].clone().into_scalar().sqrt()),
             &[nine],
         );
         let graded = graded.map(|x| x.into_scalar().clone_real_part().into_inner())[0];

View File

@@ -3,10 +3,12 @@ use crate::scalar::Scalar;
 use crate::traits::One;
 use std::ops::{Add, Mul, Neg};

+/// Combine `old_value` and `new_value`, weighting the combination towards `old_value` by a factor
+/// of `decay`.
 pub fn smooth_tagged<A, F, Tag1, Tag2, Tag3>(
     decay: Scalar<A>,
-    current_avg: &DifferentiableTagged<A, Tag1>,
-    grad: &DifferentiableTagged<A, Tag2>,
+    old_value: &DifferentiableTagged<A, Tag1>,
+    new_value: &DifferentiableTagged<A, Tag2>,
     mut tags: F,
 ) -> DifferentiableTagged<A, Tag3>
 where
@@ -15,23 +17,25 @@ where
     Tag1: Clone,
     Tag2: Clone,
 {
-    DifferentiableTagged::map2_tagged(current_avg, grad, &mut |avg, tag1, grad, tag2| {
+    DifferentiableTagged::map2_tagged(old_value, new_value, &mut |old, tag1, new, tag2| {
         (
-            (avg.clone() * decay.clone()) + (grad.clone() * (Scalar::<A>::one() + -decay.clone())),
+            (old.clone() * decay.clone()) + (new.clone() * (Scalar::<A>::one() + -decay.clone())),
             tags(tag1, tag2),
         )
     })
 }

+/// Combine `old_value` and `new_value`, weighting the combination towards `old_value` by a factor
+/// of `decay`.
 pub fn smooth<A>(
     decay: Scalar<A>,
-    current_avg: &Differentiable<A>,
-    grad: &Differentiable<A>,
+    old_value: &Differentiable<A>,
+    new_value: &Differentiable<A>,
 ) -> Differentiable<A>
 where
     A: One + Clone + Mul<Output = A> + Neg<Output = A> + Add<Output = A>,
 {
-    smooth_tagged(decay, current_avg, grad, |(), ()| ())
+    smooth_tagged(decay, old_value, new_value, |(), ()| ())
 }

 #[cfg(test)]
@@ -72,17 +76,17 @@ mod test_smooth {
             output,
             vec![
                 5.0299999999999985,
-                6.7969999999999979,
-                6.5472999999999981,
-                6.1625699999999979,
+                6.796_999_999_999_998,
+                6.547_299_999_999_998,
+                6.162_569_999_999_998,
                 5.7263129999999975,
-                5.3736816999999979,
-                4.8963135299999978
+                5.373_681_699_999_998,
+                4.896_313_529_999_998
             ]
-        )
+        );
     }

-    fn hydrate(v: Vec<f64>) -> Differentiable<NotNan<f64>> {
+    fn hydrate(v: &[f64]) -> Differentiable<NotNan<f64>> {
         Differentiable::of_vec(
             v.iter()
                 .cloned()
@@ -100,9 +104,9 @@ mod test_smooth {
             vec![13.4, 18.2, 41.4],
             vec![1.1, 0.3, 67.3],
         ]
-        .map(hydrate);
-        let mut current = hydrate(vec![0.8, 3.1, 2.2]);
+        .map(|x| hydrate(&x));
+        let mut current = hydrate(&vec![0.8, 3.1, 2.2]);
         let mut output = Vec::with_capacity(inputs.len());
         for input in inputs {
             current = smooth(decay.clone(), &current, &input);
@@ -112,10 +116,10 @@ mod test_smooth {
         assert_eq!(
             output,
             vec![
-                vec![0.82000000000000006, 2.9, 2.2800000000000002],
-                vec![2.0779999999999998, 4.4299999999999997, 6.1919999999999993],
+                vec![0.820_000_000_000_000_1, 2.9, 2.2800000000000002],
+                vec![2.078, 4.43, 6.191_999_999_999_999],
                 vec![1.9802, 4.0169999999999995, 12.302799999999998]
             ]
-        )
+        );
     }
 }

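For a concrete feel of what `smooth` computes, here is the rule on plain `f64`s with the decay rate of 0.9 used by the test above: the old value keeps a fraction `decay` of the weight and the new value supplies the rest. The numbers reproduce the second step of the vector test (0.82 smoothed with 13.4 lands on roughly 2.078, as asserted above).

    // Minimal sketch of the smoothing rule on plain f64s; the crate version does the
    // same arithmetic element-wise on Differentiable values.
    fn smooth_f64(decay: f64, old_value: f64, new_value: f64) -> f64 {
        old_value * decay + new_value * (1.0 - decay)
    }

    fn main() {
        let next = smooth_f64(0.9, 0.82, 13.4);
        assert!((next - 2.078).abs() < 1e-12);
        println!("{next}"); // approximately 2.078
    }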
View File

@@ -1,107 +0,0 @@
#[macro_export]
macro_rules! tensor {
($x:ty , $i: expr) => {[$x; $i]};
($x:ty , $i: expr, $($is:expr),+) => {[tensor!($x, $($is),+); $i]};
}
#[cfg(test)]
mod tests {
#[test]
fn test_tensor_type() {
let _: tensor!(f64, 1, 2, 3) = [[[1.0, 3.0, 6.0], [-1.3, -30.0, -0.0]]];
}
}
pub trait Extensible1<A> {
fn apply<F>(&self, other: &A, op: &F) -> Self
where
F: Fn(&A, &A) -> A;
}
pub trait Extensible2<A> {
fn apply<F>(&self, other: &Self, op: &F) -> Self
where
F: Fn(&A, &A) -> A;
}
impl<A, T, const N: usize> Extensible1<A> for [T; N]
where
T: Extensible1<A> + Copy + Default,
{
fn apply<F>(&self, other: &A, op: &F) -> Self
where
F: Fn(&A, &A) -> A,
{
let mut result = [Default::default(); N];
for (i, coord) in self.iter().enumerate() {
result[i] = T::apply(coord, other, op);
}
result
}
}
impl<A, T, const N: usize> Extensible2<A> for [T; N]
where
T: Extensible2<A> + Copy + Default,
{
fn apply<F>(&self, other: &Self, op: &F) -> Self
where
F: Fn(&A, &A) -> A,
{
let mut result = [Default::default(); N];
for (i, coord) in self.iter().enumerate() {
result[i] = T::apply(coord, &other[i], op);
}
result
}
}
#[macro_export]
macro_rules! extensible1 {
($x: ty) => {
impl Extensible1<$x> for $x {
fn apply<F>(&self, other: &$x, op: &F) -> Self
where
F: Fn(&Self, &Self) -> Self,
{
op(self, other)
}
}
};
}
#[macro_export]
macro_rules! extensible2 {
($x: ty) => {
impl Extensible2<$x> for $x {
fn apply<F>(&self, other: &Self, op: &F) -> Self
where
F: Fn(&Self, &Self) -> Self,
{
op(self, other)
}
}
};
}
extensible1!(u8);
extensible1!(f64);
extensible2!(u8);
extensible2!(f64);
pub fn extension1<T, A, F>(t1: &T, t2: &A, op: F) -> T
where
T: Extensible1<A>,
F: Fn(&A, &A) -> A,
{
t1.apply::<F>(t2, &op)
}
pub fn extension2<T, A, F>(t1: &T, t2: &T, op: F) -> T
where
T: Extensible2<A>,
F: Fn(&A, &A) -> A,
{
t1.apply::<F>(t2, &op)
}

View File

@@ -4,6 +4,7 @@ use std::iter::Sum;
 use std::ops::{Add, AddAssign, Div, Mul, Neg};

 pub trait Exp {
+    #[must_use]
     fn exp(self) -> Self;
 }
@@ -14,6 +15,7 @@ impl Exp for NotNan<f64> {
 }

 pub trait Sqrt {
+    #[must_use]
     fn sqrt(self) -> Self;
 }
@@ -24,10 +26,12 @@ impl Sqrt for NotNan<f64> {
 }

 pub trait Zero {
+    #[must_use]
     fn zero() -> Self;
 }

 pub trait One {
+    #[must_use]
     fn one() -> Self;
 }

View File

@@ -1,35 +1,37 @@
 #![allow(incomplete_features)]
 #![feature(generic_const_exprs)]

-mod with_tensor;
-
 use little_learner::auto_diff::{Differentiable, RankedDifferentiable, RankedDifferentiableTagged};
 use little_learner::gradient_descent::gradient_descent;
-use little_learner::hyper::VelocityGradientDescentHyper;
-use little_learner::loss::{predict_plane, velocity_predictor};
+use little_learner::hyper;
+use little_learner::loss::predict_plane;
 use little_learner::not_nan::{to_not_nan_1, to_not_nan_2};
+use little_learner::predictor;
 use little_learner::scalar::Scalar;
 use little_learner::traits::Zero;
 use ordered_float::NotNan;

-fn main() {
-    let plane_xs = [
-        [1.0, 2.05],
-        [1.0, 3.0],
-        [2.0, 2.0],
-        [2.0, 3.91],
-        [3.0, 6.13],
-        [4.0, 8.09],
-    ];
-    let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
-    let hyper = VelocityGradientDescentHyper::naked(NotNan::new(0.001).expect("not nan"), 1000)
-        .with_mu(NotNan::new(0.9).expect("not nan"));
+const PLANE_XS: [[f64; 2]; 6] = [
+    [1.0, 2.05],
+    [1.0, 3.0],
+    [2.0, 2.0],
+    [2.0, 3.91],
+    [3.0, 6.13],
+    [4.0, 8.09],
+];
+const PLANE_YS: [f64; 6] = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
+
+fn main() {
+    let beta = NotNan::new(0.9).expect("not nan");
+    let stabilizer = NotNan::new(0.000_000_01).expect("not nan");
+    let hyper = hyper::RmsGradientDescent::default(NotNan::new(0.001).expect("not nan"), 3000)
+        .with_stabilizer(stabilizer)
+        .with_beta(beta);

     let iterated = {
-        let xs = to_not_nan_2(plane_xs);
-        let ys = to_not_nan_1(plane_ys);
+        let xs = to_not_nan_2(PLANE_XS);
+        let ys = to_not_nan_1(PLANE_YS);
         let zero_params = [
             RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
                 .to_unranked(),
@@ -42,8 +44,8 @@ fn main() {
             RankedDifferentiableTagged::of_slice_2::<_, 2>,
             &ys,
             zero_params,
-            velocity_predictor(predict_plane),
-            VelocityGradientDescentHyper::to_immutable,
+            predictor::rms(predict_plane),
+            hyper::RmsGradientDescent::to_immutable,
         )
     };
@@ -52,11 +54,14 @@ fn main() {
     let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
     let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");

-    assert_eq!(theta0.collect(), [3.979645447136021, 1.976454920954754]);
-    assert_eq!(
-        theta1.to_scalar().real_part().into_inner(),
-        6.169579045974949
-    );
+    let fitted_theta0 = theta0
+        .collect()
+        .iter()
+        .map(|x| x.into_inner())
+        .collect::<Vec<_>>();
+    let fitted_theta1 = theta1.to_scalar().real_part().into_inner();
+    assert_eq!(fitted_theta0, [3.985_350_099_342_649, 1.9745945728216352]);
+    assert_eq!(fitted_theta1, 6.164_222_983_181_168);
 }

 #[cfg(test)]

View File

@@ -1,136 +0,0 @@
#![allow(dead_code)]
use std::iter::Sum;
use std::ops::{Mul, Sub};
use little_learner::tensor;
use little_learner::tensor::{extension2, Extensible2};
use little_learner::traits::One;
type Point<A, const N: usize> = [A; N];
type Parameters<A, const N: usize, const M: usize> = [Point<A, N>; M];
fn dot_points<A: Mul, const N: usize>(x: &Point<A, N>, y: &Point<A, N>) -> A
where
A: Sum<<A as Mul>::Output> + Copy + Default + Mul<Output = A> + Extensible2<A>,
{
extension2(x, y, |&x, &y| x * y).into_iter().sum()
}
fn dot<A, const N: usize, const M: usize>(x: &Point<A, N>, y: &Parameters<A, N, M>) -> Point<A, M>
where
A: Mul<Output = A> + Sum<<A as Mul>::Output> + Copy + Default + Extensible2<A>,
{
let mut result = [Default::default(); M];
for (i, coord) in y.iter().map(|y| dot_points(x, y)).enumerate() {
result[i] = coord;
}
result
}
fn sum<A, const N: usize>(x: &tensor!(A, N)) -> A
where
A: Sum<A> + Copy,
{
A::sum(x.iter().cloned())
}
fn squared<A, const N: usize>(x: &tensor!(A, N)) -> tensor!(A, N)
where
A: Mul<Output = A> + Extensible2<A> + Copy + Default,
{
extension2(x, x, |&a, &b| (a * b))
}
fn l2_norm<A, const N: usize>(prediction: &tensor!(A, N), data: &tensor!(A, N)) -> A
where
A: Sum<A> + Mul<Output = A> + Extensible2<A> + Copy + Default + Sub<Output = A>,
{
let diff = extension2(prediction, data, |&x, &y| x - y);
sum(&squared(&diff))
}
pub fn l2_loss<A, F, Params, const N: usize>(
target: F,
data_xs: &tensor!(A, N),
data_ys: &tensor!(A, N),
params: &Params,
) -> A
where
F: Fn(&tensor!(A, N), &Params) -> tensor!(A, N),
A: Sum<A> + Mul<Output = A> + Extensible2<A> + Copy + Default + Sub<Output = A>,
{
let pred_ys = target(data_xs, params);
l2_norm(&pred_ys, data_ys)
}
pub fn predict_line<A, const N: usize>(xs: &tensor!(A, N), theta: &tensor!(A, 2)) -> tensor!(A, N)
where
A: Mul<Output = A> + Sum<<A as Mul>::Output> + Copy + Default + Extensible2<A> + One,
{
let mut result: tensor!(A, N) = [Default::default(); N];
for (i, &x) in xs.iter().enumerate() {
result[i] = dot(&[x, One::one()], &[*theta])[0];
}
result
}
#[cfg(test)]
mod tests {
use super::*;
use little_learner::tensor::extension1;
#[test]
fn test_extension() {
let x: tensor!(u8, 1) = [2];
assert_eq!(extension1(&x, &7, |x, y| x + y), [9]);
let y: tensor!(u8, 1) = [7];
assert_eq!(extension2(&x, &y, |x, y| x + y), [9]);
let x: tensor!(u8, 3) = [5, 6, 7];
assert_eq!(extension1(&x, &2, |x, y| x + y), [7, 8, 9]);
let y: tensor!(u8, 3) = [2, 0, 1];
assert_eq!(extension2(&x, &y, |x, y| x + y), [7, 6, 8]);
let x: tensor!(u8, 2, 3) = [[4, 6, 7], [2, 0, 1]];
assert_eq!(extension1(&x, &2, |x, y| x + y), [[6, 8, 9], [4, 2, 3]]);
let y: tensor!(u8, 2, 3) = [[1, 2, 2], [6, 3, 1]];
assert_eq!(extension2(&x, &y, |x, y| x + y), [[5, 8, 9], [8, 3, 2]]);
}
#[test]
fn test_l2_norm() {
assert_eq!(
l2_norm(&[4.0, -3.0, 0.0, -4.0, 3.0], &[0.0, 0.0, 0.0, 0.0, 0.0]),
50.0
)
}
#[test]
fn test_l2_loss() {
let loss = l2_loss(
predict_line,
&[2.0, 1.0, 4.0, 3.0],
&[1.8, 1.2, 4.2, 3.3],
&[0.0, 0.0],
);
assert_eq!(loss, 33.21);
let loss = l2_loss(
predict_line,
&[2.0, 1.0, 4.0, 3.0],
&[1.8, 1.2, 4.2, 3.3],
&[0.0099, 0.0],
);
assert_eq!((100.0 * loss).round() / 100.0, 32.59);
}
#[test]
fn l2_loss_non_autodiff_example() {
let xs = [2.0, 1.0, 4.0, 3.0];
let ys = [1.8, 1.2, 4.2, 3.3];
let loss = l2_loss(predict_line, &xs, &ys, &[0.0099, 0.0]);
assert_eq!(loss, 32.5892403);
}
}