Optimise my first function (#8)
Cargo.lock (generated)
@@ -55,6 +55,7 @@ dependencies = [
 name = "little_learner_app"
 version = "0.1.0"
 dependencies = [
+ "arrayvec",
  "immutable-chunkmap",
  "little_learner",
  "ordered-float",
little_learner/src/auto_diff.rs
@@ -132,8 +132,7 @@ where
         + Div<Output = A>
         + Zero
         + One
-        + Neg<Output = A>
-        + Display,
+        + Neg<Output = A>,
 {
     fn accumulate_gradients_vec(v: &[DifferentiableHidden<A>], acc: &mut HashMap<Scalar<A>, A>) {
         for v in v.iter().rev() {
@@ -242,7 +241,7 @@ impl<A, const RANK: usize> Differentiable<A, RANK> {
         }
     }

-    pub fn grad<F>(f: F, theta: Differentiable<A, RANK>) -> Differentiable<A, RANK>
+    pub fn grad<F>(f: F, theta: &Differentiable<A, RANK>) -> Differentiable<A, RANK>
     where
         F: Fn(Differentiable<A, RANK>) -> Differentiable<A, RANK>,
         A: Clone
@@ -254,8 +253,7 @@ impl<A, const RANK: usize> Differentiable<A, RANK> {
             + Zero
             + One
             + Neg<Output = A>
-            + Eq
-            + std::fmt::Display,
+            + Eq,
     {
         let mut i = 0usize;
         let wrt = theta.contents.map(&mut |x| {
@@ -332,7 +330,7 @@ mod tests {
                     x,
                 ))])
             },
-            input_vec,
+            &input_vec,
         );

         let grad_vec: Vec<f64> = Differentiable::to_vector(grad)
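grad now borrows its argument rather than consuming it, so callers keep ownership of theta across repeated gradient computations; this is what makes the iterative loop in main.rs below possible. A minimal sketch of the new call shape, reusing of_slice and square as this commit's main.rs does:

    let theta = of_slice(&[NotNan::new(27.0).expect("not nan")]);
    // theta is only borrowed here, so it stays usable afterwards:
    let grad = Differentiable::grad(|x| Differentiable::map(x, &mut |x| square(&x)), &theta);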
little_learner/src/scalar.rs
@@ -237,6 +237,10 @@ impl<A> Scalar<A> {
     {
         Scalar::Dual(self.clone_real_part(), Link::EndOfLink(Some(index)))
     }
+
+    pub fn make(x: A) -> Scalar<A> {
+        Scalar::Number(x, None)
+    }
 }

 impl<A> Display for Scalar<A>
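The new Scalar::make is a public constructor for a constant scalar: it wraps a bare number as Scalar::Number(x, None), i.e. a value with no gradient history. A small sketch, assuming NotNan<f64> as the number type as elsewhere in this commit:

    // A learning rate as a constant Scalar, carrying no backpropagation link:
    let alpha = Scalar::make(NotNan::new(0.01).expect("not nan"));

gradient_descent_step below uses exactly this to scale the gradient before subtracting it from theta.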
little_learner_app/Cargo.toml
@@ -9,3 +9,4 @@ edition = "2021"
 immutable-chunkmap = "1.0.5"
 ordered-float = "3.6.0"
 little_learner = { path = "../little_learner" }
+arrayvec = "0.7.2"
little_learner_app/src/main.rs
@@ -3,10 +3,14 @@
 
 mod with_tensor;

-use little_learner::auto_diff::{of_scalar, of_slice, Differentiable};
+use core::hash::Hash;
+use std::ops::{Add, AddAssign, Div, Mul, Neg};
+
+use little_learner::auto_diff::{of_scalar, of_slice, to_scalar, Differentiable};

 use little_learner::loss::{l2_loss_2, predict_line_2, square};
-use little_learner::traits::Zero;
+use little_learner::scalar::Scalar;
+use little_learner::traits::{Exp, One, Zero};
 use ordered_float::NotNan;

 use crate::with_tensor::{l2_loss, predict_line};
@@ -19,23 +23,105 @@ fn l2_loss_non_autodiff_example() {
     println!("{:?}", loss);
 }

+fn iterate<A, F>(f: &F, start: A, n: u32) -> A
+where
+    F: Fn(A) -> A,
+{
+    if n == 0 {
+        return start;
+    }
+    iterate(f, f(start), n - 1)
+}
+
+fn gradient_descent_step<A, F, const RANK: usize>(
+    f: &F,
+    learning_rate: A,
+    theta: Differentiable<A, RANK>,
+) -> Differentiable<A, RANK>
+where
+    A: Clone
+        + Mul<Output = A>
+        + Neg<Output = A>
+        + Add<Output = A>
+        + Hash
+        + AddAssign
+        + Div<Output = A>
+        + Zero
+        + One
+        + Eq
+        + Exp,
+    F: Fn(Differentiable<A, RANK>) -> Differentiable<A, RANK>,
+{
+    let delta = Differentiable::grad(f, &theta);
+    Differentiable::map2(&theta, &delta, &|theta, delta| {
+        (*theta).clone() - (Scalar::make(learning_rate.clone()) * (*delta).clone())
+    })
+}
+
 fn main() {
     let input_vec = of_slice(&[NotNan::new(27.0).expect("not nan")]);

-    let grad = Differentiable::grad(|x| Differentiable::map(x, &mut |x| square(&x)), input_vec);
+    let grad = Differentiable::grad(|x| Differentiable::map(x, &mut |x| square(&x)), &input_vec);
     println!("Gradient of the x^2 function at x=27: {}", grad);

     let xs = [2.0, 1.0, 4.0, 3.0];
     let ys = [1.8, 1.2, 4.2, 3.3];

+    let alpha = NotNan::new(0.01).expect("not nan");
+    let iterated = {
+        let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
+        let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+        iterate(
+            &|theta| {
+                gradient_descent_step(
+                    &|x| {
+                        Differentiable::of_vector(vec![of_scalar(l2_loss_2(
+                            predict_line_2,
+                            of_slice(&xs),
+                            of_slice(&ys),
+                            x,
+                        ))])
+                    },
+                    alpha,
+                    theta,
+                )
+            },
+            of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
+            1000,
+        )
+    };
+
+    println!(
+        "After iteration: {:?}",
+        Differentiable::to_vector(iterated)
+            .into_iter()
+            .map(|x| to_scalar(x).real_part().into_inner())
+            .collect::<Vec<_>>()
+    );
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrayvec::ArrayVec;
+    use little_learner::auto_diff::to_scalar;
+
+    #[test]
+    fn loss_example() {
+        let xs = [2.0, 1.0, 4.0, 3.0];
+        let ys = [1.8, 1.2, 4.2, 3.3];
+
     let loss = l2_loss_2(
         predict_line_2,
         of_slice(&xs),
         of_slice(&ys),
         of_slice(&[0.0, 0.0]),
     );
-    println!("Computation of L2 loss: {}", loss);

+        assert_eq!(*loss.real_part(), 33.21);
+    }
+
+    #[test]
+    fn loss_gradient() {
     let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
     let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
     let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
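Two helpers drive the optimisation above: iterate applies f to start n times (n-fold composition), and gradient_descent_step performs one update theta <- theta - alpha * grad(f)(theta) via grad, Scalar::make and map2. A tiny sketch of iterate on a plain integer; the in-tree test_iterate below does the same thing componentwise on an [i32; 3]:

    // Subtract 3 five times: 1 -> -2 -> -5 -> -8 -> -11 -> -14.
    assert_eq!(iterate(&|x| x - 3, 1, 5), -14);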
@@ -48,8 +134,63 @@ fn main() {
                     x,
                 ))])
             },
-            input_vec,
+            &input_vec,
         );

-        println!("{}", grad);
+        assert_eq!(
+            Differentiable::to_vector(grad)
+                .into_iter()
+                .map(|x| *(to_scalar(x).real_part()))
+                .collect::<Vec<_>>(),
+            [-63.0, -21.0]
+        );
+    }
+
+    #[test]
+    fn test_iterate() {
+        let f = |t: [i32; 3]| {
+            let mut vec = ArrayVec::<i32, 3>::new();
+            for i in t {
+                vec.push(i - 3);
+            }
+            vec.into_inner().unwrap()
+        };
+        assert_eq!(iterate(&f, [1, 2, 3], 5u32), [-14, -13, -12]);
+    }
+
+    #[test]
+    fn first_optimisation_test() {
+        let xs = [2.0, 1.0, 4.0, 3.0];
+        let ys = [1.8, 1.2, 4.2, 3.3];
+
+        let alpha = NotNan::new(0.01).expect("not nan");
+        let iterated = {
+            let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
+            let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+            iterate(
+                &|theta| {
+                    gradient_descent_step(
+                        &|x| {
+                            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
+                                predict_line_2,
+                                of_slice(&xs),
+                                of_slice(&ys),
+                                x,
+                            ))])
+                        },
+                        alpha,
+                        theta,
+                    )
+                },
+                of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
+                1000,
+            )
+        };
+        let iterated = Differentiable::to_vector(iterated)
+            .into_iter()
+            .map(|x| to_scalar(x).real_part().into_inner())
+            .collect::<Vec<_>>();
+
+        assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
+    }
 }
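As a sanity check on the expected values in first_optimisation_test: for the data xs = [2.0, 1.0, 4.0, 3.0] and ys = [1.8, 1.2, 4.2, 3.3], the exact least-squares line is

    slope     = sum((x - mean(x)) * (y - mean(y))) / sum((x - mean(x))^2) = 5.25 / 5 = 1.05
    intercept = mean(y) - slope * mean(x) = 2.625 - 1.05 * 2.5 = 0

so 1000 gradient-descent steps at alpha = 0.01 land at [1.0499993..., 0.0000018...], close to but not exactly at the optimum [1.05, 0.0].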