Add hyperparameters, optimise quadratic function (#9)
@@ -21,7 +21,7 @@ pub fn dot_2<A, const RANK: usize>(
     y: &Differentiable<A, RANK>,
 ) -> Differentiable<A, RANK>
 where
-    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Copy + Default,
+    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
 {
     Differentiable::map2(x, y, &|x, y| x.clone() * y.clone())
 }
@@ -35,7 +35,7 @@ where
 
 fn sum_2<A>(x: Differentiable<A, 1>) -> Scalar<A>
 where
-    A: Sum<A> + Copy + Add<Output = A> + Zero,
+    A: Sum<A> + Clone + Add<Output = A> + Zero,
 {
     Differentiable::to_vector(x)
         .into_iter()
@@ -91,3 +91,17 @@ where
     }
     Differentiable::of_vector(result)
 }
+
+pub fn predict_quadratic<A>(
+    xs: Differentiable<A, 1>,
+    theta: Differentiable<A, 1>,
+) -> Differentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
+{
+    Differentiable::map(xs, &mut |x| {
+        let x_powers = vec![Scalar::make(A::one()), x.clone(), square(&x)];
+        let x_powers = Differentiable::of_vector(x_powers.into_iter().map(of_scalar).collect());
+        sum_2(dot_2(&x_powers, &theta))
+    })
+}
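For intuition: per element, `predict_quadratic` builds the power basis [1, x, x²] and dots it with theta, i.e. θ₀ + θ₁·x + θ₂·x². A minimal plain-f64 sketch of the same computation, stripped of the `Differentiable` machinery (the helper name is illustrative and not part of the commit):

// Illustrative only: what predict_quadratic computes per element, without autodiff.
fn predict_quadratic_plain(xs: &[f64], theta: &[f64; 3]) -> Vec<f64> {
    xs.iter()
        .map(|&x| theta[0] + theta[1] * x + theta[2] * x * x)
        .collect()
}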
@@ -8,21 +8,11 @@ use std::ops::{Add, AddAssign, Div, Mul, Neg};
 
 use little_learner::auto_diff::{of_scalar, of_slice, to_scalar, Differentiable};
 
-use little_learner::loss::{l2_loss_2, predict_line_2, square};
+use little_learner::loss::{l2_loss_2, predict_quadratic};
 use little_learner::scalar::Scalar;
 use little_learner::traits::{Exp, One, Zero};
 use ordered_float::NotNan;
 
-use crate::with_tensor::{l2_loss, predict_line};
-
-#[allow(dead_code)]
-fn l2_loss_non_autodiff_example() {
-    let xs = [2.0, 1.0, 4.0, 3.0];
-    let ys = [1.8, 1.2, 4.2, 3.3];
-    let loss = l2_loss(predict_line, &xs, &ys, &[0.0099, 0.0]);
-    println!("{:?}", loss);
-}
-
 fn iterate<A, F>(f: &F, start: A, n: u32) -> A
 where
     F: Fn(A) -> A,
@@ -33,10 +23,15 @@ where
     iterate(f, f(start), n - 1)
 }
 
+struct GradientDescentHyper<A, const RANK: usize> {
+    learning_rate: A,
+    iterations: u32,
+}
+
 fn gradient_descent_step<A, F, const RANK: usize>(
     f: &F,
-    learning_rate: A,
     theta: Differentiable<A, RANK>,
+    params: &GradientDescentHyper<A, RANK>,
 ) -> Differentiable<A, RANK>
 where
     A: Clone
@@ -54,20 +49,19 @@ where
 {
     let delta = Differentiable::grad(f, &theta);
     Differentiable::map2(&theta, &delta, &|theta, delta| {
-        (*theta).clone() - (Scalar::make(learning_rate.clone()) * (*delta).clone())
+        (*theta).clone() - (Scalar::make((params.learning_rate).clone()) * (*delta).clone())
     })
 }
 
 fn main() {
-    let input_vec = of_slice(&[NotNan::new(27.0).expect("not nan")]);
+    let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
+    let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
 
-    let grad = Differentiable::grad(|x| Differentiable::map(x, &mut |x| square(&x)), &input_vec);
-    println!("Gradient of the x^2 function at x=27: {}", grad);
+    let hyper = GradientDescentHyper {
+        learning_rate: NotNan::new(0.001).expect("not nan"),
+        iterations: 1000,
+    };
 
-    let xs = [2.0, 1.0, 4.0, 3.0];
-    let ys = [1.8, 1.2, 4.2, 3.3];
-
-    let alpha = NotNan::new(0.01).expect("not nan");
     let iterated = {
         let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
         let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
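The step itself is the usual update θ ← θ − α·∇L(θ), with the learning rate now read from the hyperparameter struct rather than passed separately. A rough plain-f64 sketch of the same rule, not part of the commit (names illustrative):

// Illustrative only: the update gradient_descent_step applies, minus autodiff.
fn descend_plain(theta: &[f64], gradient: &[f64], learning_rate: f64) -> Vec<f64> {
    theta
        .iter()
        .zip(gradient)
        .map(|(t, g)| t - learning_rate * g)
        .collect()
}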
@@ -76,18 +70,22 @@ fn main() {
                 gradient_descent_step(
                     &|x| {
                         Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                            predict_line_2,
+                            predict_quadratic,
                             of_slice(&xs),
                             of_slice(&ys),
                             x,
                         ))])
                     },
-                    alpha,
                     theta,
+                    &hyper,
                 )
             },
-            of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
-            1000,
+            of_slice(&[
+                NotNan::<f64>::zero(),
+                NotNan::<f64>::zero(),
+                NotNan::<f64>::zero(),
+            ]),
+            hyper.iterations,
         )
     };
 
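main now wires the pieces together: `iterate` applies the per-step closure `hyper.iterations` times, starting from a three-element zero theta (one coefficient per power of x). As a minimal sketch of `iterate` itself, assuming the n == 0 base case (not shown in these hunks) simply returns start:

// Illustrative only: f is applied n times, so this evaluates to 3.
iterate(&|x: u32| x + 1, 0, 3)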
@@ -104,7 +102,12 @@ fn main() {
 mod tests {
     use super::*;
     use arrayvec::ArrayVec;
-    use little_learner::auto_diff::to_scalar;
+    use little_learner::{
+        auto_diff::to_scalar,
+        loss::{predict_line_2, square},
+    };
+
+    use crate::with_tensor::{l2_loss, predict_line};
 
     #[test]
     fn loss_example() {
@@ -120,6 +123,28 @@ mod tests {
         assert_eq!(*loss.real_part(), 33.21);
     }
 
+    #[test]
+    fn l2_loss_non_autodiff_example() {
+        let xs = [2.0, 1.0, 4.0, 3.0];
+        let ys = [1.8, 1.2, 4.2, 3.3];
+        let loss = l2_loss(predict_line, &xs, &ys, &[0.0099, 0.0]);
+        assert_eq!(loss, 32.5892403);
+    }
+
+    #[test]
+    fn grad_example() {
+        let input_vec = of_slice(&[NotNan::new(27.0).expect("not nan")]);
+
+        let grad: Vec<_> = Differentiable::to_vector(Differentiable::grad(
+            |x| Differentiable::map(x, &mut |x| square(&x)),
+            &input_vec,
+        ))
+        .into_iter()
+        .map(|x| to_scalar(x).real_part().into_inner())
+        .collect();
+        assert_eq!(grad, [54.0]);
+    }
+
     #[test]
     fn loss_gradient() {
         let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
@@ -163,7 +188,10 @@ mod tests {
         let xs = [2.0, 1.0, 4.0, 3.0];
         let ys = [1.8, 1.2, 4.2, 3.3];
 
-        let alpha = NotNan::new(0.01).expect("not nan");
+        let hyper = GradientDescentHyper {
+            learning_rate: NotNan::new(0.01).expect("not nan"),
+            iterations: 1000,
+        };
         let iterated = {
             let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
             let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
@@ -178,12 +206,12 @@ mod tests {
                                 x,
                             ))])
                         },
-                        alpha,
                         theta,
+                        &hyper,
                     )
                 },
                 of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
-                1000,
+                hyper.iterations,
             )
         };
         let iterated = Differentiable::to_vector(iterated)
@@ -193,4 +221,53 @@ mod tests {
 
         assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
     }
+
+    #[test]
+    fn optimise_quadratic() {
+        let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
+        let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
+
+        let hyper = GradientDescentHyper {
+            learning_rate: NotNan::new(0.001).expect("not nan"),
+            iterations: 1000,
+        };
+
+        let iterated = {
+            let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
+            let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+            iterate(
+                &|theta| {
+                    gradient_descent_step(
+                        &|x| {
+                            Differentiable::of_vector(vec![of_scalar(l2_loss_2(
+                                predict_quadratic,
+                                of_slice(&xs),
+                                of_slice(&ys),
+                                x,
+                            ))])
+                        },
+                        theta,
+                        &hyper,
+                    )
+                },
+                of_slice(&[
+                    NotNan::<f64>::zero(),
+                    NotNan::<f64>::zero(),
+                    NotNan::<f64>::zero(),
+                ]),
+                hyper.iterations,
+            )
+        };
+        let iterated = Differentiable::to_vector(iterated)
+            .into_iter()
+            .map(|x| to_scalar(x).real_part().into_inner())
+            .collect::<Vec<_>>();
+
+        println!("{:?}", iterated);
+
+        assert_eq!(
+            iterated,
+            [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
+        );
+    }
 }
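As a rough sanity check on the asserted coefficients (not part of the commit): with θ ≈ (2.055, 0.993, 1.479), the fitted curve gives about 2.055 + 0.993·3 + 1.479·9 ≈ 18.34 at x = 3 against the training target 18.25, and about 2.055 − 0.993 + 1.479 ≈ 2.54 at x = −1 against 2.55, so the optimised quadratic tracks the data closely.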
@@ -1,3 +1,5 @@
+#![allow(dead_code)]
+
 use std::iter::Sum;
 use std::ops::{Mul, Sub};
 