Compare commits: 1b738b200a ... f873e5ca3d

19 commits:

f873e5ca3d
bdb5d8e192
fd55cd1c5f
095a8af7f2
5bb1bddf83
242f71fa75
6ab19d4c4d
1ee76d4bc3
fac93253f2
a0da79591a
deb0ec67ca
e42cfa22db
87f191e479
379bd1554a
bbbacd421b
6dbd89aaac
64d98757f4
ae6430aa85
41977a726e
.github/workflows/rust.yml (vendored): 20 changed lines

@@ -21,7 +21,7 @@ jobs:
 {
   "name": "Install Nix",
   "uses": "cachix/install-nix-action@v17",
-  "with": { "extra-nix-config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
+  "with": { "extra_nix_config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
 },
 {
   "name": "cargo test",
@@ -41,7 +41,7 @@ jobs:
 {
   "name": "Install Nix",
   "uses": "cachix/install-nix-action@v17",
-  "with": { "extra-nix-config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
+  "with": { "extra_nix_config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
 },
 {
   "name": "cargo test (release)",
@@ -62,7 +62,7 @@ jobs:
 # {
 #   "name": "Install Nix",
 #   "uses": "cachix/install-nix-action@v17",
-#   "with": { "extra-nix-config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
+#   "with": { "extra_nix_config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
 # },
 # {
 #   "name": "Run Shellcheck",
@@ -83,7 +83,7 @@ jobs:
 {
   "name": "Install Nix",
   "uses": "cachix/install-nix-action@v17",
-  "with": { "extra-nix-config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
+  "with": { "extra_nix_config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
 },
 {
   "name": "Run Alejandra",
@@ -104,7 +104,7 @@ jobs:
 {
   "name": "Install Nix",
   "uses": "cachix/install-nix-action@v17",
-  "with": { "extra-nix-config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
+  "with": { "extra_nix_config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
 },
 {
   "name": "Run ShellCheck",
@@ -125,11 +125,11 @@ jobs:
 {
   "name": "Install Nix",
   "uses": "cachix/install-nix-action@v17",
-  "with": { "extra-nix-config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
+  "with": { "extra_nix_config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
 },
 {
   "name": "Run Clippy",
-  "run": "nix develop --command cargo -- clippy -- -D warnings"
+  "run": "nix develop --command cargo -- clippy -- -D warnings -W clippy::must_use_candidate"
 }
 ]
 }
@@ -146,7 +146,7 @@ jobs:
 {
   "name": "Install Nix",
   "uses": "cachix/install-nix-action@v17",
-  "with": { "extra-nix-config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
+  "with": { "extra_nix_config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
 },
 {
   "name": "Build app",
@@ -167,7 +167,7 @@ jobs:
 {
   "name": "Install Nix",
   "uses": "cachix/install-nix-action@v17",
-  "with": { "extra-nix-config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
+  "with": { "extra_nix_config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
 },
 {
   "name": "Run app",
@@ -188,7 +188,7 @@ jobs:
 {
   "name": "Install Nix",
   "uses": "cachix/install-nix-action@v17",
-  "with": { "extra-nix-config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
+  "with": { "extra_nix_config": "access-tokens = github.com=${{ secrets.GITHUB_TOKEN }}" }
 },
 {
   "name": "Run link checker",
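Note: the same one-line fix is applied to every job in the workflow. The cachix/install-nix-action input is named extra_nix_config (with underscores), so the hyphenated extra-nix-config key was not being recognised as an input and the access token was never passed through to Nix. The Clippy job additionally opts in to the clippy::must_use_candidate lint alongside -D warnings.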
.gitignore (vendored): 1 changed line

@@ -3,3 +3,4 @@ target/
 *.iml
 .vscode/
 .profile*
+.DS_Store
Cargo.lock (generated): 113 changed lines

@@ -26,12 +26,50 @@ dependencies = [
 "wyz",
]

[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

[[package]]
name = "csv"
version = "1.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086"
dependencies = [
 "csv-core",
 "itoa",
 "ryu",
 "serde",
]

[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
 "memchr",
]

[[package]]
name = "funty"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"

[[package]]
name = "getrandom"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4"
dependencies = [
 "cfg-if",
 "libc",
 "wasi",
]

[[package]]
name = "immutable-chunkmap"
version = "1.0.5"
@@ -43,23 +81,44 @@ dependencies = [
 "packed_struct_codegen",
]

[[package]]
name = "itoa"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6"

[[package]]
name = "libc"
version = "0.2.142"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317"

[[package]]
name = "little_learner"
version = "0.1.0"
dependencies = [
 "immutable-chunkmap",
 "ordered-float",
 "rand",
]

[[package]]
name = "little_learner_app"
version = "0.1.0"
dependencies = [
 "csv",
 "immutable-chunkmap",
 "little_learner",
 "ordered-float",
 "rand",
]

[[package]]
name = "memchr"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"

[[package]]
name = "num-traits"
version = "0.2.15"
@@ -99,6 +158,12 @@ dependencies = [
 "syn",
]

[[package]]
name = "ppv-lite86"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"

[[package]]
name = "proc-macro2"
version = "1.0.56"
@@ -123,6 +188,48 @@ version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09"

[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
 "libc",
 "rand_chacha",
 "rand_core",
]

[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
 "ppv-lite86",
 "rand_core",
]

[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
 "getrandom",
]

[[package]]
name = "ryu"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"

[[package]]
name = "serde"
version = "1.0.164"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d"

[[package]]
name = "syn"
version = "1.0.109"
@@ -146,6 +253,12 @@ version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"

[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"

[[package]]
name = "wyz"
version = "0.5.1"
@@ -1,3 +1,5 @@
 # The Little Learner, in Rust

+[![Rust](https://github.com/Smaug123/little_learner/actions/workflows/rust.yml/badge.svg)](https://github.com/Smaug123/little_learner/actions/workflows/rust.yml)
+
 Me running through [The Little Learner](https://www.thelittlelearner.com/), but in Rust instead of Scheme.
@@ -8,5 +8,6 @@ edition = "2021"
 [dependencies]
 immutable-chunkmap = "1.0.5"
 ordered-float = "3.6.0"
+rand = "0.8.5"

 [lib]
File diff suppressed because it is too large.
little_learner/src/block.rs (new file)

@@ -0,0 +1,63 @@
use crate::auto_diff::{Differentiable, RankedDifferentiable, RankedDifferentiableTagged};
use crate::ext::relu;
use crate::traits::NumLike;

pub struct Block<F, const N: usize> {
    f: F,
    ranks: [usize; N],
}

/// Runs the first argument first: `compose(b1, b2, j)` applies `b1` to its input, then
/// applies `b2` to the result, with `b2` reading its parameters from `theta[j..]`.
pub fn compose<'a, 'c, 'd, A, T, B, C, F, G, const N: usize, const M: usize>(
    b1: Block<F, N>,
    b2: Block<G, M>,
    j: usize,
) -> Block<impl FnOnce(&'a A, &'d [T]) -> C, { N + M }>
where
    F: FnOnce(&'a A, &'d [T]) -> B,
    G: for<'b> FnOnce(&'b B, &'d [T]) -> C,
    A: 'a,
    T: 'd,
{
    let mut ranks = [0usize; N + M];
    ranks[..N].copy_from_slice(&b1.ranks);
    ranks[N..(M + N)].copy_from_slice(&b2.ranks);
    Block {
        f: move |t, theta| {
            let intermediate = (b1.f)(t, theta);
            (b2.f)(&intermediate, &theta[j..])
        },
        ranks,
    }
}

#[must_use]
pub fn dense<'b, A, Tag>(
    input_len: usize,
    neuron_count: usize,
) -> Block<
    impl for<'a> FnOnce(
        &'a RankedDifferentiableTagged<A, Tag, 1>,
        &'b [Differentiable<A>],
    ) -> RankedDifferentiable<A, 1>,
    2,
>
where
    Tag: Clone,
    A: NumLike + PartialOrd + Default,
{
    Block {
        f: for<'a> |t: &'a RankedDifferentiableTagged<A, Tag, 1>,
                    theta: &'b [Differentiable<A>]|
                    -> RankedDifferentiable<A, 1> {
            relu(
                t,
                &(theta[0].clone().attach_rank().unwrap()),
                &(theta[1].clone().attach_rank().unwrap()),
            )
            .attach_rank()
            .unwrap()
        },
        ranks: [input_len, neuron_count],
    }
}
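A hypothetical usage sketch (not part of this diff) of wiring two `dense` blocks together with `compose`. The layer sizes are illustrative assumptions; `j = 2` because the first block consumes two parameter tensors (its weights and its biases), so the second block reads its parameters starting at `theta[2]`:

    use ordered_float::NotNan;

    fn two_layer_network() {
        // 3 inputs feeding 4 neurons, then 4 inputs feeding 2 neurons.
        let layer1 = dense::<NotNan<f64>, ()>(3, 4);
        let layer2 = dense::<NotNan<f64>, ()>(4, 2);
        // The composite's `ranks` array is the concatenation [3, 4, 4, 2];
        // evaluation runs layer1 on the input, then layer2 on its output.
        let network = compose(layer1, layer2, 2);
        let _ = network;
    }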
@@ -1,13 +0,0 @@
use std::marker::PhantomData;

pub struct ConstTeq<const A: usize, const B: usize> {
    phantom_a: PhantomData<[(); A]>,
    phantom_b: PhantomData<[(); B]>,
}

pub fn make<const A: usize>() -> ConstTeq<A, A> {
    ConstTeq {
        phantom_a: Default::default(),
        phantom_b: Default::default(),
    }
}
little_learner/src/decider.rs (new file)

@@ -0,0 +1,68 @@
use crate::auto_diff::RankedDifferentiableTagged;
use crate::loss::dot;
use crate::scalar::Scalar;
use crate::traits::{NumLike, Zero};

pub(crate) fn rectify<A>(x: A) -> A
where
    A: Zero + PartialOrd,
{
    if x < A::zero() {
        A::zero()
    } else {
        x
    }
}

fn linear<A, Tag1, Tag2>(
    t: &RankedDifferentiableTagged<A, Tag1, 1>,
    theta0: &RankedDifferentiableTagged<A, Tag2, 1>,
    theta1: Scalar<A>,
) -> Scalar<A>
where
    A: NumLike,
{
    dot(theta0, t) + theta1
}

pub fn relu<A, Tag1, Tag2>(
    t: &RankedDifferentiableTagged<A, Tag1, 1>,
    theta0: &RankedDifferentiableTagged<A, Tag2, 1>,
    theta1: Scalar<A>,
) -> Scalar<A>
where
    A: NumLike + PartialOrd,
{
    rectify(linear(t, theta0, theta1))
}

#[cfg(test)]
mod test_decider {
    use crate::auto_diff::RankedDifferentiable;
    use crate::decider::{linear, relu};
    use crate::not_nan::to_not_nan_1;
    use crate::scalar::Scalar;
    use ordered_float::NotNan;

    #[test]
    fn test_linear() {
        let theta0 = RankedDifferentiable::of_slice(&to_not_nan_1([7.1, 4.3, -6.4]));
        let theta1 = Scalar::make(NotNan::new(0.6).expect("not nan"));
        let t = RankedDifferentiable::of_slice(&to_not_nan_1([2.0, 1.0, 3.0]));

        let result = linear(&t, &theta0, theta1).real_part().into_inner();

        assert!((result + 0.1).abs() < 0.000_000_01);
    }

    #[test]
    fn test_relu() {
        let theta0 = RankedDifferentiable::of_slice(&to_not_nan_1([7.1, 4.3, -6.4]));
        let theta1 = Scalar::make(NotNan::new(0.6).expect("not nan"));
        let t = RankedDifferentiable::of_slice(&to_not_nan_1([2.0, 1.0, 3.0]));

        let result = relu(&t, &theta0, theta1).real_part().into_inner();

        assert_eq!(result, 0.0);
    }
}
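To see where the expected values in these tests come from: `linear` is a dot product plus bias, so on the test data it computes 7.1 * 2.0 + 4.3 * 1.0 + (-6.4) * 3.0 + 0.6 = 14.2 + 4.3 - 19.2 + 0.6 = -0.1, which is why `test_linear` asserts the result is within 1e-8 of -0.1. `relu` then clamps that negative value to zero via `rectify`, giving the 0.0 asserted in `test_relu`.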
@@ -1,155 +0,0 @@
use immutable_chunkmap::map;
use std::ops::{Add, Mul};

/*
An untyped syntax tree for an expression whose constants are all of type `A`.
*/
#[derive(Clone, Debug)]
pub enum Expr<A> {
    Const(A),
    Sum(Box<Expr<A>>, Box<Expr<A>>),
    Variable(u32),
    // The first `Expr` here is a function, which may reference the input variable `Variable(i)`.
    // For example, `(fun x y -> x + y) 3 4` is expressed as:
    // Apply(0, Apply(1, Sum(Variable(0), Variable(1)), Const(4)), Const(3))
    Apply(u32, Box<Expr<A>>, Box<Expr<A>>),
    Mul(Box<Expr<A>>, Box<Expr<A>>),
}

impl<A> Expr<A> {
    fn eval_inner<const SIZE: usize>(e: &Expr<A>, ctx: &map::Map<u32, A, SIZE>) -> A
    where
        A: Clone + Add<Output = A> + Mul<Output = A>,
    {
        match &e {
            Expr::Const(x) => x.clone(),
            Expr::Sum(x, y) => Expr::eval_inner(x, ctx) + Expr::eval_inner(y, ctx),
            Expr::Variable(id) => ctx
                .get(id)
                .unwrap_or_else(|| panic!("No binding found for free variable {}", id))
                .clone(),
            Expr::Apply(variable, func, arg) => {
                let arg = Expr::eval_inner(arg, ctx);
                let (updated_context, _) = ctx.insert(*variable, arg);
                Expr::eval_inner(func, &updated_context)
            }
            Expr::Mul(x, y) => Expr::eval_inner(x, ctx) * Expr::eval_inner(y, ctx),
        }
    }

    pub fn eval<const MAX_VAR_NUM: usize>(e: &Expr<A>) -> A
    where
        A: Clone + Add<Output = A> + Mul<Output = A>,
    {
        Expr::eval_inner(e, &map::Map::<u32, A, MAX_VAR_NUM>::new())
    }

    pub fn apply(var: u32, f: Expr<A>, arg: Expr<A>) -> Expr<A> {
        Expr::Apply(var, Box::new(f), Box::new(arg))
    }

    pub fn differentiate(one: &A, zero: &A, var: u32, f: &Expr<A>) -> Expr<A>
    where
        A: Clone,
    {
        match f {
            Expr::Const(_) => Expr::Const(zero.clone()),
            Expr::Sum(x, y) => {
                Expr::differentiate(one, zero, var, x) + Expr::differentiate(one, zero, var, y)
            }
            Expr::Variable(i) => {
                if *i == var {
                    Expr::Const(one.clone())
                } else {
                    Expr::Const(zero.clone())
                }
            }
            Expr::Mul(x, y) => {
                Expr::Mul(
                    Box::new(Expr::differentiate(one, zero, var, x.as_ref())),
                    (*y).clone(),
                ) + Expr::Mul(
                    Box::new(Expr::differentiate(one, zero, var, y.as_ref())),
                    (*x).clone(),
                )
            }
            Expr::Apply(new_var, func, expr) => {
                if *new_var == var {
                    panic!(
                        "cannot differentiate with respect to variable {} that's been assigned",
                        var
                    )
                }
                let expr_deriv = Expr::differentiate(one, zero, var, expr);
                Expr::mul(
                    expr_deriv,
                    Expr::Apply(
                        *new_var,
                        Box::new(Expr::differentiate(one, zero, *new_var, func)),
                        (*expr).clone(),
                    ),
                )
            }
        }
    }
}

impl<A> Add for Expr<A> {
    type Output = Expr<A>;
    fn add(self: Expr<A>, y: Expr<A>) -> Expr<A> {
        Expr::Sum(Box::new(self), Box::new(y))
    }
}

impl<A> Mul for Expr<A> {
    type Output = Expr<A>;
    fn mul(self: Expr<A>, y: Expr<A>) -> Expr<A> {
        Expr::Mul(Box::new(self), Box::new(y))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_expr() {
        let expr = Expr::apply(
            0,
            Expr::apply(1, Expr::Variable(0) + Expr::Variable(1), Expr::Const(4)),
            Expr::Const(3),
        );

        assert_eq!(Expr::eval::<2>(&expr), 7);
    }

    #[test]
    fn test_derivative() {
        let add_four = Expr::Variable(0) + Expr::Const(4);
        let mul_five = Expr::Variable(1) * Expr::Const(5);

        {
            let mul_five_then_add_four = Expr::apply(0, add_four.clone(), mul_five.clone());
            let mul_then_add_diff = Expr::differentiate(&1, &0, 1, &mul_five_then_add_four);
            for i in 3..10 {
                // (5x + 4) differentiates to 5
                assert_eq!(
                    Expr::eval::<2>(&Expr::apply(1, mul_then_add_diff.clone(), Expr::Const(i))),
                    5
                );
            }
        }

        {
            let add_four_then_mul_five = Expr::apply(1, mul_five.clone(), add_four.clone());
            let add_then_mul_diff = Expr::differentiate(&1, &0, 0, &add_four_then_mul_five);
            for i in 3..10 {
                // ((x + 4) * 5) differentiates to 5
                assert_eq!(
                    Expr::eval::<2>(&Expr::apply(0, add_then_mul_diff.clone(), Expr::Const(i))),
                    5
                );
            }
        }
    }
}
little_learner/src/ext.rs (new file)

@@ -0,0 +1,442 @@
use crate::auto_diff::{
    Differentiable, DifferentiableTagged, RankedDifferentiable, RankedDifferentiableTagged,
};
use crate::decider::rectify;
use crate::scalar::Scalar;
use crate::traits::{NumLike, Zero};
use std::iter::Sum;
use std::ops::{Add, Mul};

pub fn ext1<A, B, Tag, Tag2, F>(
    n: usize,
    f: &mut F,
    t: &DifferentiableTagged<A, Tag>,
) -> DifferentiableTagged<B, Tag2>
where
    F: FnMut(&DifferentiableTagged<A, Tag>) -> DifferentiableTagged<B, Tag2>,
{
    if t.rank() == n {
        f(t)
    } else {
        t.map_once_tagged(|x| ext1(n, f, x))
    }
}

pub fn ext2<A, B, C, Tag, Tag2, Tag3, F>(
    n: usize,
    m: usize,
    f: &mut F,
    t: &DifferentiableTagged<A, Tag>,
    u: &DifferentiableTagged<B, Tag2>,
) -> DifferentiableTagged<C, Tag3>
where
    F: FnMut(
        &DifferentiableTagged<A, Tag>,
        &DifferentiableTagged<B, Tag2>,
    ) -> DifferentiableTagged<C, Tag3>,
    A: Clone,
    Tag: Clone,
    B: Clone,
    Tag2: Clone,
{
    if t.rank() == n && u.rank() == m {
        f(t, u)
    } else if t.rank() == n {
        u.map_once_tagged(|eu| ext2(n, m, f, t, eu))
    } else if u.rank() == m {
        t.map_once_tagged(|et| ext2(n, m, f, et, u))
    } else if t.rank() == u.rank() {
        t.map2_once_tagged(u, |t, u| ext2(n, m, f, t, u))
    } else if t.rank() > u.rank() {
        t.map_once_tagged(|et| ext2(n, m, f, et, u))
    } else {
        u.map_once_tagged(|eu| ext2(n, m, f, t, eu))
    }
}

pub fn elementwise_mul_via_ext<A, Tag, Tag2, const RANK1: usize, const RANK2: usize>(
    x: &RankedDifferentiableTagged<A, Tag, RANK1>,
    y: &RankedDifferentiableTagged<A, Tag2, RANK2>,
) -> RankedDifferentiable<A, RANK1>
where
    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
    Tag: Clone,
    Tag2: Clone,
{
    ext2(
        0,
        0,
        &mut |x, y| {
            DifferentiableTagged::of_scalar(x.borrow_scalar().clone() * y.borrow_scalar().clone())
        },
        x.to_unranked_borrow(),
        y.to_unranked_borrow(),
    )
    .attach_rank::<RANK1>()
    .unwrap()
}

/// Produce the element-wise multiplication of the inputs, threading where necessary until the
/// first argument has rank 2 and the second argument has rank 1.
/// This is essentially "matrix-multiply a matrix by a vector, but don't do the sum; instead
/// leave the components to be summed in a vector".
pub fn star_2_1<T, Tag, Tag2>(
    x: &DifferentiableTagged<T, Tag>,
    y: &DifferentiableTagged<T, Tag2>,
) -> Differentiable<T>
where
    T: Clone + Sum + Mul<Output = T> + Default,
    Tag: Clone,
    Tag2: Clone,
{
    ext2(
        2,
        1,
        &mut |x, y| {
            elementwise_mul_via_ext(
                &x.clone().attach_rank::<2>().unwrap(),
                &y.clone().attach_rank::<1>().unwrap(),
            )
            .to_unranked()
        },
        x,
        y,
    )
}

fn sum_1_scalar<A, Tag>(x: RankedDifferentiableTagged<A, Tag, 1>) -> Scalar<A>
where
    A: Sum<A> + Clone + Add<Output = A> + Zero,
{
    RankedDifferentiableTagged::to_vector(x)
        .into_iter()
        .map(|x| x.to_scalar())
        .sum()
}

pub fn sum_1<A, Tag>(x: RankedDifferentiableTagged<A, Tag, 1>) -> Differentiable<A>
where
    A: Sum<A> + Clone + Add<Output = A> + Zero,
{
    DifferentiableTagged::of_scalar(sum_1_scalar(x))
}

pub fn sum<T>(x: &Differentiable<T>) -> Differentiable<T>
where
    T: Sum<T> + Clone + Add<Output = T> + Zero,
{
    ext1(1, &mut |y| sum_1(y.clone().attach_rank::<1>().unwrap()), x)
}

/// Matrix-multiply W with T, threading where necessary until the first argument has rank 2 and the
/// second argument has rank 1.
pub fn dot_2_1<A, Tag, Tag2>(
    w: &DifferentiableTagged<A, Tag>,
    t: &DifferentiableTagged<A, Tag2>,
) -> Differentiable<A>
where
    A: NumLike + Default,
    Tag: Clone,
    Tag2: Clone,
{
    assert!(
        w.rank() >= 2,
        "w needed to have rank 2 or more, was {}",
        w.rank()
    );
    assert!(
        t.rank() >= 1,
        "t needed to have rank 1 or more, was {}",
        t.rank()
    );
    sum(&star_2_1(w, t))
}

pub fn linear<A, Tag1, Tag2, Tag3>(
    theta0: &DifferentiableTagged<A, Tag1>,
    theta1: &DifferentiableTagged<A, Tag2>,
    t: &DifferentiableTagged<A, Tag3>,
) -> DifferentiableTagged<A, ()>
where
    A: NumLike + Default,
    Tag1: Clone,
    Tag2: Clone,
    Tag3: Clone,
{
    dot_2_1(theta0, t).map2_tagged(theta1, &mut |x, _, y, _| (x.clone() + y.clone(), ()))
}

pub fn relu<A, Tag1, Tag2, Tag3>(
    t: &RankedDifferentiableTagged<A, Tag1, 1>,
    theta0: &RankedDifferentiableTagged<A, Tag2, 2>,
    theta1: &RankedDifferentiableTagged<A, Tag3, 1>,
) -> Differentiable<A>
where
    A: NumLike + PartialOrd + Default,
    Tag1: Clone,
    Tag2: Clone,
    Tag3: Clone,
{
    linear(
        theta0.to_unranked_borrow(),
        theta1.to_unranked_borrow(),
        t.to_unranked_borrow(),
    )
    .map(&mut rectify)
}

pub fn k_relu<A, Tag>(
    t: &RankedDifferentiableTagged<A, Tag, 1>,
    theta: &[Differentiable<A>],
) -> Differentiable<A>
where
    Tag: Clone,
    A: NumLike + PartialOrd + Default,
{
    // Note: the assertion's condition was inverted in the original (`theta.len() < 2`),
    // contradicting its own message; the intent is clearly "at least 2".
    assert!(theta.len() >= 2, "Needed at least 2 parameters for k_relu");
    let once = relu(
        t,
        &theta[0].clone().attach_rank::<2>().unwrap(),
        &theta[1].clone().attach_rank::<1>().unwrap(),
    );
    if theta.len() == 2 {
        once
    } else {
        k_relu(&once.attach_rank().unwrap(), &theta[2..])
    }
}

#[cfg(test)]
mod tests {
    use crate::auto_diff::{Differentiable, RankedDifferentiable};
    use crate::ext::{dot_2_1, ext1, relu, star_2_1};
    use crate::not_nan::{to_not_nan_1, to_not_nan_2};
    use crate::scalar::Scalar;
    use crate::traits::Zero;
    use ordered_float::NotNan;

    fn zeros_redefined<A>(t: &Differentiable<A>) -> Differentiable<A>
    where
        A: Zero,
    {
        ext1(
            0,
            &mut |_| Differentiable::of_scalar(Scalar::make(A::zero())),
            t,
        )
    }

    #[test]
    fn define_zeros() {
        let shape = RankedDifferentiable::of_slice_2::<_, 2>(&to_not_nan_2([
            [1.0, 2.0],
            [3.0, 4.0],
            [5.0, 6.0],
        ]));
        let zeros = zeros_redefined(&shape.to_unranked());
        let to_zeros = zeros
            .attach_rank::<2>()
            .unwrap()
            .to_vector()
            .iter()
            .map(|x| {
                (*x).clone()
                    .to_vector()
                    .iter()
                    .map(|x| (*x).clone().to_scalar().clone_real_part().into_inner())
                    .collect::<Vec<_>>()
            })
            .collect::<Vec<_>>();
        assert_eq!(to_zeros, [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]])
    }

    fn flatten_2<A>(t: RankedDifferentiable<A, 2>) -> RankedDifferentiable<A, 1>
    where
        A: Clone,
    {
        let mut result = Vec::new();
        for v in t.to_unranked_borrow().borrow_vector() {
            result.extend((*v.borrow_vector()).clone())
        }
        Differentiable::of_vec(result).attach_rank::<1>().unwrap()
    }

    #[test]
    fn test_flatten_2() {
        let input = RankedDifferentiable::of_slice_2::<_, 2>(&to_not_nan_2([
            [1.0, 0.5],
            [3.1, 2.2],
            [7.3, 2.1],
        ]));
        let flattened = flatten_2(input);
        let reshaped = flattened
            .to_vector()
            .iter()
            .map(|x| (*x).clone().to_scalar().clone_real_part().into_inner())
            .collect::<Vec<_>>();
        assert_eq!(reshaped, [1.0, 0.5, 3.1, 2.2, 7.3, 2.1])
    }

    #[test]
    fn test_flatten() {
        let flatten = |t: &Differentiable<NotNan<f64>>| {
            ext1(
                2,
                &mut |t| flatten_2((*t).clone().attach_rank::<2>().unwrap()).to_unranked(),
                t,
            )
        };
        let input = RankedDifferentiable::of_vector(vec![
            RankedDifferentiable::of_slice_2::<_, 2>(&to_not_nan_2([
                [1.0, 0.5],
                [3.1, 2.2],
                [7.3, 2.1],
            ])),
            RankedDifferentiable::of_slice_2::<_, 2>(&to_not_nan_2([
                [2.9, 3.5],
                [0.7, 1.5],
                [2.5, 6.4],
            ])),
        ]);

        let flattened = flatten(&input.to_unranked())
            .attach_rank::<2>()
            .unwrap()
            .to_vector()
            .iter()
            .map(|i| {
                i.to_unranked_borrow()
                    .borrow_vector()
                    .iter()
                    .map(|j| j.borrow_scalar().clone_real_part().into_inner())
                    .collect::<Vec<_>>()
            })
            .collect::<Vec<_>>();

        assert_eq!(
            flattened,
            [
                [1.0, 0.5, 3.1, 2.2, 7.3, 2.1],
                [2.9, 3.5, 0.7, 1.5, 2.5, 6.4]
            ]
        )
    }

    #[test]
    fn test_star_2_1_a() {
        let input1 = RankedDifferentiable::of_slice_2::<_, 2>(&to_not_nan_2([
            [3.0, 4.0, 5.0],
            [7.0, 8.0, 9.0],
        ]));
        let input2 = RankedDifferentiable::of_slice(&to_not_nan_1([2.0, 4.0, 3.0]));

        let output = star_2_1(input1.to_unranked_borrow(), input2.to_unranked_borrow())
            .into_vector()
            .iter()
            .map(|x| {
                x.clone()
                    .into_vector()
                    .iter()
                    .map(|i| i.clone().into_scalar().clone_real_part().into_inner())
                    .collect::<Vec<_>>()
            })
            .collect::<Vec<_>>();

        assert_eq!(output, [[6.0, 16.0, 15.0], [14.0, 32.0, 27.0]])
    }

    #[test]
    fn test_star_2_1_b() {
        let input1 = RankedDifferentiable::of_slice_2::<_, 2>(&to_not_nan_2([
            [8.0, 1.0],
            [7.0, 3.0],
            [5.0, 4.0],
        ]));
        let input2 = RankedDifferentiable::of_slice_2::<_, 2>(&to_not_nan_2([
            [6.0, 2.0],
            [4.0, 9.0],
            [3.0, 8.0],
        ]));

        let output = star_2_1(input1.to_unranked_borrow(), input2.to_unranked_borrow())
            .into_vector()
            .iter()
            .map(|x| {
                x.clone()
                    .into_vector()
                    .iter()
                    .map(|i| {
                        i.clone()
                            .into_vector()
                            .iter()
                            .map(|i| i.borrow_scalar().clone_real_part().into_inner())
                            .collect::<Vec<_>>()
                    })
                    .collect::<Vec<_>>()
            })
            .collect::<Vec<_>>();

        assert_eq!(
            output,
            [
                [[48.0, 2.0], [42.0, 6.0], [30.0, 8.0]],
                [[32.0, 9.0], [28.0, 27.0], [20.0, 36.0]],
                [[24.0, 8.0], [21.0, 24.0], [15.0, 32.0]]
            ]
        )
    }

    #[test]
    fn test_dot_2_1() {
        let w = RankedDifferentiable::of_slice_2::<_, 2>(&to_not_nan_2([
            [2.0, 1.0, 3.1],
            [3.7, 4.0, 6.1],
        ]));
        let t = RankedDifferentiable::of_slice(&to_not_nan_1([1.3, 0.4, 3.3]));

        let result = dot_2_1(w.to_unranked_borrow(), t.to_unranked_borrow())
            .attach_rank::<1>()
            .unwrap()
            .to_vector()
            .iter()
            .map(|x| x.clone().to_scalar().clone_real_part().into_inner())
            .collect::<Vec<_>>();
        assert_eq!(result, [13.23, 26.54])
    }

    #[test]
    fn test_relu() {
        let weights = to_not_nan_2([
            [7.1, 4.3, -6.4],
            [1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0],
            [-1.3, -2.4, -3.6],
        ]);
        let biases = to_not_nan_1([10.2, 11.3, 12.4, 13.5]);
        let inputs = to_not_nan_1([7.0, 8.0, 9.0]);
        let theta0 = RankedDifferentiable::of_slice_2::<_, 2>(&weights);
        let theta1 = RankedDifferentiable::of_slice(&biases);
        let t = RankedDifferentiable::of_slice(&inputs);

        let result = relu(&t, &theta0, &theta1)
            .into_vector()
            .iter()
            .map(|x| x.borrow_scalar().clone_real_part().into_inner())
            .collect::<Vec<_>>();

        let mut expected = Vec::new();
        for (weights, bias) in weights.iter().zip(biases.iter()) {
            expected.push(
                crate::decider::relu(
                    &t,
                    &RankedDifferentiable::of_slice(weights),
                    Scalar::make(*bias),
                )
                .clone_real_part()
                .into_inner(),
            );
        }

        assert_eq!(result, expected);
    }
}
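A hypothetical sketch (not part of this diff) of what `ext1`'s rank-directed recursion buys you: lifting a scalar-level operation to tensors of any rank. `borrow_scalar`, `of_scalar`, and `Scalar::make` are used exactly as in the module above, and the `NotNan` wrapper matches the tests:

    // Lift "add one" from rank 0 to any tensor: ext1 recurses through the
    // tensor's nesting until it reaches rank 0, then rebuilds the structure
    // with the function applied at every scalar leaf.
    fn add_one_everywhere(t: &Differentiable<NotNan<f64>>) -> Differentiable<NotNan<f64>> {
        ext1(
            0,
            &mut |leaf| {
                Differentiable::of_scalar(
                    leaf.borrow_scalar().clone()
                        + Scalar::make(NotNan::new(1.0).expect("not nan")),
                )
            },
            t,
        )
    }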
little_learner/src/gradient_descent.rs (new file)

@@ -0,0 +1,462 @@
use crate::auto_diff::{grad, Differentiable, RankedDifferentiable};
use crate::hyper;
use crate::loss::l2_loss_2;
use crate::predictor::Predictor;
use crate::sample;
use crate::traits::NumLike;
use rand::Rng;
use std::hash::Hash;

fn iterate<A, F>(mut f: F, start: A, n: u32) -> A
where
    F: FnMut(A) -> A,
{
    let mut v = start;
    for _ in 0..n {
        v = f(v);
    }
    v
}

/// `adjust` takes the previous value and a delta, and returns a deflated new value.
fn general_gradient_descent_step<
    A,
    F,
    Inflated,
    Deflate,
    Adjust,
    Hyper,
    const RANK: usize,
    const PARAM_NUM: usize,
>(
    f: &mut F,
    theta: [Inflated; PARAM_NUM],
    deflate: Deflate,
    hyper: Hyper,
    mut adjust: Adjust,
) -> [Inflated; PARAM_NUM]
where
    A: Clone + NumLike + Hash + Eq,
    F: FnMut(&[Differentiable<A>; PARAM_NUM]) -> RankedDifferentiable<A, RANK>,
    Deflate: FnMut(Inflated) -> Differentiable<A>,
    Inflated: Clone,
    Hyper: Clone,
    Adjust: FnMut(Inflated, &Differentiable<A>, Hyper) -> Inflated,
{
    let deflated = theta.clone().map(deflate);
    let delta = grad(f, &deflated);
    let mut i = 0;
    theta.map(|inflated| {
        let delta = &delta[i];
        i += 1;
        adjust(inflated, delta, hyper.clone())
    })
}

pub fn gradient_descent<
    'a,
    T,
    R,
    Point,
    F,
    G,
    H,
    Inflated,
    Hyper,
    ImmutableHyper,
    const IN_SIZE: usize,
    const PARAM_NUM: usize,
>(
    hyper: Hyper,
    xs: &'a [Point],
    to_ranked_differentiable: G,
    ys: &[T],
    zero_params: [Differentiable<T>; PARAM_NUM],
    mut predictor: Predictor<F, Inflated, Differentiable<T>, ImmutableHyper>,
    to_immutable: H,
) -> [Differentiable<T>; PARAM_NUM]
where
    T: NumLike + Hash + Copy + Default,
    Point: 'a + Copy,
    F: Fn(
        RankedDifferentiable<T, IN_SIZE>,
        &[Differentiable<T>; PARAM_NUM],
    ) -> RankedDifferentiable<T, 1>,
    G: for<'b> Fn(&'b [Point]) -> RankedDifferentiable<T, IN_SIZE>,
    Inflated: Clone,
    ImmutableHyper: Clone,
    Hyper: Into<hyper::BaseGradientDescent<R>>,
    H: FnOnce(&Hyper) -> ImmutableHyper,
    R: Rng,
{
    let sub_hypers = to_immutable(&hyper);
    let mut gradient_hyper: hyper::BaseGradientDescent<R> = hyper.into();
    let iterations = gradient_hyper.iterations;
    let out = iterate(
        |theta| {
            general_gradient_descent_step(
                &mut |x| match gradient_hyper.sampling.as_mut() {
                    None => RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
                        l2_loss_2(
                            &predictor.predict,
                            to_ranked_differentiable(xs),
                            RankedDifferentiable::of_slice(ys),
                            x,
                        ),
                    )]),
                    Some((rng, batch_size)) => {
                        let (sampled_xs, sampled_ys) = sample::take_2(rng, *batch_size, xs, ys);
                        RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
                            l2_loss_2(
                                &predictor.predict,
                                to_ranked_differentiable(&sampled_xs),
                                RankedDifferentiable::of_slice(&sampled_ys),
                                x,
                            ),
                        )])
                    }
                },
                theta,
                predictor.deflate,
                sub_hypers.clone(),
                predictor.update,
            )
        },
        zero_params.map(predictor.inflate),
        iterations,
    );
    out.map(&mut predictor.deflate)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::auto_diff::RankedDifferentiableTagged;
    use crate::hyper;
    use crate::loss::{predict_line_2_unranked, predict_plane, predict_quadratic_unranked};
    use crate::not_nan::{to_not_nan_1, to_not_nan_2};
    use crate::predictor;
    use crate::scalar::Scalar;
    use crate::traits::Zero;
    use ordered_float::NotNan;
    use rand::rngs::StdRng;
    use rand::SeedableRng;

    #[test]
    fn test_iterate() {
        let f = |t: [i32; 3]| t.map(|i| i - 3);
        assert_eq!(iterate(f, [1, 2, 3], 5u32), [-14, -13, -12]);
    }

    #[test]
    fn first_optimisation_test() {
        let xs = [2.0, 1.0, 4.0, 3.0];
        let ys = [1.8, 1.2, 4.2, 3.3];

        let zero = Scalar::<NotNan<f64>>::zero();

        let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.01).expect("not nan"), 1000);
        let iterated = {
            let xs = to_not_nan_1(xs);
            let ys = to_not_nan_1(ys);
            let zero_params = [
                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
                RankedDifferentiable::of_scalar(zero).to_unranked(),
            ];
            gradient_descent(
                hyper,
                &xs,
                |b| RankedDifferentiable::of_slice(b),
                &ys,
                zero_params,
                predictor::naked(predict_line_2_unranked),
                hyper::NakedGradientDescent::to_immutable,
            )
        };
        let iterated = iterated
            .into_iter()
            .map(|x| x.into_scalar().real_part().into_inner())
            .collect::<Vec<_>>();

        assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
    }

    #[test]
    fn optimise_quadratic() {
        let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
        let ys = [2.55, 2.1, 4.35, 10.2, 18.25];

        let zero = Scalar::<NotNan<f64>>::zero();

        let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000);

        let iterated = {
            let xs = to_not_nan_1(xs);
            let ys = to_not_nan_1(ys);
            let zero_params = [
                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
                RankedDifferentiable::of_scalar(zero).to_unranked(),
            ];
            gradient_descent(
                hyper,
                &xs,
                |b| RankedDifferentiable::of_slice(b),
                &ys,
                zero_params,
                predictor::naked(predict_quadratic_unranked),
                hyper::NakedGradientDescent::to_immutable,
            )
        };
        let iterated = iterated
            .into_iter()
            .map(|x| x.into_scalar().real_part().into_inner())
            .collect::<Vec<_>>();

        assert_eq!(
            iterated,
            [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
        );
    }

    const PLANE_XS: [[f64; 2]; 6] = [
        [1.0, 2.05],
        [1.0, 3.0],
        [2.0, 2.0],
        [2.0, 3.91],
        [3.0, 6.13],
        [4.0, 8.09],
    ];
    const PLANE_YS: [f64; 6] = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];

    #[test]
    fn optimise_plane() {
        let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000);

        let iterated = {
            let xs = to_not_nan_2(PLANE_XS);
            let ys = to_not_nan_1(PLANE_YS);
            let zero_params = [
                RankedDifferentiable::of_slice(&[NotNan::zero(), NotNan::zero()]).to_unranked(),
                Differentiable::of_scalar(Scalar::zero()),
            ];
            gradient_descent(
                hyper,
                &xs,
                RankedDifferentiable::of_slice_2::<_, 2>,
                &ys,
                zero_params,
                predictor::naked(predict_plane),
                hyper::NakedGradientDescent::to_immutable,
            )
        };

        let [theta0, theta1] = iterated;

        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");

        assert_eq!(theta0.collect(), [3.97757644609063, 2.0496557321494446]);
        assert_eq!(
            theta1.to_scalar().real_part().into_inner(),
            5.786758464448078
        );
    }

    #[test]
    fn optimise_plane_with_sampling() {
        let rng = StdRng::seed_from_u64(314159);
        let hyper = hyper::NakedGradientDescent::new(NotNan::new(0.001).expect("not nan"), 1000)
            .with_rng(rng, 4);

        let iterated = {
            let xs = to_not_nan_2(PLANE_XS);
            let ys = to_not_nan_1(PLANE_YS);
            let zero_params = [
                RankedDifferentiable::of_slice(&[NotNan::zero(), NotNan::zero()]).to_unranked(),
                Differentiable::of_scalar(Scalar::zero()),
            ];
            gradient_descent(
                hyper,
                &xs,
                RankedDifferentiable::of_slice_2::<_, 2>,
                &ys,
                zero_params,
                predictor::naked(predict_plane),
                hyper::NakedGradientDescent::to_immutable,
            )
        };

        let [theta0, theta1] = iterated;

        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor").collect();
        let theta1 = theta1
            .attach_rank::<0>()
            .expect("rank 0 tensor")
            .to_scalar()
            .real_part()
            .into_inner();

        /*
        Mathematica code to verify by eye that the optimisation gave a reasonable result:

        xs = {{1.0, 2.05}, {1.0, 3.0}, {2.0, 2.0}, {2.0, 3.91}, {3.0,
           6.13}, {4.0, 8.09}};
        ys = {13.99, 15.99, 18.0, 22.4, 30.2, 37.94};
        points = ListPointPlot3D[Append @@@ Transpose[{xs, ys}]];

        withoutBatching0 = {3.97757644609063, 2.0496557321494446};
        withoutBatching1 = 5.2839863438547159;
        withoutBatching =
          Plot3D[{x, y} . withoutBatching0 + withoutBatching1, {x, 0, 4}, {y,
            0, 8}];

        withBatching0 = {3.8581694055684781, 2.2166222673968554};
        withBatching1 = 5.2399202468216668;
        withBatching =
          Plot3D[{x, y} . withBatching0 + withBatching1, {x, 0, 4}, {y, 0, 8}];

        Show[points, withoutBatching]

        Show[points, withBatching]
        */

        assert_eq!(theta0, [3.858_169_405_568_478, 2.2166222673968554]);
        assert_eq!(theta1, 5.283_986_343_854_716);
    }

    #[test]
    fn test_with_velocity() {
        let hyper = hyper::VelocityGradientDescent::zero_momentum(
            NotNan::new(0.001).expect("not nan"),
            1000,
        )
        .with_mu(NotNan::new(0.9).expect("not nan"));

        let iterated = {
            let xs = to_not_nan_2(PLANE_XS);
            let ys = to_not_nan_1(PLANE_YS);
            let zero_params = [
                RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
                    .to_unranked(),
                Differentiable::of_scalar(Scalar::zero()),
            ];

            gradient_descent(
                hyper,
                &xs,
                RankedDifferentiableTagged::of_slice_2::<_, 2>,
                &ys,
                zero_params,
                predictor::velocity(predict_plane),
                hyper::VelocityGradientDescent::to_immutable,
            )
        };

        let [theta0, theta1] = iterated;

        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");

        assert_eq!(theta0.collect(), [3.979645447136021, 1.976454920954754]);
        assert_eq!(
            theta1.to_scalar().real_part().into_inner(),
            6.169579045974949
        );
    }

    #[test]
    fn test_with_rms() {
        let beta = NotNan::new(0.9).expect("not nan");
        let stabilizer = NotNan::new(0.00000001).expect("not nan");
        let hyper = hyper::RmsGradientDescent::default(NotNan::new(0.01).expect("not nan"), 3000)
            .with_stabilizer(stabilizer)
            .with_beta(beta);

        let iterated = {
            let xs = to_not_nan_2(PLANE_XS);
            let ys = to_not_nan_1(PLANE_YS);
            let zero_params = [
                RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
                    .to_unranked(),
                Differentiable::of_scalar(Scalar::zero()),
            ];

            gradient_descent(
                hyper,
                &xs,
                RankedDifferentiableTagged::of_slice_2::<_, 2>,
                &ys,
                zero_params,
                predictor::rms(predict_plane),
                hyper::RmsGradientDescent::to_immutable,
            )
        };

        let [theta0, theta1] = iterated;

        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");

        let fitted_theta0 = theta0
            .collect()
            .iter()
            .map(|x| x.into_inner())
            .collect::<Vec<_>>();
        let fitted_theta1 = theta1.to_scalar().real_part().into_inner();
        assert_eq!(
            fitted_theta0,
            [3.974_645_444_172_085, 1.971_454_922_077_495]
        );
        assert_eq!(fitted_theta1, 6.164_579_048_274_036);
    }

    #[test]
    fn test_with_adam() {
        let beta = NotNan::new(0.9).expect("not nan");
        let stabilizer = NotNan::new(0.00000001).expect("not nan");
        let mu = NotNan::new(0.85).expect("not nan");
        // Erratum in the book: they printed 0.001 but intended 0.01.
        let hyper = hyper::AdamGradientDescent::default(NotNan::new(0.01).expect("not nan"), 1500)
            .with_stabilizer(stabilizer)
            .with_beta(beta)
            .with_mu(mu);

        let iterated = {
            let xs = to_not_nan_2(PLANE_XS);
            let ys = to_not_nan_1(PLANE_YS);
            let zero_params = [
                RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
                    .to_unranked(),
                Differentiable::of_scalar(Scalar::zero()),
            ];

            gradient_descent(
                hyper,
                &xs,
                RankedDifferentiableTagged::of_slice_2::<_, 2>,
                &ys,
                zero_params,
                predictor::adam(predict_plane),
                hyper::AdamGradientDescent::to_immutable,
            )
        };

        let [theta0, theta1] = iterated;

        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");

        let fitted_theta0 = theta0
            .collect()
            .iter()
            .map(|x| x.into_inner())
            .collect::<Vec<_>>();
        let fitted_theta1 = theta1.to_scalar().real_part().into_inner();
        assert_eq!(
            fitted_theta0,
            [3.980_262_420_345_729_5, 1.977_071_898_301_444]
        );
        assert_eq!(fitted_theta1, 6.170_196_024_282_712_5);
    }
}
little_learner/src/hyper.rs (new file)

@@ -0,0 +1,265 @@
use crate::predictor::{AdamHyper, NakedHypers, RmsHyper, VelocityHypers};
use crate::traits::{NumLike, Zero};
use rand::rngs::StdRng;

/// Hyperparameters which apply to any possible optimisation algorithm that uses gradient descent.
pub struct BaseGradientDescent<Rng> {
    pub sampling: Option<(Rng, usize)>,
    pub iterations: u32,
}

impl BaseGradientDescent<StdRng> {
    #[must_use]
    pub fn new(iterations: u32) -> BaseGradientDescent<StdRng> {
        BaseGradientDescent {
            sampling: None,
            iterations,
        }
    }
}

impl<Rng> BaseGradientDescent<Rng> {
    #[must_use]
    pub fn with_rng<Rng2>(self, rng: Rng2, size: usize) -> BaseGradientDescent<Rng2> {
        BaseGradientDescent {
            iterations: self.iterations,
            sampling: Some((rng, size)),
        }
    }

    #[must_use]
    pub fn with_iterations(self, n: u32) -> Self {
        BaseGradientDescent {
            sampling: self.sampling,
            iterations: n,
        }
    }
}

pub struct NakedGradientDescent<A, Rng> {
    base: BaseGradientDescent<Rng>,
    naked: NakedHypers<A>,
}

impl<A> NakedGradientDescent<A, StdRng>
where
    A: Zero,
{
    #[must_use]
    pub fn new(learning_rate: A, iterations: u32) -> Self {
        NakedGradientDescent {
            base: BaseGradientDescent::new(iterations),
            naked: NakedHypers { learning_rate },
        }
    }
}

impl<A, Rng> NakedGradientDescent<A, Rng> {
    pub fn to_immutable(&self) -> NakedHypers<A>
    where
        A: Clone,
    {
        self.naked.clone()
    }

    #[must_use]
    pub fn with_rng<Rng2>(self, rng: Rng2, size: usize) -> NakedGradientDescent<A, Rng2> {
        NakedGradientDescent {
            base: self.base.with_rng(rng, size),
            naked: self.naked,
        }
    }
}

impl<A, Rng> From<NakedGradientDescent<A, Rng>> for BaseGradientDescent<Rng> {
    fn from(val: NakedGradientDescent<A, Rng>) -> BaseGradientDescent<Rng> {
        val.base
    }
}

pub struct VelocityGradientDescent<A, Rng> {
    base: BaseGradientDescent<Rng>,
    velocity: VelocityHypers<A>,
}

impl<A> VelocityGradientDescent<A, StdRng>
where
    A: Zero,
{
    #[must_use]
    pub fn zero_momentum(learning_rate: A, iterations: u32) -> Self {
        VelocityGradientDescent {
            base: BaseGradientDescent::new(iterations),
            velocity: VelocityHypers {
                learning_rate,
                mu: A::zero(),
            },
        }
    }
}

impl<A, Rng> VelocityGradientDescent<A, Rng> {
    #[must_use]
    pub fn with_mu(self, mu: A) -> Self {
        VelocityGradientDescent {
            base: self.base,
            velocity: VelocityHypers {
                learning_rate: self.velocity.learning_rate,
                mu,
            },
        }
    }

    pub fn to_immutable(&self) -> VelocityHypers<A>
    where
        A: Clone,
    {
        self.velocity.clone()
    }
}

impl<A, Rng> From<VelocityGradientDescent<A, Rng>> for BaseGradientDescent<Rng> {
    fn from(val: VelocityGradientDescent<A, Rng>) -> BaseGradientDescent<Rng> {
        val.base
    }
}

fn ten<A>() -> A
where
    A: NumLike,
{
    let two = A::one() + A::one();
    two.clone() * two.clone() * two.clone() + two
}

fn one_ten_k<A>() -> A
where
    A: NumLike,
{
    let one_tenth = A::one() / ten();
    let one_hundredth = one_tenth.clone() * one_tenth;
    one_hundredth.clone() * one_hundredth
}

pub struct RmsGradientDescent<A, Rng> {
    base: BaseGradientDescent<Rng>,
    rms: RmsHyper<A>,
}

impl<A> RmsGradientDescent<A, StdRng> {
    pub fn default(learning_rate: A, iterations: u32) -> Self
    where
        A: NumLike,
    {
        RmsGradientDescent {
            base: BaseGradientDescent::new(iterations),
            rms: RmsHyper {
                stabilizer: one_ten_k::<A>() * one_ten_k(),
                beta: A::one() + -(A::one() / ten()),
                learning_rate,
            },
        }
    }
}

impl<A, Rng> RmsGradientDescent<A, Rng> {
    #[must_use]
    pub fn with_stabilizer(self, stabilizer: A) -> Self {
        RmsGradientDescent {
            base: self.base,
            rms: RmsHyper {
                stabilizer,
                beta: self.rms.beta,
                learning_rate: self.rms.learning_rate,
            },
        }
    }

    #[must_use]
    pub fn with_beta(self, beta: A) -> Self {
        RmsGradientDescent {
            base: self.base,
            rms: RmsHyper {
                stabilizer: self.rms.stabilizer,
                beta,
                learning_rate: self.rms.learning_rate,
            },
        }
    }

    pub fn to_immutable(&self) -> RmsHyper<A>
    where
        A: Clone,
    {
        self.rms.clone()
    }
}

impl<A, Rng> From<RmsGradientDescent<A, Rng>> for BaseGradientDescent<Rng> {
    fn from(val: RmsGradientDescent<A, Rng>) -> BaseGradientDescent<Rng> {
        val.base
    }
}

pub struct AdamGradientDescent<A, Rng> {
    base: BaseGradientDescent<Rng>,
    adam: AdamHyper<A>,
}

impl<A> AdamGradientDescent<A, StdRng> {
    pub fn default(learning_rate: A, iterations: u32) -> Self
    where
        A: NumLike,
    {
        AdamGradientDescent {
            base: BaseGradientDescent::new(iterations),
            adam: AdamHyper {
                mu: A::zero(),
                rms: RmsHyper {
                    learning_rate,
                    stabilizer: one_ten_k::<A>() * one_ten_k(),
                    beta: A::one() + -(A::one() / ten()),
                },
            },
        }
    }
}

impl<A, Rng> AdamGradientDescent<A, Rng> {
    #[must_use]
    pub fn with_stabilizer(self, stabilizer: A) -> Self {
        AdamGradientDescent {
            base: self.base,
            adam: self.adam.with_stabilizer(stabilizer),
        }
    }

    #[must_use]
    pub fn with_beta(self, beta: A) -> Self {
        AdamGradientDescent {
            base: self.base,
            adam: self.adam.with_beta(beta),
        }
    }

    #[must_use]
    pub fn with_mu(self, mu: A) -> Self {
        AdamGradientDescent {
            base: self.base,
            adam: self.adam.with_mu(mu),
        }
    }

    pub fn to_immutable(&self) -> AdamHyper<A>
    where
        A: Clone,
    {
        self.adam.clone()
    }
}

impl<A, Rng> From<AdamGradientDescent<A, Rng>> for BaseGradientDescent<Rng> {
    fn from(val: AdamGradientDescent<A, Rng>) -> BaseGradientDescent<Rng> {
        val.base
    }
}
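For reference, a sketch of the textbook update rules these hyperparameter bundles parameterise (the actual adjust implementations live in the predictor module, which this diff does not show), writing g for the gradient:

    velocity (mu):           v <- mu * v - learning_rate * g;    theta <- theta + v
    RMS (beta, stabilizer):  r <- beta * r + (1 - beta) * g^2;   theta <- theta - learning_rate * g / (sqrt(r) + stabilizer)
    Adam (mu + RMS fields):  combines the two, tracking both a first and a second moment of g

The defaults constructed above work out to beta = 1 - 1/10 = 0.9 and stabilizer = one_ten_k() * one_ten_k() = 10^-4 * 10^-4 = 10^-8, matching the values that test_with_rms and test_with_adam pass explicitly.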
little_learner/src/layer.rs (new file)

@@ -0,0 +1,75 @@
use crate::auto_diff::{Differentiable, RankedDifferentiable, RankedDifferentiableTagged};
use crate::decider::relu;
use crate::traits::NumLike;

/// Returns a tensor1.
/// Theta has two components: a tensor2 of weights and a tensor1 of bias.
pub fn layer<T>(
    theta: Differentiable<T>,
    t: RankedDifferentiable<T, 1>,
) -> RankedDifferentiable<T, 1>
where
    T: NumLike + PartialOrd,
{
    let mut theta = theta.into_vector();
    assert_eq!(theta.len(), 2, "Needed weights and a bias");
    let b = theta.pop().unwrap().attach_rank::<1>().unwrap();
    let w = theta.pop().unwrap().attach_rank::<2>().unwrap();

    RankedDifferentiableTagged::map2_once(
        &w,
        &b,
        &mut |w: &RankedDifferentiable<_, 1>, b: &RankedDifferentiable<_, 0>| {
            RankedDifferentiableTagged::of_scalar(relu(&t, w, b.clone().to_scalar()))
        },
    )
}

#[cfg(test)]
mod tests {
    use crate::auto_diff::{Differentiable, RankedDifferentiable};
    use crate::layer::layer;
    use crate::not_nan::{to_not_nan_1, to_not_nan_2};

    #[test]
    fn test_single_layer() {
        let b = RankedDifferentiable::of_slice(&to_not_nan_1([1.0, 2.0]));
        let w = RankedDifferentiable::of_slice_2::<_, 2>(&to_not_nan_2([
            [3.0, 4.0, 5.0],
            [6.0, 7.0, 8.0],
        ]));
        let theta = Differentiable::of_vec(vec![w.to_unranked(), b.to_unranked()]);

        /*
        Two neurons:
        w =
          (3 4 5
           6 7 8)
        b = (1, 2)

        Three inputs:
        t = (9, 10, 11)

        Output has two elements, one per neuron.
        Neuron 1 has weights (3,4,5) and bias 1;
        Neuron 2 has weights (6,7,8) and bias 2.

        Neuron 1 is relu(t, (3,4,5), 1), which is (9, 10, 11).(3, 4, 5) + 1.
        Neuron 2 is relu(t, (6,7,8), 2), which is (9, 10, 11).(6, 7, 8) + 2.
        */

        let t = RankedDifferentiable::of_slice(&to_not_nan_1([9.0, 10.0, 11.0]));
        let mut output = layer(theta, t)
            .to_vector()
            .iter()
            .map(|t| (*t).clone().to_scalar().clone_real_part().into_inner())
            .collect::<Vec<_>>();

        assert_eq!(output.len(), 2);
        let result_2 = output.pop().unwrap();
        let result_1 = output.pop().unwrap();

        assert_eq!(result_1, (9 * 3 + 10 * 4 + 11 * 5 + 1) as f64);
        assert_eq!(result_2, (9 * 6 + 10 * 7 + 11 * 8 + 2) as f64);
    }
}
@@ -1,11 +1,19 @@
 #![allow(incomplete_features)]
 #![feature(generic_const_exprs)]
+#![feature(array_methods)]
+#![feature(closure_lifetime_binder)]

 pub mod auto_diff;
-pub mod const_teq;
-pub mod expr_syntax_tree;
+pub mod block;
+pub mod decider;
+pub mod ext;
+pub mod gradient_descent;
+pub mod hyper;
+pub mod layer;
 pub mod loss;
 pub mod not_nan;
+pub mod predictor;
+pub mod sample;
 pub mod scalar;
-pub mod tensor;
+pub mod smooth;
 pub mod traits;
@@ -3,8 +3,10 @@ use std::{
     ops::{Add, Mul, Neg},
 };

+use crate::auto_diff::{Differentiable, RankedDifferentiableTagged};
+use crate::ext::{sum, sum_1};
 use crate::{
-    auto_diff::{Differentiable, RankedDifferentiable},
+    auto_diff::{DifferentiableTagged, RankedDifferentiable},
     scalar::Scalar,
     traits::{One, Zero},
 };
@@ -23,14 +25,47 @@ pub fn elementwise_mul<A, const RANK: usize>(
 where
     A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
 {
-    RankedDifferentiable::map2(x, y, &|x, y| x.clone() * y.clone())
+    RankedDifferentiable::map2(x, y, &mut |x, y| x.clone() * y.clone())
 }

+pub fn dot_unranked_tagged<A, Tag1, Tag2, Tag3, F>(
+    x: &DifferentiableTagged<A, Tag1>,
+    y: &DifferentiableTagged<A, Tag2>,
+    mut combine_tags: F,
+) -> DifferentiableTagged<A, Tag3>
+where
+    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
+    F: FnMut(Tag1, Tag2) -> Tag3,
+    Tag1: Clone,
+    Tag2: Clone,
+{
+    DifferentiableTagged::map2_tagged(x, y, &mut |x, tag1, y, tag2| {
+        (x.clone() * y.clone(), combine_tags(tag1, tag2))
+    })
+}
+
 pub fn dot_unranked<A>(x: &Differentiable<A>, y: &Differentiable<A>) -> Differentiable<A>
 where
     A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
 {
-    Differentiable::map2(x, y, &|x, y| x.clone() * y.clone())
+    dot_unranked_tagged(x, y, |(), ()| ())
 }

+pub fn dot<A, Tag1, Tag2>(
+    x: &RankedDifferentiableTagged<A, Tag1, 1>,
+    y: &RankedDifferentiableTagged<A, Tag2, 1>,
+) -> Scalar<A>
+where
+    A: Mul<Output = A> + Sum + Clone + Add<Output = A> + Zero,
+{
+    // Much sadness - find a way to get rid of these clones
+    let x = x.map_tag(&mut |_| ());
+    let y = y.map_tag(&mut |_| ());
+    x.to_vector()
+        .iter()
+        .zip(y.to_vector().iter())
+        .map(|(x, y)| x.clone().to_scalar() * y.clone().to_scalar())
+        .sum()
+}
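// Editor's sketch, not part of this commit: a worked example of `dot` under
// the API used elsewhere in this diff; (1, 2, 3) . (4, 5, 6) = 4 + 10 + 18 = 32.
#[cfg(test)]
mod test_dot {
    use crate::auto_diff::RankedDifferentiable;
    use crate::loss::dot;
    use crate::not_nan::to_not_nan_1;

    #[test]
    fn dot_example() {
        let x = RankedDifferentiable::of_slice(&to_not_nan_1([1.0, 2.0, 3.0]));
        let y = RankedDifferentiable::of_slice(&to_not_nan_1([4.0, 5.0, 6.0]));
        assert_eq!(dot(&x, &y).clone_real_part().into_inner(), 32.0);
    }
}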

 fn squared_2<A, const RANK: usize>(
@@ -39,17 +74,7 @@ fn squared_2<A, const RANK: usize>(
 where
     A: Mul<Output = A> + Copy + Default,
 {
-    RankedDifferentiable::map2(x, x, &|x, y| x.clone() * y.clone())
-}
-
-fn sum_2<A>(x: RankedDifferentiable<A, 1>) -> Scalar<A>
-where
-    A: Sum<A> + Clone + Add<Output = A> + Zero,
-{
-    RankedDifferentiable::to_vector(x)
-        .into_iter()
-        .map(|x| x.to_scalar())
-        .sum()
+    RankedDifferentiable::map2(x, x, &mut |x, y| x.clone() * y.clone())
 }

 fn l2_norm_2<A>(
@@ -59,8 +84,8 @@ fn l2_norm_2<A>(
 where
     A: Sum<A> + Mul<Output = A> + Copy + Default + Neg<Output = A> + Add<Output = A> + Zero + Neg,
 {
-    let diff = RankedDifferentiable::map2(prediction, data, &|x, y| x.clone() - y.clone());
-    sum_2(squared_2(&diff))
+    let diff = RankedDifferentiable::map2(prediction, data, &mut |x, y| x.clone() - y.clone());
+    sum_1(squared_2(&diff)).into_scalar()
 }

 pub fn l2_loss_2<A, F, Params, const N: usize>(
@@ -126,7 +151,7 @@ where
     let dotted = RankedDifferentiable::of_scalar(
         dot_unranked(
             left_arg.to_unranked_borrow(),
-            &Differentiable::of_vec(theta.to_vec()),
+            &DifferentiableTagged::of_vec(theta.to_vec()),
         )
         .into_vector()
         .into_iter()
@@ -180,7 +205,7 @@ where
     );
     dot_unranked(
         x_powers.to_unranked_borrow(),
-        &Differentiable::of_vec(theta.to_vec()),
+        &DifferentiableTagged::of_vec(theta.to_vec()),
     )
     .attach_rank::<1>()
     .expect("wanted a tensor1")
@@ -191,7 +216,10 @@ where
     })
 }

-// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate).
+/// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate).
+///
+/// # Panics
+/// Panics if the input `theta` is not of rank 1 consisting of a tensor1 and a scalar.
 pub fn predict_plane<A>(
     xs: RankedDifferentiable<A, 2>,
     theta: &[Differentiable<A>; 2],
@@ -199,9 +227,12 @@ pub fn predict_plane<A>(
 where
     A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
 {
-    if theta[0].rank() != 1 {
-        panic!("theta0 must be of rank 1, got: {}", theta[0].rank())
-    }
+    assert_eq!(
+        theta[0].rank(),
+        1,
+        "theta0 must be of rank 1, got: {}",
+        theta[0].rank()
+    );
     let theta0 = RankedDifferentiable::of_vector(
         theta[0]
             .borrow_vector()
@@ -209,12 +240,41 @@ where
             .map(|v| RankedDifferentiable::of_scalar(v.borrow_scalar().clone()))
             .collect::<Vec<_>>(),
     );
-    let theta1 = theta[1].borrow_scalar().clone();
+    let theta1 = theta[1].clone().attach_rank::<0>().unwrap();
     let dotted: Vec<_> = xs
         .to_vector()
         .into_iter()
-        .map(|point| sum_2(elementwise_mul(&theta0, &point)))
-        .map(|x| RankedDifferentiable::of_scalar(x + theta1.clone()))
+        .map(|point| {
+            sum(elementwise_mul(&theta0, &point).to_unranked_borrow())
+                .attach_rank::<0>()
+                .unwrap()
+        })
+        .map(|x| x.map2(&theta1, &mut |x, theta| x.clone() + theta.clone()))
         .collect();
     RankedDifferentiable::of_vector(dotted)
 }

+#[cfg(test)]
+mod test_loss {
+    use crate::auto_diff::RankedDifferentiable;
+    use crate::loss::{l2_loss_2, predict_line_2};
+    use crate::scalar::Scalar;
+    use crate::traits::Zero;
+
+    #[test]
+    fn loss_example() {
+        let xs = [2.0, 1.0, 4.0, 3.0];
+        let ys = [1.8, 1.2, 4.2, 3.3];
+        let loss = l2_loss_2(
+            predict_line_2,
+            RankedDifferentiable::of_slice(&xs),
+            RankedDifferentiable::of_slice(&ys),
+            &[
+                RankedDifferentiable::of_scalar(Scalar::zero()),
+                RankedDifferentiable::of_scalar(Scalar::zero()),
+            ],
+        );
+
+        assert_eq!(*loss.real_part(), 33.21);
+    }
+}
19
little_learner/src/not_nan.rs
Normal file
@@ -0,0 +1,19 @@
use ordered_float::NotNan;

pub fn to_not_nan_1<T, const N: usize>(xs: [T; N]) -> [NotNan<T>; N]
where
    T: ordered_float::Float,
{
    xs.map(|x| NotNan::new(x).expect("not nan"))
}

pub fn from_not_nan_1<T, const N: usize>(xs: [NotNan<T>; N]) -> [T; N] {
    xs.map(|x| x.into_inner())
}

pub fn to_not_nan_2<T, const N: usize, const M: usize>(xs: [[T; N]; M]) -> [[NotNan<T>; N]; M]
where
    T: ordered_float::Float,
{
    xs.map(to_not_nan_1)
}
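// Editor's sketch, not part of this commit: `to_not_nan_1` and
// `from_not_nan_1` are inverses whenever no NaN is present.
#[cfg(test)]
mod tests {
    use super::{from_not_nan_1, to_not_nan_1};

    #[test]
    fn round_trip() {
        let lifted = to_not_nan_1([1.0, 2.0, 3.0]);
        assert_eq!(from_not_nan_1(lifted), [1.0, 2.0, 3.0]);
    }
}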
197
little_learner/src/predictor.rs
Normal file
@@ -0,0 +1,197 @@
use crate::auto_diff::{Differentiable, DifferentiableTagged};
use crate::scalar::Scalar;
use crate::smooth::smooth;
use crate::traits::NumLike;

/// A Predictor is a function (`predict`) we're optimising, an `inflate` which adds any metadata
/// that the prediction engine might require, a corresponding `deflate` which removes the metadata,
/// and an `update` which computes the next guess based on the previous guess.
pub struct Predictor<F, Inflated, Deflated, Params> {
    /// The function we're trying to optimise.
    pub predict: F,
    /// Attach prediction metadata to an input to the function we're trying to optimise.
    pub inflate: fn(Deflated) -> Inflated,
    /// Remove prediction metadata.
    pub deflate: fn(Inflated) -> Deflated,
    /// Given a guess at an optimum, the gradient at that point, and any hyperparameters,
    /// compute the next guess at the optimum.
    pub update: fn(Inflated, &Deflated, Params) -> Inflated,
}

/// Hyperparameters applying to the most basic way to calculate the next step.
#[derive(Clone)]
pub struct NakedHypers<A> {
    pub learning_rate: A,
}

pub const fn naked<F, A>(f: F) -> Predictor<F, Differentiable<A>, Differentiable<A>, NakedHypers<A>>
where
    A: NumLike,
{
    Predictor {
        predict: f,
        inflate: |x| x,
        deflate: |x| x,

        // Plain gradient descent: theta_next = theta - learning_rate * delta.
        update: |theta, delta, hyper| {
            let learning_rate = Scalar::make(hyper.learning_rate);
            Differentiable::map2(&theta, delta, &mut |theta, delta| {
                (theta.clone() - delta.clone() * learning_rate.clone()).truncate_dual(None)
            })
        },
    }
}

#[derive(Clone)]
pub struct RmsHyper<A> {
    pub stabilizer: A,
    pub beta: A,
    pub learning_rate: A,
}

impl<A> RmsHyper<A> {
    #[must_use]
    pub fn with_stabilizer(self, s: A) -> RmsHyper<A> {
        RmsHyper {
            learning_rate: self.learning_rate,
            beta: self.beta,
            stabilizer: s,
        }
    }

    #[must_use]
    pub fn with_beta(self, s: A) -> RmsHyper<A> {
        RmsHyper {
            learning_rate: self.learning_rate,
            beta: s,
            stabilizer: self.stabilizer,
        }
    }
}

pub const fn rms<F, A>(
    f: F,
) -> Predictor<F, DifferentiableTagged<A, A>, Differentiable<A>, RmsHyper<A>>
where
    A: NumLike,
{
    Predictor {
        predict: f,
        inflate: |x| x.map_tag(&mut |()| A::zero()),
        deflate: |x| x.map_tag(&mut |_| ()),
        // RMSProp-style update: keep a smoothed average r of the squared gradients in the
        // tag, and scale each step by 1 / (sqrt(r) + stabilizer).
        update: |theta, delta, hyper| {
            DifferentiableTagged::map2_tagged(&theta, delta, &mut |theta, smoothed_r, delta, ()| {
                let r = smooth(
                    Scalar::make(hyper.beta.clone()),
                    &Differentiable::of_scalar(Scalar::make(smoothed_r)),
                    &Differentiable::of_scalar(delta.clone() * delta.clone()),
                )
                .into_scalar();
                let learning_rate = hyper.learning_rate.clone()
                    / (r.clone_real_part().sqrt() + hyper.stabilizer.clone());
                (
                    Scalar::make(
                        theta.clone_real_part() + -(delta.clone_real_part() * learning_rate),
                    ),
                    r.clone_real_part(),
                )
            })
        },
    }
}

#[derive(Clone)]
pub struct VelocityHypers<A> {
    pub learning_rate: A,
    pub mu: A,
}

pub const fn velocity<F, A>(
    f: F,
) -> Predictor<F, DifferentiableTagged<A, A>, Differentiable<A>, VelocityHypers<A>>
where
    A: NumLike,
{
    Predictor {
        predict: f,
        inflate: |x| x.map_tag(&mut |()| A::zero()),
        deflate: |x| x.map_tag(&mut |_| ()),
        // Momentum: carry a velocity in the tag that decays by mu and accumulates the
        // scaled negative gradient.
        update: |theta, delta, hyper| {
            DifferentiableTagged::map2_tagged(&theta, delta, &mut |theta, velocity, delta, ()| {
                let velocity = hyper.mu.clone() * velocity
                    + -(delta.clone_real_part() * hyper.learning_rate.clone());
                (theta.clone() + Scalar::make(velocity.clone()), velocity)
            })
        },
    }
}

#[derive(Clone)]
pub struct AdamHyper<A> {
    pub rms: RmsHyper<A>,
    pub mu: A,
}

impl<A> AdamHyper<A> {
    #[must_use]
    pub fn with_stabilizer(self, s: A) -> AdamHyper<A> {
        AdamHyper {
            mu: self.mu,
            rms: self.rms.with_stabilizer(s),
        }
    }

    #[must_use]
    pub fn with_beta(self, s: A) -> AdamHyper<A> {
        AdamHyper {
            mu: self.mu,
            rms: self.rms.with_beta(s),
        }
    }

    #[must_use]
    pub fn with_mu(self, mu: A) -> AdamHyper<A> {
        AdamHyper { mu, rms: self.rms }
    }
}

type AdamInflated<A> = DifferentiableTagged<A, (A, A)>;

pub const fn adam<F, A>(f: F) -> Predictor<F, AdamInflated<A>, Differentiable<A>, AdamHyper<A>>
where
    A: NumLike,
{
    Predictor {
        predict: f,
        inflate: |x| x.map_tag(&mut |()| (A::zero(), A::zero())),
        deflate: |x| x.map_tag(&mut |_| ()),
        // Adam-style update: the smoothed squared gradient from `rms`, plus a smoothed
        // gradient (the velocity) used in place of the raw delta.
        update: |theta, delta, hyper| {
            DifferentiableTagged::map2_tagged(
                &theta,
                delta,
                &mut |theta, (smoothed_velocity, smoothed_r), delta, ()| {
                    let r = smooth(
                        Scalar::make(hyper.rms.beta.clone()),
                        &Differentiable::of_scalar(Scalar::make(smoothed_r)),
                        &Differentiable::of_scalar(delta.clone() * delta.clone()),
                    )
                    .into_scalar();
                    let learning_rate = hyper.rms.learning_rate.clone()
                        / (r.clone_real_part().sqrt() + hyper.rms.stabilizer.clone());
                    let velocity = smooth(
                        Scalar::make(hyper.mu.clone()),
                        &Differentiable::of_scalar(Scalar::make(smoothed_velocity)),
                        &Differentiable::of_scalar(delta.clone()),
                    )
                    .into_scalar();
                    (
                        Scalar::make(
                            theta.clone_real_part() + -(velocity.clone_real_part() * learning_rate),
                        ),
                        (velocity.clone_real_part(), r.clone_real_part()),
                    )
                },
            )
        },
    }
}
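// Editor's sketch, not part of this commit: exercising the `naked` update on a
// single scalar parameter; it should step theta by -learning_rate * delta.
#[cfg(test)]
mod test_naked_update {
    use super::{naked, NakedHypers};
    use crate::auto_diff::Differentiable;
    use crate::scalar::Scalar;
    use ordered_float::NotNan;

    #[test]
    fn naked_update_is_plain_descent() {
        // The predict function is irrelevant to `update`, so pass a unit placeholder.
        let predictor = naked::<(), NotNan<f64>>(());
        let theta = Differentiable::of_scalar(Scalar::make(NotNan::new(3.0).expect("not nan")));
        let delta = Differentiable::of_scalar(Scalar::make(NotNan::new(2.0).expect("not nan")));
        let hyper = NakedHypers {
            learning_rate: NotNan::new(0.5).expect("not nan"),
        };
        let updated = (predictor.update)(theta, &delta, hyper);
        // 3.0 - 0.5 * 2.0 = 2.0
        assert_eq!(
            updated.into_scalar().clone_real_part().into_inner(),
            3.0 - 0.5 * 2.0
        );
    }
}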
21
little_learner/src/sample.rs
Normal file
@@ -0,0 +1,21 @@
use rand::Rng;

/// Grab `n` random samples from `from_x` and `from_y`, collecting them into a pair of vectors.
/// The same index is drawn for both slices on each iteration, so sampled pairs stay aligned.
pub fn take_2<R: Rng, T, U, I, J>(rng: &mut R, n: usize, from_x: I, from_y: J) -> (Vec<T>, Vec<U>)
where
    T: Copy,
    U: Copy,
    I: AsRef<[T]>,
    J: AsRef<[U]>,
{
    let from_x = from_x.as_ref();
    let from_y = from_y.as_ref();
    let mut out_x = Vec::with_capacity(n);
    let mut out_y = Vec::with_capacity(n);
    for _ in 0..n {
        let sample = rng.gen_range(0..from_x.len());
        out_x.push(from_x[sample]);
        out_y.push(from_y[sample]);
    }
    (out_x, out_y)
}
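// Editor's sketch, not part of this commit: the pairing property is easy to
// check, since it holds whatever indices the generator happens to draw.
#[cfg(test)]
mod tests {
    use super::take_2;

    #[test]
    fn samples_stay_paired() {
        let mut rng = rand::thread_rng();
        let (xs, ys) = take_2(&mut rng, 4, [1, 2, 3], [10, 20, 30]);
        assert_eq!(xs.len(), 4);
        // Every y is 10 times its paired x by construction of the inputs.
        for (x, y) in xs.iter().zip(ys.iter()) {
            assert_eq!(*y, *x * 10);
        }
    }
}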
@@ -1,5 +1,6 @@
-use crate::traits::{Exp, One, Zero};
+use crate::traits::{Exp, One, Sqrt, Zero};
 use core::hash::Hash;
+use std::cmp::Ordering;
 use std::{
     collections::{hash_map::Entry, HashMap},
     fmt::Display,
@@ -14,6 +15,8 @@ pub enum LinkData<A> {
     Mul(Box<Scalar<A>>, Box<Scalar<A>>),
     Exponent(Box<Scalar<A>>),
     Log(Box<Scalar<A>>),
+    Div(Box<Scalar<A>>, Box<Scalar<A>>),
+    Sqrt(Box<Scalar<A>>),
 }

 #[derive(Clone, Hash, PartialEq, Eq, Debug)]
@@ -41,6 +44,10 @@ where
                 f.write_fmt(format_args!("exp({})", arg.as_ref()))
             }
             Link::Link(LinkData::Log(arg)) => f.write_fmt(format_args!("log({})", arg.as_ref())),
+            Link::Link(LinkData::Sqrt(arg)) => f.write_fmt(format_args!("sqrt({})", arg.as_ref())),
+            Link::Link(LinkData::Div(left, right)) => {
+                f.write_fmt(format_args!("({} / {})", left.as_ref(), right.as_ref()))
+            }
         }
     }
 }
@@ -53,9 +60,11 @@ impl<A> Link<A> {
             + AddAssign
             + Clone
             + Exp
             + Add<Output = A>
             + Mul<Output = A>
+            + Div<Output = A>
             + Neg<Output = A>
+            + Sqrt
             + Zero
             + One,
     {
@@ -96,6 +105,21 @@ impl<A> Link<A> {
                     .clone_link()
                     .invoke(&right, left.clone_real_part() * z, acc);
             }
+            LinkData::Div(left, right) => {
+                // d/dx(f / g) = f d(1/g)/dx + (df/dx) / g
+                //             = -f (dg/dx)/g^2 + (df/dx) / g
+                left.as_ref().clone_link().invoke(
+                    &left,
+                    z.clone() / right.clone_real_part(),
+                    acc,
+                );
+                right.as_ref().clone_link().invoke(
+                    &right,
+                    -left.clone_real_part() * z
+                        / (right.clone_real_part() * right.clone_real_part()),
+                    acc,
+                );
+            }
             LinkData::Log(arg) => {
                 // d/dx(log y) = 1/y dy/dx
                 arg.as_ref().clone_link().invoke(
@@ -108,6 +132,15 @@ impl<A> Link<A> {
                 // d/dx(-y) = - dy/dx
                 arg.as_ref().clone_link().invoke(&arg, -z, acc);
             }
+            LinkData::Sqrt(arg) => {
+                // d/dx(y^(1/2)) = 1/2 y^(-1/2) dy/dx
+                let two = A::one() + A::one();
+                arg.as_ref().clone_link().invoke(
+                    &arg,
+                    A::one() / (two * arg.as_ref().clone_real_part().sqrt()) * z,
+                    acc,
+                );
+            }
         }
     }
 }
@@ -144,6 +177,15 @@ where
     }
 }

+impl<A> AddAssign for Scalar<A>
+where
+    A: Add<Output = A> + Clone,
+{
+    fn add_assign(&mut self, rhs: Self) {
+        *self = self.clone() + rhs;
+    }
+}
+
 impl<A> Neg for Scalar<A>
 where
     A: Neg<Output = A> + Clone,
@@ -190,17 +232,72 @@ where
     fn sum<I: Iterator<Item = Self>>(iter: I) -> Self {
         let mut answer = Zero::zero();
         for i in iter {
-            answer = answer + i;
+            answer += i;
         }
         answer
     }
 }

+impl<A> PartialOrd for Scalar<A>
+where
+    A: PartialOrd + Clone,
+{
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        self.real_part().partial_cmp(other.real_part())
+    }
+}
+
 impl<A> Exp for Scalar<A>
 where
     A: Exp + Clone,
 {
     fn exp(self) -> Self {
         Self::Dual(
             self.clone_real_part().exp(),
             Link::Link(LinkData::Exponent(Box::new(self))),
         )
     }
 }

+impl<A> Div for Scalar<A>
+where
+    A: Div<Output = A> + Clone,
+{
+    type Output = Scalar<A>;
+
+    fn div(self, rhs: Self) -> Self::Output {
+        Self::Dual(
+            self.clone_real_part() / rhs.clone_real_part(),
+            Link::Link(LinkData::Div(Box::new(self), Box::new(rhs))),
+        )
+    }
+}
+
+impl<A> Sqrt for Scalar<A>
+where
+    A: Sqrt + Clone,
+{
+    fn sqrt(self) -> Self {
+        Self::Dual(
+            self.clone_real_part().sqrt(),
+            Link::Link(LinkData::Sqrt(Box::new(self))),
+        )
+    }
+}
+
 impl<A> Default for Scalar<A>
 where
     A: Default,
 {
     fn default() -> Self {
         Scalar::Number(A::default(), None)
     }
 }

 impl<A> Scalar<A> {
     pub fn real_part(&self) -> &A {
         match self {
-            Scalar::Number(a, _) => a,
-            Scalar::Dual(a, _) => a,
+            Scalar::Number(a, _) | Scalar::Dual(a, _) => a,
         }
     }
@@ -209,8 +306,7 @@ impl<A> Scalar<A> {
         A: Clone,
     {
         match self {
-            Scalar::Number(a, _) => (*a).clone(),
-            Scalar::Dual(a, _) => (*a).clone(),
+            Scalar::Number(a, _) | Scalar::Dual(a, _) => (*a).clone(),
         }
     }
@@ -231,6 +327,7 @@ impl<A> Scalar<A> {
         }
     }

+    #[must_use]
     pub fn truncate_dual(self, index: Option<usize>) -> Scalar<A>
     where
         A: Clone,
@@ -238,6 +335,7 @@ impl<A> Scalar<A> {
         Scalar::Dual(self.clone_real_part(), Link::EndOfLink(index))
     }

+    #[must_use]
     pub fn make(x: A) -> Scalar<A> {
         Scalar::Number(x, None)
     }
@@ -249,9 +347,60 @@ where
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            Scalar::Number(n, Some(index)) => f.write_fmt(format_args!("{}_{}", n, index)),
-            Scalar::Number(n, None) => f.write_fmt(format_args!("{}", n)),
-            Scalar::Dual(n, link) => f.write_fmt(format_args!("<{}, link: {}>", n, link)),
+            Scalar::Number(n, Some(index)) => f.write_fmt(format_args!("{n}_{index}")),
+            Scalar::Number(n, None) => f.write_fmt(format_args!("{n}")),
+            Scalar::Dual(n, link) => f.write_fmt(format_args!("<{n}, link: {link}>")),
         }
     }
 }

+#[cfg(test)]
+mod test_loss {
+    use crate::auto_diff::{grad, Differentiable, RankedDifferentiable};
+    use crate::scalar::Scalar;
+    use crate::traits::Sqrt;
+    use ordered_float::NotNan;
+    use std::collections::HashMap;
+
+    #[test]
+    fn div_gradient() {
+        let left = Scalar::make(NotNan::new(3.0).expect("not nan"));
+        let right = Scalar::make(NotNan::new(5.0).expect("not nan"));
+        let divided = left / right;
+        assert_eq!(divided.clone_real_part().into_inner(), 3.0 / 5.0);
+        let mut acc = HashMap::new();
+        divided
+            .clone_link()
+            .invoke(&divided, NotNan::new(1.0).expect("not nan"), &mut acc);
+
+        // Derivative of x/5 with respect to x is the constant 1/5
+        // Derivative of 3/x with respect to x is -3/x^2, so at the value 5 is -3/25
+        assert_eq!(acc.len(), 2);
+        for (key, value) in acc {
+            let key = key.real_part().into_inner();
+            let value = value.into_inner();
+            if key < 4.0 {
+                // This is the numerator.
+                assert_eq!(key, 3.0);
+                assert_eq!(value, 1.0 / 5.0);
+            } else {
+                // This is the denominator.
+                assert_eq!(key, 5.0);
+                assert_eq!(value, -3.0 / 25.0);
+            }
+        }
+    }
+
+    #[test]
+    fn sqrt_gradient() {
+        let nine = Differentiable::of_scalar(Scalar::make(NotNan::new(9.0).expect("not nan")));
+        let graded: [Differentiable<NotNan<f64>>; 1] = grad(
+            |x| RankedDifferentiable::of_scalar(x[0].clone().into_scalar().sqrt()),
+            &[nine],
+        );
+        let graded = graded.map(|x| x.into_scalar().clone_real_part().into_inner())[0];
+
+        // Derivative of sqrt(x) with respect to x at 9 is 1/(2 * 3) = 1/6
+        assert_eq!(graded, 1.0 / 6.0);
+    }
+}
125
little_learner/src/smooth.rs
Normal file
@@ -0,0 +1,125 @@
use crate::auto_diff::{Differentiable, DifferentiableTagged};
use crate::scalar::Scalar;
use crate::traits::One;
use std::ops::{Add, Mul, Neg};

/// Combine `old_value` and `new_value`, weighting the combination towards `old_value` by a
/// factor of `decay`; `new_value` receives the remaining weight `1 - decay`. For example, a
/// decay of 0.9 applied to an old value of 0 and a new value of 50.3 gives
/// 0.9 * 0 + 0.1 * 50.3 = 5.03.
pub fn smooth_tagged<A, F, Tag1, Tag2, Tag3>(
    decay: Scalar<A>,
    old_value: &DifferentiableTagged<A, Tag1>,
    new_value: &DifferentiableTagged<A, Tag2>,
    mut tags: F,
) -> DifferentiableTagged<A, Tag3>
where
    A: One + Clone + Mul<Output = A> + Neg<Output = A> + Add<Output = A>,
    F: FnMut(Tag1, Tag2) -> Tag3,
    Tag1: Clone,
    Tag2: Clone,
{
    DifferentiableTagged::map2_tagged(old_value, new_value, &mut |old, tag1, new, tag2| {
        (
            (old.clone() * decay.clone()) + (new.clone() * (Scalar::<A>::one() + -decay.clone())),
            tags(tag1, tag2),
        )
    })
}

/// Combine `old_value` and `new_value`, weighting the combination towards `old_value` by a
/// factor of `decay`; `new_value` receives the remaining weight `1 - decay`.
pub fn smooth<A>(
    decay: Scalar<A>,
    old_value: &Differentiable<A>,
    new_value: &Differentiable<A>,
) -> Differentiable<A>
where
    A: One + Clone + Mul<Output = A> + Neg<Output = A> + Add<Output = A>,
{
    smooth_tagged(decay, old_value, new_value, |(), ()| ())
}

#[cfg(test)]
mod test_smooth {
    use crate::auto_diff::Differentiable;
    use crate::scalar::Scalar;
    use crate::smooth::smooth;
    use crate::traits::Zero;
    use ordered_float::NotNan;

    #[test]
    fn one_dimension() {
        let decay = Scalar::make(NotNan::new(0.9).expect("not nan"));
        let smoothed = smooth(
            decay.clone(),
            &Differentiable::of_scalar(Scalar::<NotNan<f64>>::zero()),
            &Differentiable::of_scalar(Scalar::make(NotNan::new(50.3).expect("not nan"))),
        );
        assert_eq!(
            smoothed.into_scalar().real_part().into_inner(),
            5.0299999999999985
        );

        let numbers = vec![50.3, 22.7, 4.3, 2.7, 1.8, 2.2, 0.6];
        let mut output = Vec::with_capacity(numbers.len());
        let mut acc = Scalar::<NotNan<f64>>::zero();
        for number in numbers {
            let number =
                Differentiable::of_scalar(Scalar::make(NotNan::new(number).expect("not nan")));
            let next = smooth(decay.clone(), &Differentiable::of_scalar(acc), &number);
            output.push(next.clone().into_scalar().clone_real_part().into_inner());
            acc = next.into_scalar();
        }

        // Note that the original sequence from the book has been heavily affected by rounding.
        // By zero-indexed element 4, the sequence is different in the first significant digit!
        assert_eq!(
            output,
            vec![
                5.0299999999999985,
                6.796_999_999_999_998,
                6.547_299_999_999_998,
                6.162_569_999_999_998,
                5.7263129999999975,
                5.373_681_699_999_998,
                4.896_313_529_999_998
            ]
        );
    }

    fn hydrate(v: &[f64]) -> Differentiable<NotNan<f64>> {
        Differentiable::of_vec(
            v.iter()
                .cloned()
                .map(|v| Differentiable::of_scalar(Scalar::make(NotNan::new(v).expect("not nan"))))
                .collect(),
        )
    }

    #[test]
    fn more_dimension() {
        let decay = Scalar::make(NotNan::new(0.9).expect("not nan"));

        let inputs = [
            vec![1.0, 1.1, 3.0],
            vec![13.4, 18.2, 41.4],
            vec![1.1, 0.3, 67.3],
        ]
        .map(|x| hydrate(&x));

        let mut current = hydrate(&[0.8, 3.1, 2.2]);
        let mut output = Vec::with_capacity(inputs.len());
        for input in inputs {
            current = smooth(decay.clone(), &current, &input);
            output.push(current.clone().attach_rank::<1>().unwrap().collect());
        }

        assert_eq!(
            output,
            vec![
                vec![0.820_000_000_000_000_1, 2.9, 2.280_000_000_000_000_2],
                vec![2.078, 4.43, 6.191_999_999_999_999],
                vec![1.9802, 4.016_999_999_999_999_5, 12.302_799_999_999_998]
            ]
        );
    }
}
@@ -1,107 +0,0 @@
#[macro_export]
macro_rules! tensor {
    ($x:ty , $i: expr) => {[$x; $i]};
    ($x:ty , $i: expr, $($is:expr),+) => {[tensor!($x, $($is),+); $i]};
}

#[cfg(test)]
mod tests {
    #[test]
    fn test_tensor_type() {
        let _: tensor!(f64, 1, 2, 3) = [[[1.0, 3.0, 6.0], [-1.3, -30.0, -0.0]]];
    }
}

pub trait Extensible1<A> {
    fn apply<F>(&self, other: &A, op: &F) -> Self
    where
        F: Fn(&A, &A) -> A;
}

pub trait Extensible2<A> {
    fn apply<F>(&self, other: &Self, op: &F) -> Self
    where
        F: Fn(&A, &A) -> A;
}

impl<A, T, const N: usize> Extensible1<A> for [T; N]
where
    T: Extensible1<A> + Copy + Default,
{
    fn apply<F>(&self, other: &A, op: &F) -> Self
    where
        F: Fn(&A, &A) -> A,
    {
        let mut result = [Default::default(); N];
        for (i, coord) in self.iter().enumerate() {
            result[i] = T::apply(coord, other, op);
        }
        result
    }
}

impl<A, T, const N: usize> Extensible2<A> for [T; N]
where
    T: Extensible2<A> + Copy + Default,
{
    fn apply<F>(&self, other: &Self, op: &F) -> Self
    where
        F: Fn(&A, &A) -> A,
    {
        let mut result = [Default::default(); N];
        for (i, coord) in self.iter().enumerate() {
            result[i] = T::apply(coord, &other[i], op);
        }
        result
    }
}

#[macro_export]
macro_rules! extensible1 {
    ($x: ty) => {
        impl Extensible1<$x> for $x {
            fn apply<F>(&self, other: &$x, op: &F) -> Self
            where
                F: Fn(&Self, &Self) -> Self,
            {
                op(self, other)
            }
        }
    };
}

#[macro_export]
macro_rules! extensible2 {
    ($x: ty) => {
        impl Extensible2<$x> for $x {
            fn apply<F>(&self, other: &Self, op: &F) -> Self
            where
                F: Fn(&Self, &Self) -> Self,
            {
                op(self, other)
            }
        }
    };
}

extensible1!(u8);
extensible1!(f64);

extensible2!(u8);
extensible2!(f64);

pub fn extension1<T, A, F>(t1: &T, t2: &A, op: F) -> T
where
    T: Extensible1<A>,
    F: Fn(&A, &A) -> A,
{
    t1.apply::<F>(t2, &op)
}

pub fn extension2<T, A, F>(t1: &T, t2: &T, op: F) -> T
where
    T: Extensible2<A>,
    F: Fn(&A, &A) -> A,
{
    t1.apply::<F>(t2, &op)
}
@@ -1,6 +1,10 @@
 use crate::scalar::Scalar;
 use ordered_float::NotNan;
 use std::iter::Sum;
 use std::ops::{Add, AddAssign, Div, Mul, Neg};

 pub trait Exp {
+    #[must_use]
     fn exp(self) -> Self;
 }
@@ -10,11 +14,24 @@ impl Exp for NotNan<f64> {
     }
 }

+pub trait Sqrt {
+    #[must_use]
+    fn sqrt(self) -> Self;
+}
+
+impl Sqrt for NotNan<f64> {
+    fn sqrt(self) -> Self {
+        NotNan::new(f64::sqrt(self.into_inner())).expect("expected a non-NaN")
+    }
+}
+
 pub trait Zero {
+    #[must_use]
     fn zero() -> Self;
 }

 pub trait One {
+    #[must_use]
     fn one() -> Self;
 }
@@ -41,3 +58,25 @@ impl One for NotNan<f64> {
         NotNan::new(1.0).unwrap()
     }
 }

+pub trait NumLike:
+    One
+    + Zero
+    + Exp
+    + Add<Output = Self>
+    + AddAssign
+    + Neg<Output = Self>
+    + Mul<Output = Self>
+    + Div<Output = Self>
+    + Sum
+    + Sqrt
+    + Clone
+    + Sized
+    + PartialEq
+    + Eq
+{
+}
+
+impl NumLike for NotNan<f64> {}
+
+impl<A> NumLike for Scalar<A> where A: NumLike {}
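// Editor's sketch, not part of this commit: `NumLike` acts as a trait alias,
// so generic numeric code needs only one bound instead of the full list above.
//
//     fn double<A: NumLike>(x: A) -> A {
//         x.clone() + x
//     }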
@@ -9,3 +9,5 @@ edition = "2021"
 immutable-chunkmap = "1.0.5"
 ordered-float = "3.6.0"
 little_learner = { path = "../little_learner" }
+rand = "0.8.5"
+csv = "1.2.2"
151
little_learner_app/src/iris.csv
Executable file
@@ -0,0 +1,151 @@
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.4,3.7,1.5,0.2,Iris-setosa
4.8,3.4,1.6,0.2,Iris-setosa
4.8,3.0,1.4,0.1,Iris-setosa
4.3,3.0,1.1,0.1,Iris-setosa
5.8,4.0,1.2,0.2,Iris-setosa
5.7,4.4,1.5,0.4,Iris-setosa
5.4,3.9,1.3,0.4,Iris-setosa
5.1,3.5,1.4,0.3,Iris-setosa
5.7,3.8,1.7,0.3,Iris-setosa
5.1,3.8,1.5,0.3,Iris-setosa
5.4,3.4,1.7,0.2,Iris-setosa
5.1,3.7,1.5,0.4,Iris-setosa
4.6,3.6,1.0,0.2,Iris-setosa
5.1,3.3,1.7,0.5,Iris-setosa
4.8,3.4,1.9,0.2,Iris-setosa
5.0,3.0,1.6,0.2,Iris-setosa
5.0,3.4,1.6,0.4,Iris-setosa
5.2,3.5,1.5,0.2,Iris-setosa
5.2,3.4,1.4,0.2,Iris-setosa
4.7,3.2,1.6,0.2,Iris-setosa
4.8,3.1,1.6,0.2,Iris-setosa
5.4,3.4,1.5,0.4,Iris-setosa
5.2,4.1,1.5,0.1,Iris-setosa
5.5,4.2,1.4,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
5.0,3.2,1.2,0.2,Iris-setosa
5.5,3.5,1.3,0.2,Iris-setosa
4.9,3.1,1.5,0.1,Iris-setosa
4.4,3.0,1.3,0.2,Iris-setosa
5.1,3.4,1.5,0.2,Iris-setosa
5.0,3.5,1.3,0.3,Iris-setosa
4.5,2.3,1.3,0.3,Iris-setosa
4.4,3.2,1.3,0.2,Iris-setosa
5.0,3.5,1.6,0.6,Iris-setosa
5.1,3.8,1.9,0.4,Iris-setosa
4.8,3.0,1.4,0.3,Iris-setosa
5.1,3.8,1.6,0.2,Iris-setosa
4.6,3.2,1.4,0.2,Iris-setosa
5.3,3.7,1.5,0.2,Iris-setosa
5.0,3.3,1.4,0.2,Iris-setosa
7.0,3.2,4.7,1.4,Iris-versicolor
6.4,3.2,4.5,1.5,Iris-versicolor
6.9,3.1,4.9,1.5,Iris-versicolor
5.5,2.3,4.0,1.3,Iris-versicolor
6.5,2.8,4.6,1.5,Iris-versicolor
5.7,2.8,4.5,1.3,Iris-versicolor
6.3,3.3,4.7,1.6,Iris-versicolor
4.9,2.4,3.3,1.0,Iris-versicolor
6.6,2.9,4.6,1.3,Iris-versicolor
5.2,2.7,3.9,1.4,Iris-versicolor
5.0,2.0,3.5,1.0,Iris-versicolor
5.9,3.0,4.2,1.5,Iris-versicolor
6.0,2.2,4.0,1.0,Iris-versicolor
6.1,2.9,4.7,1.4,Iris-versicolor
5.6,2.9,3.6,1.3,Iris-versicolor
6.7,3.1,4.4,1.4,Iris-versicolor
5.6,3.0,4.5,1.5,Iris-versicolor
5.8,2.7,4.1,1.0,Iris-versicolor
6.2,2.2,4.5,1.5,Iris-versicolor
5.6,2.5,3.9,1.1,Iris-versicolor
5.9,3.2,4.8,1.8,Iris-versicolor
6.1,2.8,4.0,1.3,Iris-versicolor
6.3,2.5,4.9,1.5,Iris-versicolor
6.1,2.8,4.7,1.2,Iris-versicolor
6.4,2.9,4.3,1.3,Iris-versicolor
6.6,3.0,4.4,1.4,Iris-versicolor
6.8,2.8,4.8,1.4,Iris-versicolor
6.7,3.0,5.0,1.7,Iris-versicolor
6.0,2.9,4.5,1.5,Iris-versicolor
5.7,2.6,3.5,1.0,Iris-versicolor
5.5,2.4,3.8,1.1,Iris-versicolor
5.5,2.4,3.7,1.0,Iris-versicolor
5.8,2.7,3.9,1.2,Iris-versicolor
6.0,2.7,5.1,1.6,Iris-versicolor
5.4,3.0,4.5,1.5,Iris-versicolor
6.0,3.4,4.5,1.6,Iris-versicolor
6.7,3.1,4.7,1.5,Iris-versicolor
6.3,2.3,4.4,1.3,Iris-versicolor
5.6,3.0,4.1,1.3,Iris-versicolor
5.5,2.5,4.0,1.3,Iris-versicolor
5.5,2.6,4.4,1.2,Iris-versicolor
6.1,3.0,4.6,1.4,Iris-versicolor
5.8,2.6,4.0,1.2,Iris-versicolor
5.0,2.3,3.3,1.0,Iris-versicolor
5.6,2.7,4.2,1.3,Iris-versicolor
5.7,3.0,4.2,1.2,Iris-versicolor
5.7,2.9,4.2,1.3,Iris-versicolor
6.2,2.9,4.3,1.3,Iris-versicolor
5.1,2.5,3.0,1.1,Iris-versicolor
5.7,2.8,4.1,1.3,Iris-versicolor
6.3,3.3,6.0,2.5,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
7.1,3.0,5.9,2.1,Iris-virginica
6.3,2.9,5.6,1.8,Iris-virginica
6.5,3.0,5.8,2.2,Iris-virginica
7.6,3.0,6.6,2.1,Iris-virginica
4.9,2.5,4.5,1.7,Iris-virginica
7.3,2.9,6.3,1.8,Iris-virginica
6.7,2.5,5.8,1.8,Iris-virginica
7.2,3.6,6.1,2.5,Iris-virginica
6.5,3.2,5.1,2.0,Iris-virginica
6.4,2.7,5.3,1.9,Iris-virginica
6.8,3.0,5.5,2.1,Iris-virginica
5.7,2.5,5.0,2.0,Iris-virginica
5.8,2.8,5.1,2.4,Iris-virginica
6.4,3.2,5.3,2.3,Iris-virginica
6.5,3.0,5.5,1.8,Iris-virginica
7.7,3.8,6.7,2.2,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
6.0,2.2,5.0,1.5,Iris-virginica
6.9,3.2,5.7,2.3,Iris-virginica
5.6,2.8,4.9,2.0,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica
6.3,2.7,4.9,1.8,Iris-virginica
6.7,3.3,5.7,2.1,Iris-virginica
7.2,3.2,6.0,1.8,Iris-virginica
6.2,2.8,4.8,1.8,Iris-virginica
6.1,3.0,4.9,1.8,Iris-virginica
6.4,2.8,5.6,2.1,Iris-virginica
7.2,3.0,5.8,1.6,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.9,3.8,6.4,2.0,Iris-virginica
6.4,2.8,5.6,2.2,Iris-virginica
6.3,2.8,5.1,1.5,Iris-virginica
6.1,2.6,5.6,1.4,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
6.3,3.4,5.6,2.4,Iris-virginica
6.4,3.1,5.5,1.8,Iris-virginica
6.0,3.0,4.8,1.8,Iris-virginica
6.9,3.1,5.4,2.1,Iris-virginica
6.7,3.1,5.6,2.4,Iris-virginica
6.9,3.1,5.1,2.3,Iris-virginica
5.8,2.7,5.1,1.9,Iris-virginica
6.8,3.2,5.9,2.3,Iris-virginica
6.7,3.3,5.7,2.5,Iris-virginica
6.7,3.0,5.2,2.3,Iris-virginica
6.3,2.5,5.0,1.9,Iris-virginica
6.5,3.0,5.2,2.0,Iris-virginica
6.2,3.4,5.4,2.3,Iris-virginica
5.9,3.0,5.1,1.8,Iris-virginica
110
little_learner_app/src/iris.rs
Normal file
@@ -0,0 +1,110 @@
use csv::ReaderBuilder;
use little_learner::auto_diff::RankedDifferentiable;
use little_learner::scalar::Scalar;
use little_learner::traits::{One, Zero};
use std::fmt::Debug;
use std::io::Cursor;
use std::str::FromStr;

const IRIS_DATA: &str = include_str!("iris.csv");

#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub enum IrisType {
    Setosa = 0,
    Versicolor = 1,
    Virginica = 2,
}

impl FromStr for IrisType {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "Iris-virginica" => Ok(IrisType::Virginica),
            "Iris-versicolor" => Ok(IrisType::Versicolor),
            "Iris-setosa" => Ok(IrisType::Setosa),
            _ => Err(String::from(s)),
        }
    }
}

#[derive(PartialEq, Debug)]
pub struct Iris<A> {
    pub class: IrisType,
    pub petal_length: A,
    pub petal_width: A,
    pub sepal_length: A,
    pub sepal_width: A,
}

pub fn import<A, B>() -> Vec<Iris<A>>
where
    A: FromStr<Err = B>,
    B: Debug,
{
    let mut reader = ReaderBuilder::new()
        .has_headers(false)
        .from_reader(Cursor::new(IRIS_DATA));
    let mut output = Vec::new();
    for record in reader.records() {
        let record = record.unwrap();
        // Fields are read positionally; note that the conventional Iris column order
        // is sepal length, sepal width, petal length, petal width.
        let petal_length = A::from_str(&record[0]).unwrap();
        let petal_width = A::from_str(&record[1]).unwrap();
        let sepal_length = A::from_str(&record[2]).unwrap();
        let sepal_width = A::from_str(&record[3]).unwrap();
        let class = IrisType::from_str(&record[4]).unwrap();
        output.push(Iris {
            class,
            petal_length,
            petal_width,
            sepal_length,
            sepal_width,
        });
    }

    output
}

impl<A> Iris<A> {
    pub fn one_hot(&self) -> (RankedDifferentiable<A, 1>, RankedDifferentiable<A, 1>)
    where
        A: Clone + Zero + One,
    {
        let vec = vec![
            RankedDifferentiable::of_scalar(Scalar::make(self.petal_length.clone())),
            RankedDifferentiable::of_scalar(Scalar::make(self.petal_width.clone())),
            RankedDifferentiable::of_scalar(Scalar::make(self.sepal_length.clone())),
            RankedDifferentiable::of_scalar(Scalar::make(self.sepal_width.clone())),
        ];

        let mut one_hot = vec![A::zero(); 3];
        one_hot[self.class as usize] = A::one();
        let one_hot = one_hot
            .iter()
            .map(|x| RankedDifferentiable::of_scalar(Scalar::make(x.clone())))
            .collect();
        (
            RankedDifferentiable::of_vector(vec),
            RankedDifferentiable::of_vector(one_hot),
        )
    }
}

#[cfg(test)]
mod test {
    use crate::iris::{import, Iris, IrisType};

    const EXPECTED_FIRST: Iris<f32> = Iris {
        class: IrisType::Setosa,
        petal_length: 5.1,
        petal_width: 3.5,
        sepal_length: 1.4,
        sepal_width: 0.2,
    };

    #[test]
    fn first_element() {
        let irises = import();
        assert_eq!(irises[0], EXPECTED_FIRST);
    }
}
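// Editor's sketch, not part of this commit: `one_hot` puts a single 1 at the
// index given by the class discriminant, so Setosa (class 0) maps to (1, 0, 0).
// This assumes `collect` flattens a rank-1 tensor to a Vec, as used in
// rms_example.rs later in this diff.
#[cfg(test)]
mod test_one_hot {
    use crate::iris::{Iris, IrisType};
    use ordered_float::NotNan;

    #[test]
    fn setosa_one_hot() {
        let iris = Iris {
            class: IrisType::Setosa,
            petal_length: NotNan::new(5.1).expect("not nan"),
            petal_width: NotNan::new(3.5).expect("not nan"),
            sepal_length: NotNan::new(1.4).expect("not nan"),
            sepal_width: NotNan::new(0.2).expect("not nan"),
        };
        let (_features, target) = iris.one_hot();
        let target: Vec<f64> = target.collect().iter().map(|x| x.into_inner()).collect();
        assert_eq!(target, vec![1.0, 0.0, 0.0]);
    }
}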
@@ -1,389 +1,27 @@
 #![allow(incomplete_features)]
 #![feature(generic_const_exprs)]

-mod with_tensor;
-
-use core::hash::Hash;
-use std::ops::{Add, AddAssign, Div, Mul, Neg};
-
-use little_learner::auto_diff::{grad, Differentiable, RankedDifferentiable};
-
-use little_learner::loss::{l2_loss_2, predict_plane};
-use little_learner::scalar::Scalar;
-use little_learner::traits::{Exp, One, Zero};
+use crate::rms_example::rms_example;
+use little_learner::auto_diff::RankedDifferentiable;
+use little_learner::block;
 use ordered_float::NotNan;

-fn iterate<A, F>(f: &F, start: A, n: u32) -> A
-where
-    F: Fn(A) -> A,
-{
-    let mut v = start;
-    for _ in 0..n {
-        v = f(v);
-    }
-    v
-}
-
-struct GradientDescentHyper<A> {
-    learning_rate: A,
-    iterations: u32,
-}
-
-fn gradient_descent_step<A, F, const RANK: usize, const PARAM_NUM: usize>(
-    f: &F,
-    theta: [Differentiable<A>; PARAM_NUM],
-    params: &GradientDescentHyper<A>,
-) -> [Differentiable<A>; PARAM_NUM]
-where
-    A: Clone
-        + Mul<Output = A>
-        + Neg<Output = A>
-        + Add<Output = A>
-        + Hash
-        + AddAssign
-        + Div<Output = A>
-        + Zero
-        + One
-        + Eq
-        + Exp,
-    F: Fn(&[Differentiable<A>; PARAM_NUM]) -> RankedDifferentiable<A, RANK>,
-{
-    let delta = grad(f, &theta);
-    let mut i = 0;
-    theta.map(|theta| {
-        let delta = &delta[i];
-        i += 1;
-        // For speed, you might want to truncate_dual this.
-        let learning_rate = Scalar::make((params.learning_rate).clone());
-        Differentiable::map2(
-            &theta,
-            &delta.map(&mut |s| s * learning_rate.clone()),
-            &|theta, delta| (*theta).clone() - (*delta).clone(),
-        )
-    })
-}
+mod iris;
+mod rms_example;

 fn main() {
-    let plane_xs = [
-        [1.0, 2.05],
-        [1.0, 3.0],
-        [2.0, 2.0],
-        [2.0, 3.91],
-        [3.0, 6.13],
-        [4.0, 8.09],
-    ];
-    let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
+    rms_example();

-    let hyper = GradientDescentHyper {
-        learning_rate: NotNan::new(0.001).expect("not nan"),
-        iterations: 1000,
-    };
+    let irises = iris::import::<f64, _>();
+    let mut xs = Vec::with_capacity(irises.len());
+    let mut ys = Vec::with_capacity(irises.len());
+    for iris in irises {
+        let (x, y) = iris.one_hot();
+        xs.push(x);
+        ys.push(y);
+    }
+    let _xs = RankedDifferentiable::of_vector(xs);
+    let _ys = RankedDifferentiable::of_vector(ys);

-    let iterated = {
-        let xs = plane_xs.map(|x| {
-            [
-                NotNan::new(x[0]).expect("not nan"),
-                NotNan::new(x[1]).expect("not nan"),
-            ]
-        });
-        let ys = plane_ys.map(|x| NotNan::new(x).expect("not nan"));
-        iterate(
-            &|theta| {
-                gradient_descent_step(
-                    &|x| {
-                        RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
-                            l2_loss_2(
-                                predict_plane,
-                                RankedDifferentiable::of_slice_2::<_, 2>(&xs),
-                                RankedDifferentiable::of_slice(ys),
-                                x,
-                            ),
-                        )])
-                    },
-                    theta,
-                    &hyper,
-                )
-            },
-            [
-                RankedDifferentiable::of_slice([NotNan::zero(), NotNan::zero()]).to_unranked(),
-                Differentiable::of_scalar(Scalar::zero()),
-            ],
-            hyper.iterations,
-        )
-    };
-
-    let [theta0, theta1] = iterated;
-
-    let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
-    let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
-
-    assert_eq!(
-        theta0
-            .to_vector()
-            .into_iter()
-            .map(|x| x.to_scalar().real_part().into_inner())
-            .collect::<Vec<_>>(),
-        [3.97757644609063, 2.0496557321494446]
-    );
-    assert_eq!(
-        theta1.to_scalar().real_part().into_inner(),
-        5.786758464448078
-    );
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use little_learner::{
-        auto_diff::grad,
-        loss::{l2_loss_2, predict_line_2, predict_line_2_unranked, predict_quadratic_unranked},
-    };
-
-    use crate::with_tensor::{l2_loss, predict_line};
-
-    #[test]
-    fn loss_example() {
-        let xs = [2.0, 1.0, 4.0, 3.0];
-        let ys = [1.8, 1.2, 4.2, 3.3];
-        let loss = l2_loss_2(
-            predict_line_2,
-            RankedDifferentiable::of_slice(&xs),
-            RankedDifferentiable::of_slice(&ys),
-            &[
-                RankedDifferentiable::of_scalar(Scalar::zero()),
-                RankedDifferentiable::of_scalar(Scalar::zero()),
-            ],
-        );
-
-        assert_eq!(*loss.real_part(), 33.21);
-    }
-
-    #[test]
-    fn l2_loss_non_autodiff_example() {
-        let xs = [2.0, 1.0, 4.0, 3.0];
-        let ys = [1.8, 1.2, 4.2, 3.3];
-        let loss = l2_loss(predict_line, &xs, &ys, &[0.0099, 0.0]);
-        assert_eq!(loss, 32.5892403);
-    }
-
-    #[test]
-    fn grad_example() {
-        let input_vec = [Differentiable::of_scalar(Scalar::make(
-            NotNan::new(27.0).expect("not nan"),
-        ))];
-
-        let grad: Vec<_> = grad(
-            |x| {
-                RankedDifferentiable::of_scalar(
-                    x[0].borrow_scalar().clone() * x[0].borrow_scalar().clone(),
-                )
-            },
-            &input_vec,
-        )
-        .into_iter()
-        .map(|x| x.into_scalar().real_part().into_inner())
-        .collect();
-        assert_eq!(grad, [54.0]);
-    }
-
-    #[test]
-    fn loss_gradient() {
-        let zero = Scalar::<NotNan<f64>>::zero();
-        let input_vec = [
-            RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
-            RankedDifferentiable::of_scalar(zero).to_unranked(),
-        ];
-        let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
-        let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
-        let grad = grad(
-            |x| {
-                RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(l2_loss_2(
-                    predict_line_2_unranked,
-                    RankedDifferentiable::of_slice(&xs),
-                    RankedDifferentiable::of_slice(&ys),
-                    x,
-                ))])
-            },
-            &input_vec,
-        );
-
-        assert_eq!(
-            grad.into_iter()
-                .map(|x| *(x.into_scalar().real_part()))
-                .collect::<Vec<_>>(),
-            [-63.0, -21.0]
-        );
-    }
-
-    #[test]
-    fn test_iterate() {
-        let f = |t: [i32; 3]| t.map(|i| i - 3);
-        assert_eq!(iterate(&f, [1, 2, 3], 5u32), [-14, -13, -12]);
-    }
-
-    #[test]
-    fn first_optimisation_test() {
-        let xs = [2.0, 1.0, 4.0, 3.0];
-        let ys = [1.8, 1.2, 4.2, 3.3];
-
-        let zero = Scalar::<NotNan<f64>>::zero();
-
-        let hyper = GradientDescentHyper {
-            learning_rate: NotNan::new(0.01).expect("not nan"),
-            iterations: 1000,
-        };
-        let iterated = {
-            let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
-            let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
-            iterate(
-                &|theta| {
-                    gradient_descent_step(
-                        &|x| {
-                            RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
-                                l2_loss_2(
-                                    predict_line_2_unranked,
-                                    RankedDifferentiable::of_slice(&xs),
-                                    RankedDifferentiable::of_slice(&ys),
-                                    x,
-                                ),
-                            )])
-                        },
-                        theta,
-                        &hyper,
-                    )
-                },
-                [
-                    RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
-                    RankedDifferentiable::of_scalar(zero).to_unranked(),
-                ],
-                hyper.iterations,
-            )
-        };
-        let iterated = iterated
-            .into_iter()
-            .map(|x| x.into_scalar().real_part().into_inner())
-            .collect::<Vec<_>>();
-
-        assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
-    }
-
-    #[test]
-    fn optimise_quadratic() {
-        let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
-        let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
-
-        let zero = Scalar::<NotNan<f64>>::zero();
-
-        let hyper = GradientDescentHyper {
-            learning_rate: NotNan::new(0.001).expect("not nan"),
-            iterations: 1000,
-        };
-
-        let iterated = {
-            let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
-            let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
-            iterate(
-                &|theta| {
-                    gradient_descent_step(
-                        &|x| {
-                            RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
-                                l2_loss_2(
-                                    predict_quadratic_unranked,
-                                    RankedDifferentiable::of_slice(&xs),
-                                    RankedDifferentiable::of_slice(&ys),
-                                    x,
-                                ),
-                            )])
-                        },
-                        theta,
-                        &hyper,
-                    )
-                },
-                [
-                    RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
-                    RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
-                    RankedDifferentiable::of_scalar(zero).to_unranked(),
-                ],
-                hyper.iterations,
-            )
-        };
-        let iterated = iterated
-            .into_iter()
-            .map(|x| x.into_scalar().real_part().into_inner())
-            .collect::<Vec<_>>();
-
-        assert_eq!(
-            iterated,
-            [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
-        );
-    }
-
-    #[test]
-    fn optimise_plane() {
-        let plane_xs = [
-            [1.0, 2.05],
-            [1.0, 3.0],
-            [2.0, 2.0],
-            [2.0, 3.91],
-            [3.0, 6.13],
-            [4.0, 8.09],
-        ];
-        let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
-
-        let hyper = GradientDescentHyper {
-            learning_rate: NotNan::new(0.001).expect("not nan"),
-            iterations: 1000,
-        };
-
-        let iterated = {
-            let xs = plane_xs.map(|x| {
-                [
-                    NotNan::new(x[0]).expect("not nan"),
-                    NotNan::new(x[1]).expect("not nan"),
-                ]
-            });
-            let ys = plane_ys.map(|x| NotNan::new(x).expect("not nan"));
-            iterate(
-                &|theta| {
-                    gradient_descent_step(
-                        &|x| {
-                            RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
-                                l2_loss_2(
-                                    predict_plane,
-                                    RankedDifferentiable::of_slice_2::<_, 2>(&xs),
-                                    RankedDifferentiable::of_slice(ys),
-                                    x,
-                                ),
-                            )])
-                        },
-                        theta,
-                        &hyper,
-                    )
-                },
-                [
-                    RankedDifferentiable::of_slice([NotNan::zero(), NotNan::zero()]).to_unranked(),
-                    Differentiable::of_scalar(Scalar::zero()),
-                ],
-                hyper.iterations,
-            )
-        };
-
-        let [theta0, theta1] = iterated;
-
-        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
-        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
-
-        assert_eq!(
-            theta0
-                .to_vector()
-                .into_iter()
-                .map(|x| x.to_scalar().real_part().into_inner())
-                .collect::<Vec<_>>(),
-            [3.97757644609063, 2.0496557321494446]
-        );
-        assert_eq!(
-            theta1.to_scalar().real_part().into_inner(),
-            5.786758464448078
-        );
-    }
-}
+    let _network = block::compose(block::dense::<NotNan<f64>, ()>(6, 3), block::dense(4, 6), 2);
 }
65
little_learner_app/src/rms_example.rs
Normal file
@@ -0,0 +1,65 @@
use little_learner::auto_diff::{Differentiable, RankedDifferentiable, RankedDifferentiableTagged};

use little_learner::gradient_descent::gradient_descent;
use little_learner::hyper;
use little_learner::loss::predict_plane;
use little_learner::not_nan::{to_not_nan_1, to_not_nan_2};
use little_learner::predictor;
use little_learner::scalar::Scalar;
use little_learner::traits::Zero;
use ordered_float::NotNan;

const PLANE_XS: [[f64; 2]; 6] = [
    [1.0, 2.05],
    [1.0, 3.0],
    [2.0, 2.0],
    [2.0, 3.91],
    [3.0, 6.13],
    [4.0, 8.09],
];
const PLANE_YS: [f64; 6] = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];

pub(crate) fn rms_example() {
    let beta = NotNan::new(0.9).expect("not nan");
    let stabilizer = NotNan::new(0.000_000_01).expect("not nan");
    let hyper = hyper::RmsGradientDescent::default(NotNan::new(0.01).expect("not nan"), 3000)
        .with_stabilizer(stabilizer)
        .with_beta(beta);

    let iterated = {
        let xs = to_not_nan_2(PLANE_XS);
        let ys = to_not_nan_1(PLANE_YS);
        let zero_params = [
            RankedDifferentiable::of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()])
                .to_unranked(),
            Differentiable::of_scalar(Scalar::zero()),
        ];

        gradient_descent(
            hyper,
            &xs,
            RankedDifferentiableTagged::of_slice_2::<_, 2>,
            &ys,
            zero_params,
            predictor::rms(predict_plane),
            hyper::RmsGradientDescent::to_immutable,
        )
    };

    let [theta0, theta1] = iterated;

    let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
    let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");

    let fitted_theta0 = theta0
        .collect()
        .iter()
        .map(|x| x.into_inner())
        .collect::<Vec<_>>();
    let fitted_theta1 = theta1.to_scalar().real_part().into_inner();
    assert_eq!(
        fitted_theta0,
        [3.974_645_444_172_085, 1.971_454_922_077_495]
    );
    assert_eq!(fitted_theta1, 6.164_579_048_274_036);
}
@@ -1,128 +0,0 @@
#![allow(dead_code)]

use std::iter::Sum;
use std::ops::{Mul, Sub};

use little_learner::tensor;
use little_learner::tensor::{extension2, Extensible2};
use little_learner::traits::One;

type Point<A, const N: usize> = [A; N];

type Parameters<A, const N: usize, const M: usize> = [Point<A, N>; M];

fn dot_points<A: Mul, const N: usize>(x: &Point<A, N>, y: &Point<A, N>) -> A
where
    A: Sum<<A as Mul>::Output> + Copy + Default + Mul<Output = A> + Extensible2<A>,
{
    extension2(x, y, |&x, &y| x * y).into_iter().sum()
}

fn dot<A, const N: usize, const M: usize>(x: &Point<A, N>, y: &Parameters<A, N, M>) -> Point<A, M>
where
    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Copy + Default + Extensible2<A>,
{
    let mut result = [Default::default(); M];
    for (i, coord) in y.iter().map(|y| dot_points(x, y)).enumerate() {
        result[i] = coord;
    }
    result
}

fn sum<A, const N: usize>(x: &tensor!(A, N)) -> A
where
    A: Sum<A> + Copy,
{
    A::sum(x.iter().cloned())
}

fn squared<A, const N: usize>(x: &tensor!(A, N)) -> tensor!(A, N)
where
    A: Mul<Output = A> + Extensible2<A> + Copy + Default,
{
    extension2(x, x, |&a, &b| (a * b))
}

fn l2_norm<A, const N: usize>(prediction: &tensor!(A, N), data: &tensor!(A, N)) -> A
where
    A: Sum<A> + Mul<Output = A> + Extensible2<A> + Copy + Default + Sub<Output = A>,
{
    let diff = extension2(prediction, data, |&x, &y| x - y);
    sum(&squared(&diff))
}

pub fn l2_loss<A, F, Params, const N: usize>(
    target: F,
    data_xs: &tensor!(A, N),
    data_ys: &tensor!(A, N),
    params: &Params,
) -> A
where
    F: Fn(&tensor!(A, N), &Params) -> tensor!(A, N),
    A: Sum<A> + Mul<Output = A> + Extensible2<A> + Copy + Default + Sub<Output = A>,
{
    let pred_ys = target(data_xs, params);
    l2_norm(&pred_ys, data_ys)
}

pub fn predict_line<A, const N: usize>(xs: &tensor!(A, N), theta: &tensor!(A, 2)) -> tensor!(A, N)
where
    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Copy + Default + Extensible2<A> + One,
{
    let mut result: tensor!(A, N) = [Default::default(); N];
    for (i, &x) in xs.iter().enumerate() {
        result[i] = dot(&[x, One::one()], &[*theta])[0];
    }
    result
}

#[cfg(test)]
mod tests {
    use super::*;
    use little_learner::tensor::extension1;

    #[test]
    fn test_extension() {
        let x: tensor!(u8, 1) = [2];
        assert_eq!(extension1(&x, &7, |x, y| x + y), [9]);
        let y: tensor!(u8, 1) = [7];
        assert_eq!(extension2(&x, &y, |x, y| x + y), [9]);

        let x: tensor!(u8, 3) = [5, 6, 7];
        assert_eq!(extension1(&x, &2, |x, y| x + y), [7, 8, 9]);
        let y: tensor!(u8, 3) = [2, 0, 1];
        assert_eq!(extension2(&x, &y, |x, y| x + y), [7, 6, 8]);

        let x: tensor!(u8, 2, 3) = [[4, 6, 7], [2, 0, 1]];
        assert_eq!(extension1(&x, &2, |x, y| x + y), [[6, 8, 9], [4, 2, 3]]);
        let y: tensor!(u8, 2, 3) = [[1, 2, 2], [6, 3, 1]];
        assert_eq!(extension2(&x, &y, |x, y| x + y), [[5, 8, 9], [8, 3, 2]]);
    }

    #[test]
    fn test_l2_norm() {
        assert_eq!(
            l2_norm(&[4.0, -3.0, 0.0, -4.0, 3.0], &[0.0, 0.0, 0.0, 0.0, 0.0]),
            50.0
        )
    }

    #[test]
    fn test_l2_loss() {
        let loss = l2_loss(
            predict_line,
            &[2.0, 1.0, 4.0, 3.0],
            &[1.8, 1.2, 4.2, 3.3],
            &[0.0, 0.0],
        );
        assert_eq!(loss, 33.21);

        let loss = l2_loss(
            predict_line,
            &[2.0, 1.0, 4.0, 3.0],
            &[1.8, 1.2, 4.2, 3.3],
            &[0.0099, 0.0],
        );
        assert_eq!((100.0 * loss).round() / 100.0, 32.59);
    }
}