Implement plane prediction (#11)
Cargo.lock (generated; 1 deletion)

@@ -55,7 +55,6 @@ dependencies = [
 name = "little_learner_app"
 version = "0.1.0"
 dependencies = [
- "arrayvec",
  "immutable-chunkmap",
  "little_learner",
  "ordered-float",
little_learner/src/auto_diff.rs

@@ -7,12 +7,12 @@ use std::{
     ops::{AddAssign, Div, Mul, Neg},
 };

-impl<A> Zero for DifferentiableHidden<A>
+impl<A> Zero for Differentiable<A>
 where
     A: Zero,
 {
-    fn zero() -> DifferentiableHidden<A> {
-        DifferentiableHidden::Scalar(Scalar::Number(A::zero(), None))
+    fn zero() -> Differentiable<A> {
+        Differentiable::Scalar(Scalar::Number(A::zero(), None))
     }
 }

@@ -25,16 +25,16 @@ where
     }
 }

-impl<A> One for DifferentiableHidden<A>
+impl<A> One for Differentiable<A>
 where
     A: One,
 {
-    fn one() -> DifferentiableHidden<A> {
-        DifferentiableHidden::Scalar(Scalar::one())
+    fn one() -> Differentiable<A> {
+        Differentiable::Scalar(Scalar::one())
     }
 }

-impl<A> Clone for DifferentiableHidden<A>
+impl<A> Clone for Differentiable<A>
 where
     A: Clone,
 {
@@ -47,19 +47,19 @@ where
 }

 #[derive(Debug)]
-enum DifferentiableHidden<A> {
+pub enum Differentiable<A> {
     Scalar(Scalar<A>),
-    Vector(Vec<DifferentiableHidden<A>>),
+    Vector(Vec<Differentiable<A>>),
 }

-impl<A> Display for DifferentiableHidden<A>
+impl<A> Display for Differentiable<A>
 where
     A: Display,
 {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            DifferentiableHidden::Scalar(s) => f.write_fmt(format_args!("{}", s)),
-            DifferentiableHidden::Vector(v) => {
+            Differentiable::Scalar(s) => f.write_fmt(format_args!("{}", s)),
+            Differentiable::Vector(v) => {
                 f.write_char('[')?;
                 for v in v.iter() {
                     f.write_fmt(format_args!("{}", v))?;
@@ -71,32 +71,32 @@ where
         }
     }
 }

-impl<A> DifferentiableHidden<A> {
-    fn map<B, F>(&self, f: &mut F) -> DifferentiableHidden<B>
+impl<A> Differentiable<A> {
+    pub fn map<B, F>(&self, f: &mut F) -> Differentiable<B>
     where
         F: FnMut(Scalar<A>) -> Scalar<B>,
         A: Clone,
     {
         match self {
-            DifferentiableHidden::Scalar(a) => DifferentiableHidden::Scalar(f(a.clone())),
-            DifferentiableHidden::Vector(slice) => {
-                DifferentiableHidden::Vector(slice.iter().map(|x| x.map(f)).collect())
+            Differentiable::Scalar(a) => Differentiable::Scalar(f(a.clone())),
+            Differentiable::Vector(slice) => {
+                Differentiable::Vector(slice.iter().map(|x| x.map(f)).collect())
             }
         }
     }

-    fn map2<B, C, F>(&self, other: &DifferentiableHidden<B>, f: &F) -> DifferentiableHidden<C>
+    pub fn map2<B, C, F>(&self, other: &Differentiable<B>, f: &F) -> Differentiable<C>
     where
         F: Fn(&Scalar<A>, &Scalar<B>) -> Scalar<C>,
         A: Clone,
         B: Clone,
     {
         match (self, other) {
-            (DifferentiableHidden::Scalar(a), DifferentiableHidden::Scalar(b)) => {
-                DifferentiableHidden::Scalar(f(a, b))
+            (Differentiable::Scalar(a), Differentiable::Scalar(b)) => {
+                Differentiable::Scalar(f(a, b))
             }
-            (DifferentiableHidden::Vector(slice_a), DifferentiableHidden::Vector(slice_b)) => {
-                DifferentiableHidden::Vector(
+            (Differentiable::Vector(slice_a), Differentiable::Vector(slice_b)) => {
+                Differentiable::Vector(
                     slice_a
                         .iter()
                         .zip(slice_b.iter())
@@ -108,20 +108,69 @@ impl<A> DifferentiableHidden<A> {
         }
     }

-    fn of_slice(input: &[A]) -> DifferentiableHidden<A>
+    fn of_slice<T>(input: T) -> Differentiable<A>
     where
         A: Clone,
+        T: AsRef<[A]>,
     {
-        DifferentiableHidden::Vector(
+        Differentiable::Vector(
             input
+                .as_ref()
                 .iter()
-                .map(|v| DifferentiableHidden::Scalar(Scalar::Number((*v).clone(), None)))
+                .map(|v| Differentiable::Scalar(Scalar::Number((*v).clone(), None)))
                 .collect(),
         )
     }

+    pub fn rank(&self) -> usize {
+        match self {
+            Differentiable::Scalar(_) => 0,
+            Differentiable::Vector(v) => v[0].rank() + 1,
+        }
+    }
+
+    pub fn attach_rank<const RANK: usize>(
+        self: Differentiable<A>,
+    ) -> Option<RankedDifferentiable<A, RANK>> {
+        if self.rank() == RANK {
+            Some(RankedDifferentiable { contents: self })
+        } else {
+            None
+        }
+    }
 }

-impl<A> DifferentiableHidden<A>
+impl<A> Differentiable<A> {
+    pub fn into_scalar(self) -> Scalar<A> {
+        match self {
+            Differentiable::Scalar(s) => s,
+            Differentiable::Vector(_) => panic!("not a scalar"),
+        }
+    }
+
+    pub fn into_vector(self) -> Vec<Differentiable<A>> {
+        match self {
+            Differentiable::Scalar(_) => panic!("not a vector"),
+            Differentiable::Vector(v) => v,
+        }
+    }
+
+    pub fn borrow_scalar(&self) -> &Scalar<A> {
+        match self {
+            Differentiable::Scalar(s) => s,
+            Differentiable::Vector(_) => panic!("not a scalar"),
+        }
+    }
+
+    pub fn borrow_vector(&self) -> &Vec<Differentiable<A>> {
+        match self {
+            Differentiable::Scalar(_) => panic!("not a vector"),
+            Differentiable::Vector(v) => v,
+        }
+    }
+}
+
+impl<A> Differentiable<A>
 where
     A: Clone
         + Eq
@@ -134,7 +183,7 @@ where
         + One
         + Neg<Output = A>,
 {
-    fn accumulate_gradients_vec(v: &[DifferentiableHidden<A>], acc: &mut HashMap<Scalar<A>, A>) {
+    fn accumulate_gradients_vec(v: &[Differentiable<A>], acc: &mut HashMap<Scalar<A>, A>) {
         for v in v.iter().rev() {
             v.accumulate_gradients(acc);
         }
@@ -142,33 +191,36 @@ where

     fn accumulate_gradients(&self, acc: &mut HashMap<Scalar<A>, A>) {
         match self {
-            DifferentiableHidden::Scalar(y) => {
+            Differentiable::Scalar(y) => {
                 let k = y.clone_link();
                 k.invoke(y, A::one(), acc);
             }
-            DifferentiableHidden::Vector(y) => {
-                DifferentiableHidden::accumulate_gradients_vec(y, acc)
-            }
+            Differentiable::Vector(y) => Differentiable::accumulate_gradients_vec(y, acc),
         }
     }

-    fn grad_once(self, wrt: &DifferentiableHidden<A>) -> DifferentiableHidden<A> {
+    fn grad_once<const PARAM_NUM: usize>(
+        self,
+        wrt: [Differentiable<A>; PARAM_NUM],
+    ) -> [Differentiable<A>; PARAM_NUM] {
         let mut acc = HashMap::new();
         self.accumulate_gradients(&mut acc);

-        wrt.map(&mut |d| match acc.get(&d) {
-            None => Scalar::Number(A::zero(), None),
-            Some(x) => Scalar::Number(x.clone(), None),
+        wrt.map(|wrt| {
+            wrt.map(&mut |d| match acc.get(&d) {
+                None => Scalar::Number(A::zero(), None),
+                Some(x) => Scalar::Number(x.clone(), None),
+            })
         })
     }
 }

 #[derive(Clone, Debug)]
-pub struct Differentiable<A, const RANK: usize> {
-    contents: DifferentiableHidden<A>,
+pub struct RankedDifferentiable<A, const RANK: usize> {
+    contents: Differentiable<A>,
 }

-impl<A, const RANK: usize> Display for Differentiable<A, RANK>
+impl<A, const RANK: usize> Display for RankedDifferentiable<A, RANK>
 where
     A: Display,
 {
@@ -177,123 +229,161 @@ where
     }
 }

-pub fn of_scalar<A>(s: Scalar<A>) -> Differentiable<A, 0> {
-    Differentiable {
-        contents: DifferentiableHidden::Scalar(s),
-    }
-}
-
-pub fn to_scalar<A>(s: Differentiable<A, 0>) -> Scalar<A> {
-    match s.contents {
-        DifferentiableHidden::Scalar(s) => s,
-        DifferentiableHidden::Vector(_) => panic!("not a vector"),
-    }
-}
-
-pub fn of_slice<A>(input: &[A]) -> Differentiable<A, 1>
-where
-    A: Clone,
-{
-    Differentiable {
-        contents: DifferentiableHidden::of_slice(input),
-    }
-}
-
-impl<A, const RANK: usize> Differentiable<A, RANK> {
-    pub fn of_vector(s: Vec<Differentiable<A, RANK>>) -> Differentiable<A, { RANK + 1 }> {
-        Differentiable {
-            contents: DifferentiableHidden::Vector(s.into_iter().map(|v| v.contents).collect()),
-        }
-    }
-
-    pub fn map<B, F>(s: Differentiable<A, RANK>, f: &mut F) -> Differentiable<B, RANK>
+impl<A> RankedDifferentiable<A, 0> {
+    pub fn to_scalar(self) -> Scalar<A> {
+        match self.contents {
+            Differentiable::Scalar(s) => s,
+            Differentiable::Vector(_) => panic!("not a scalar despite teq that we're a scalar"),
+        }
+    }
+
+    pub fn of_scalar(s: Scalar<A>) -> RankedDifferentiable<A, 0> {
+        RankedDifferentiable {
+            contents: Differentiable::Scalar(s),
+        }
+    }
+}
+
+impl<A> RankedDifferentiable<A, 1> {
+    pub fn of_slice<T>(input: T) -> RankedDifferentiable<A, 1>
+    where
+        A: Clone,
+        T: AsRef<[A]>,
+    {
+        RankedDifferentiable {
+            contents: Differentiable::of_slice(input),
+        }
+    }
+}
+
+impl<A> RankedDifferentiable<A, 2> {
+    pub fn of_slice_2<T, const N: usize>(input: &[T]) -> RankedDifferentiable<A, 2>
+    where
+        A: Clone,
+        T: AsRef<[A]>,
+    {
+        let v = input
+            .iter()
+            .map(|x| Differentiable::of_slice(x))
+            .collect::<Vec<_>>();
+        RankedDifferentiable {
+            contents: Differentiable::Vector(v),
+        }
+    }
+}
+
+impl<A, const RANK: usize> RankedDifferentiable<A, RANK> {
+    pub fn to_unranked(self) -> Differentiable<A> {
+        self.contents
+    }
+
+    pub fn to_unranked_borrow(&self) -> &Differentiable<A> {
+        &self.contents
+    }
+
+    pub fn of_vector(
+        s: Vec<RankedDifferentiable<A, RANK>>,
+    ) -> RankedDifferentiable<A, { RANK + 1 }> {
+        RankedDifferentiable {
+            contents: Differentiable::Vector(s.into_iter().map(|v| v.contents).collect()),
+        }
+    }
+
+    pub fn map<B, F>(
+        self: RankedDifferentiable<A, RANK>,
+        f: &mut F,
+    ) -> RankedDifferentiable<B, RANK>
     where
         F: FnMut(Scalar<A>) -> Scalar<B>,
         A: Clone,
     {
-        Differentiable {
-            contents: DifferentiableHidden::map(&s.contents, f),
+        RankedDifferentiable {
+            contents: Differentiable::map(&self.contents, f),
         }
     }

     pub fn map2<B, C, F>(
-        self: &Differentiable<A, RANK>,
-        other: &Differentiable<B, RANK>,
+        self: &RankedDifferentiable<A, RANK>,
+        other: &RankedDifferentiable<B, RANK>,
         f: &F,
-    ) -> Differentiable<C, RANK>
+    ) -> RankedDifferentiable<C, RANK>
     where
         F: Fn(&Scalar<A>, &Scalar<B>) -> Scalar<C>,
         A: Clone,
         B: Clone,
     {
-        Differentiable {
-            contents: DifferentiableHidden::map2(&self.contents, &other.contents, f),
+        RankedDifferentiable {
+            contents: Differentiable::map2(&self.contents, &other.contents, f),
         }
     }

-    pub fn to_vector(s: Differentiable<A, { RANK + 1 }>) -> Vec<Differentiable<A, RANK>> {
-        match s.contents {
-            DifferentiableHidden::Scalar(_) => panic!("not a scalar"),
-            DifferentiableHidden::Vector(v) => v
+    pub fn to_vector(
+        self: RankedDifferentiable<A, RANK>,
+    ) -> Vec<RankedDifferentiable<A, { RANK - 1 }>> {
+        match self.contents {
+            Differentiable::Scalar(_) => panic!("not a scalar"),
+            Differentiable::Vector(v) => v
                 .into_iter()
-                .map(|v| Differentiable { contents: v })
+                .map(|v| RankedDifferentiable { contents: v })
                 .collect(),
         }
     }
+}

-    pub fn grad<F>(f: F, theta: &Differentiable<A, RANK>) -> Differentiable<A, RANK>
-    where
-        F: Fn(Differentiable<A, RANK>) -> Differentiable<A, RANK>,
-        A: Clone
-            + Hash
-            + AddAssign
-            + Mul<Output = A>
-            + Exp
-            + Div<Output = A>
-            + Zero
-            + One
-            + Neg<Output = A>
-            + Eq,
-    {
-        let mut i = 0usize;
-        let wrt = theta.contents.map(&mut |x| {
+pub fn grad<A, F, const RANK: usize, const PARAM_RANK: usize>(
+    f: F,
+    theta: &[Differentiable<A>; PARAM_RANK],
+) -> [Differentiable<A>; PARAM_RANK]
+where
+    F: Fn(&[Differentiable<A>; PARAM_RANK]) -> RankedDifferentiable<A, RANK>,
+    A: ?Sized
+        + Clone
+        + Hash
+        + AddAssign
+        + Mul<Output = A>
+        + Exp
+        + Div<Output = A>
+        + Zero
+        + One
+        + Neg<Output = A>
+        + Eq,
+{
+    let mut i = 0usize;
+    let wrt = theta.each_ref().map(|theta| {
+        theta.map(&mut |x| {
             let result = Scalar::truncate_dual(x, Some(i));
             i += 1;
             result
-        });
-        let after_f = f(Differentiable {
-            contents: wrt.clone(),
-        });
-        Differentiable {
-            contents: DifferentiableHidden::grad_once(after_f.contents, &wrt),
-        }
-    }
+        })
+    });
+    let after_f = f(&wrt);
+    Differentiable::grad_once(after_f.contents, wrt)
 }

 #[cfg(test)]
 mod tests {
     use ordered_float::NotNan;

-    use crate::loss::{l2_loss_2, predict_line_2};
+    use crate::loss::{l2_loss_2, predict_line_2_unranked};

     use super::*;

-    fn extract_scalar<'a, A>(d: &'a DifferentiableHidden<A>) -> &'a A {
+    fn extract_scalar<'a, A>(d: &'a Differentiable<A>) -> &'a A {
         match d {
-            DifferentiableHidden::Scalar(a) => &(a.real_part()),
-            DifferentiableHidden::Vector(_) => panic!("not a scalar"),
+            Differentiable::Scalar(a) => &(a.real_part()),
+            Differentiable::Vector(_) => panic!("not a scalar"),
         }
     }

     #[test]
     fn test_map() {
-        let v = DifferentiableHidden::Vector(
+        let v = Differentiable::Vector(
             vec![
-                DifferentiableHidden::Scalar(Scalar::Number(
+                Differentiable::Scalar(Scalar::Number(
                     NotNan::new(3.0).expect("3 is not NaN"),
                     Some(0usize),
                 )),
-                DifferentiableHidden::Scalar(Scalar::Number(
+                Differentiable::Scalar(Scalar::Number(
                     NotNan::new(4.0).expect("4 is not NaN"),
                     Some(1usize),
                 )),
@@ -306,8 +396,8 @@ mod tests {
         });

         let v = match mapped {
-            DifferentiableHidden::Scalar(_) => panic!("Not a scalar"),
-            DifferentiableHidden::Vector(v) => v
+            Differentiable::Scalar(_) => panic!("Not a scalar"),
+            Differentiable::Vector(v) => v
                 .iter()
                 .map(|d| extract_scalar(d).clone())
                 .collect::<Vec<_>>(),
@@ -318,26 +408,27 @@ mod tests {

     #[test]
     fn test_autodiff() {
-        let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
+        let input_vec = [
+            RankedDifferentiable::of_scalar(Scalar::<NotNan<f64>>::zero()).contents,
+            RankedDifferentiable::of_scalar(Scalar::<NotNan<f64>>::zero()).contents,
+        ];
         let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
         let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
-        let grad = Differentiable::grad(
+        let grad = grad(
             |x| {
-                Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                    predict_line_2,
-                    of_slice(&xs),
-                    of_slice(&ys),
+                RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(l2_loss_2(
+                    predict_line_2_unranked,
+                    RankedDifferentiable::of_slice(&xs),
+                    RankedDifferentiable::of_slice(&ys),
                     x,
                 ))])
             },
             &input_vec,
         );

-        let grad_vec: Vec<f64> = Differentiable::to_vector(grad)
-            .into_iter()
-            .map(to_scalar)
-            .map(|x| f64::from(*x.real_part()))
-            .collect();
-        assert_eq!(grad_vec, vec![-63.0, -21.0]);
+        let grad_vec = grad
+            .map(Differentiable::into_scalar)
+            .map(|x| f64::from(*x.real_part()));
+        assert_eq!(grad_vec, [-63.0, -21.0]);
     }
 }
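The new API in a nutshell: `grad` now takes a fixed-size array of unranked parameters and hands back one gradient per parameter, while `RankedDifferentiable` carries the tensor rank in its type. A minimal usage sketch (hypothetical code, not part of this commit, mirroring the `grad_example` test in `main.rs`):

// Hypothetical sketch: differentiate f(theta) = theta^2 at theta = 3.0.
use little_learner::auto_diff::{grad, Differentiable, RankedDifferentiable};
use little_learner::scalar::Scalar;
use ordered_float::NotNan;

fn grad_of_square() {
    // One parameter: a rank-0 tensor holding 3.0.
    let theta = [Differentiable::Scalar(Scalar::make(
        NotNan::new(3.0).expect("not nan"),
    ))];
    let gradient = grad(
        // The target builds a ranked tensor out of the unranked parameters.
        |theta| {
            RankedDifferentiable::of_scalar(
                theta[0].borrow_scalar().clone() * theta[0].borrow_scalar().clone(),
            )
        },
        &theta,
    );
    // One gradient comes back per parameter: d(theta^2)/d(theta) = 2 * 3.0.
    let [d] = gradient;
    assert_eq!(d.into_scalar().real_part().into_inner(), 6.0);
}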
little_learner/src/const_teq.rs (new file; 13 additions)

@@ -0,0 +1,13 @@
+use std::marker::PhantomData;
+
+pub struct ConstTeq<const A: usize, const B: usize> {
+    phantom_a: PhantomData<[(); A]>,
+    phantom_b: PhantomData<[(); B]>,
+}
+
+pub fn make<const A: usize>() -> ConstTeq<A, A> {
+    ConstTeq {
+        phantom_a: Default::default(),
+        phantom_b: Default::default(),
+    }
+}
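`ConstTeq<A, B>` is a propositional-equality witness for const generics: `make` is the only constructor, and it forces both parameters to coincide. Nothing else in this commit uses the module yet; a hypothetical sketch of the intent:

use little_learner::const_teq::{make, ConstTeq};

// Fine: `make::<2>()` has type `ConstTeq<2, 2>`.
fn witness() -> ConstTeq<2, 2> {
    make::<2>()
}

// Would not compile: there is no way to conjure a `ConstTeq<2, 3>`,
// so holding a `ConstTeq<A, B>` proves A == B at compile time.
// fn bogus() -> ConstTeq<2, 3> { make::<2>() }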
little_learner/src/lib.rs

@@ -1,7 +1,9 @@
 #![allow(incomplete_features)]
 #![feature(generic_const_exprs)]
+#![feature(array_methods)]

 pub mod auto_diff;
+pub mod const_teq;
 pub mod expr_syntax_tree;
 pub mod loss;
 pub mod scalar;
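`array_methods` is the nightly feature gate for `<[T; N]>::each_ref`, which the new `grad` uses to borrow each parameter out of `&[Differentiable<A>; PARAM_RANK]` without moving the array. A hypothetical sketch of what the feature provides:

#![feature(array_methods)]

fn each_ref_demo() {
    let xs = [1, 2, 3];
    // `each_ref` turns `&[i32; 3]` into `[&i32; 3]`.
    let refs: [&i32; 3] = xs.each_ref();
    assert_eq!(*refs[1], 2);
}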
little_learner/src/loss.rs

@@ -4,7 +4,7 @@ use std::{
 };

 use crate::{
-    auto_diff::{of_scalar, to_scalar, Differentiable},
+    auto_diff::{Differentiable, RankedDifferentiable},
     scalar::Scalar,
     traits::{One, Zero},
 };
@@ -16,49 +16,61 @@ where
     x.clone() * x.clone()
 }

-pub fn dot_2<A, const RANK: usize>(
-    x: &Differentiable<A, RANK>,
-    y: &Differentiable<A, RANK>,
-) -> Differentiable<A, RANK>
+pub fn elementwise_mul<A, const RANK: usize>(
+    x: &RankedDifferentiable<A, RANK>,
+    y: &RankedDifferentiable<A, RANK>,
+) -> RankedDifferentiable<A, RANK>
+where
+    A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
+{
+    RankedDifferentiable::map2(x, y, &|x, y| x.clone() * y.clone())
+}
+
+pub fn dot_unranked<A>(x: &Differentiable<A>, y: &Differentiable<A>) -> Differentiable<A>
 where
     A: Mul<Output = A> + Sum<<A as Mul>::Output> + Clone + Default,
 {
     Differentiable::map2(x, y, &|x, y| x.clone() * y.clone())
 }

-fn squared_2<A, const RANK: usize>(x: &Differentiable<A, RANK>) -> Differentiable<A, RANK>
+fn squared_2<A, const RANK: usize>(
+    x: &RankedDifferentiable<A, RANK>,
+) -> RankedDifferentiable<A, RANK>
 where
     A: Mul<Output = A> + Copy + Default,
 {
-    Differentiable::map2(x, x, &|x, y| x.clone() * y.clone())
+    RankedDifferentiable::map2(x, x, &|x, y| x.clone() * y.clone())
 }

-fn sum_2<A>(x: Differentiable<A, 1>) -> Scalar<A>
+fn sum_2<A>(x: RankedDifferentiable<A, 1>) -> Scalar<A>
 where
     A: Sum<A> + Clone + Add<Output = A> + Zero,
 {
-    Differentiable::to_vector(x)
+    RankedDifferentiable::to_vector(x)
         .into_iter()
-        .map(to_scalar)
+        .map(|x| x.to_scalar())
         .sum()
 }

-fn l2_norm_2<A>(prediction: &Differentiable<A, 1>, data: &Differentiable<A, 1>) -> Scalar<A>
+fn l2_norm_2<A>(
+    prediction: &RankedDifferentiable<A, 1>,
+    data: &RankedDifferentiable<A, 1>,
+) -> Scalar<A>
 where
     A: Sum<A> + Mul<Output = A> + Copy + Default + Neg<Output = A> + Add<Output = A> + Zero + Neg,
 {
-    let diff = Differentiable::map2(prediction, data, &|x, y| x.clone() - y.clone());
+    let diff = RankedDifferentiable::map2(prediction, data, &|x, y| x.clone() - y.clone());
     sum_2(squared_2(&diff))
 }

-pub fn l2_loss_2<A, F, Params>(
+pub fn l2_loss_2<A, F, Params, const N: usize>(
     target: F,
-    data_xs: Differentiable<A, 1>,
-    data_ys: Differentiable<A, 1>,
+    data_xs: RankedDifferentiable<A, N>,
+    data_ys: RankedDifferentiable<A, 1>,
     params: Params,
 ) -> Scalar<A>
 where
-    F: Fn(Differentiable<A, 1>, Params) -> Differentiable<A, 1>,
+    F: Fn(RankedDifferentiable<A, N>, Params) -> RankedDifferentiable<A, 1>,
     A: Sum<A> + Mul<Output = A> + Copy + Default + Neg<Output = A> + Add<Output = A> + Zero,
 {
     let pred_ys = target(data_xs, params);
@@ -66,42 +78,143 @@ where
 }

 pub fn predict_line_2<A>(
-    xs: Differentiable<A, 1>,
-    theta: Differentiable<A, 1>,
-) -> Differentiable<A, 1>
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[RankedDifferentiable<A, 0>; 2],
+) -> RankedDifferentiable<A, 1>
 where
     A: Mul<Output = A> + Add<Output = A> + Sum<<A as Mul>::Output> + Copy + Default + One + Zero,
 {
-    let xs = Differentiable::to_vector(xs)
+    let xs = RankedDifferentiable::to_vector(xs)
         .into_iter()
-        .map(|v| to_scalar(v));
+        .map(|v| v.to_scalar());
     let mut result = vec![];
     for x in xs {
-        let left_arg = Differentiable::of_vector(vec![
-            of_scalar(x.clone()),
-            of_scalar(<Scalar<A> as One>::one()),
+        let left_arg = RankedDifferentiable::of_vector(vec![
+            RankedDifferentiable::of_scalar(x.clone()),
+            RankedDifferentiable::of_scalar(<Scalar<A> as One>::one()),
         ]);
-        let dotted = of_scalar(
-            Differentiable::to_vector(dot_2(&left_arg, &theta))
-                .iter()
-                .map(|x| to_scalar((*x).clone()))
-                .sum(),
+        let dotted = RankedDifferentiable::of_scalar(
+            RankedDifferentiable::to_vector(elementwise_mul(
+                &left_arg,
+                &RankedDifferentiable::of_vector(theta.to_vec()),
+            ))
+            .iter()
+            .map(|x| (*x).clone().to_scalar())
+            .sum(),
         );
         result.push(dotted);
     }
-    Differentiable::of_vector(result)
+    RankedDifferentiable::of_vector(result)
+}
+
+pub fn predict_line_2_unranked<A>(
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[Differentiable<A>; 2],
+) -> RankedDifferentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum<<A as Mul>::Output> + Copy + Default + One + Zero,
+{
+    let xs = RankedDifferentiable::to_vector(xs)
+        .into_iter()
+        .map(|v| v.to_scalar());
+    let mut result = vec![];
+    for x in xs {
+        let left_arg = RankedDifferentiable::of_vector(vec![
+            RankedDifferentiable::of_scalar(x.clone()),
+            RankedDifferentiable::of_scalar(<Scalar<A> as One>::one()),
+        ]);
+        let dotted = RankedDifferentiable::of_scalar(
+            dot_unranked(
+                left_arg.to_unranked_borrow(),
+                &Differentiable::Vector(theta.to_vec()),
+            )
+            .into_vector()
+            .into_iter()
+            .map(|x| x.into_scalar())
+            .sum(),
+        );
+        result.push(dotted);
+    }
+    RankedDifferentiable::of_vector(result)
 }

 pub fn predict_quadratic<A>(
-    xs: Differentiable<A, 1>,
-    theta: Differentiable<A, 1>,
-) -> Differentiable<A, 1>
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[RankedDifferentiable<A, 0>; 3],
+) -> RankedDifferentiable<A, 1>
 where
     A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
 {
-    Differentiable::map(xs, &mut |x| {
+    RankedDifferentiable::map(xs, &mut |x| {
         let x_powers = vec![Scalar::make(A::one()), x.clone(), square(&x)];
-        let x_powers = Differentiable::of_vector(x_powers.into_iter().map(of_scalar).collect());
-        sum_2(dot_2(&x_powers, &theta))
+        let x_powers = RankedDifferentiable::of_vector(
+            x_powers
+                .into_iter()
+                .map(RankedDifferentiable::of_scalar)
+                .collect(),
+        );
+        RankedDifferentiable::to_vector(elementwise_mul(
+            &x_powers,
+            &RankedDifferentiable::of_vector(theta.to_vec()),
+        ))
+        .into_iter()
+        .map(|x| x.to_scalar())
+        .sum()
     })
 }
+
+pub fn predict_quadratic_unranked<A>(
+    xs: RankedDifferentiable<A, 1>,
+    theta: &[Differentiable<A>; 3],
+) -> RankedDifferentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
+{
+    RankedDifferentiable::map(xs, &mut |x| {
+        let x_powers = vec![Scalar::make(A::one()), x.clone(), square(&x)];
+        let x_powers = RankedDifferentiable::of_vector(
+            x_powers
+                .into_iter()
+                .map(RankedDifferentiable::of_scalar)
+                .collect(),
+        );
+        dot_unranked(
+            x_powers.to_unranked_borrow(),
+            &Differentiable::Vector(theta.to_vec()),
+        )
+        .attach_rank::<1>()
+        .expect("wanted a tensor1")
+        .to_vector()
+        .into_iter()
+        .map(|x| x.to_scalar())
+        .sum()
+    })
+}
+
+// The parameters are: a tensor1 of length 2 (to be dotted with the input), and a scalar (to translate).
+pub fn predict_plane<A>(
+    xs: RankedDifferentiable<A, 2>,
+    theta: &[Differentiable<A>; 2],
+) -> RankedDifferentiable<A, 1>
+where
+    A: Mul<Output = A> + Add<Output = A> + Sum + Default + One + Zero + Clone,
+{
+    if theta[0].rank() != 1 {
+        panic!("theta0 must be of rank 1, got: {}", theta[0].rank())
+    }
+    let theta0 = RankedDifferentiable::of_vector(
+        theta[0]
+            .borrow_vector()
+            .iter()
+            .map(|v| RankedDifferentiable::of_scalar(v.borrow_scalar().clone()))
+            .collect::<Vec<_>>(),
+    );
+    let theta1 = theta[1].borrow_scalar().clone();
+    let dotted: Vec<_> = xs
+        .to_vector()
+        .into_iter()
+        .map(|point| sum_2(elementwise_mul(&theta0, &point)))
+        .map(|x| RankedDifferentiable::of_scalar(x + theta1.clone()))
+        .collect();
+    RankedDifferentiable::of_vector(dotted)
+}
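Numerically, `predict_plane` computes `theta0 . x + theta1` for every input point. A hypothetical plain-`f64` sketch of the same formula, checked against the first training point and the fitted parameters asserted in `main.rs` below:

fn predict_plane_point(x: [f64; 2], theta0: [f64; 2], theta1: f64) -> f64 {
    // Dot the rank-1 parameter with the point, then translate by the scalar.
    theta0[0] * x[0] + theta0[1] * x[1] + theta1
}

fn plane_sanity_check() {
    let y = predict_plane_point(
        [1.0, 2.05],
        [3.97757644609063, 2.0496557321494446],
        5.786758464448078,
    );
    // The first label is 13.99; the fitted plane predicts roughly 13.97.
    assert!((y - 13.99).abs() < 0.05);
}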
little_learner_app/Cargo.toml

@@ -9,4 +9,3 @@ edition = "2021"
 immutable-chunkmap = "1.0.5"
 ordered-float = "3.6.0"
 little_learner = { path = "../little_learner" }
-arrayvec = "0.7.2"
little_learner_app/src/main.rs

@@ -6,9 +6,9 @@ mod with_tensor;
 use core::hash::Hash;
 use std::ops::{Add, AddAssign, Div, Mul, Neg};

-use little_learner::auto_diff::{of_scalar, of_slice, to_scalar, Differentiable};
+use little_learner::auto_diff::{grad, Differentiable, RankedDifferentiable};

-use little_learner::loss::{l2_loss_2, predict_quadratic};
+use little_learner::loss::{l2_loss_2, predict_plane};
 use little_learner::scalar::Scalar;
 use little_learner::traits::{Exp, One, Zero};
 use ordered_float::NotNan;
@@ -24,16 +24,16 @@ where
     v
 }

-struct GradientDescentHyper<A, const RANK: usize> {
+struct GradientDescentHyper<A> {
     learning_rate: A,
     iterations: u32,
 }

-fn gradient_descent_step<A, F, const RANK: usize>(
+fn gradient_descent_step<A, F, const RANK: usize, const PARAM_NUM: usize>(
     f: &F,
-    theta: Differentiable<A, RANK>,
-    params: &GradientDescentHyper<A, RANK>,
-) -> Differentiable<A, RANK>
+    theta: [Differentiable<A>; PARAM_NUM],
+    params: &GradientDescentHyper<A>,
+) -> [Differentiable<A>; PARAM_NUM]
 where
     A: Clone
         + Mul<Output = A>
@@ -46,17 +46,33 @@ where
         + One
         + Eq
         + Exp,
-    F: Fn(Differentiable<A, RANK>) -> Differentiable<A, RANK>,
+    F: Fn(&[Differentiable<A>; PARAM_NUM]) -> RankedDifferentiable<A, RANK>,
 {
-    let delta = Differentiable::grad(f, &theta);
-    Differentiable::map2(&theta, &delta, &|theta, delta| {
-        (*theta).clone() - (Scalar::make((params.learning_rate).clone()) * (*delta).clone())
+    let delta = grad(f, &theta);
+    let mut i = 0;
+    theta.map(|theta| {
+        let delta = &delta[i];
+        i += 1;
+        // For speed, you might want to truncate_dual this.
+        let learning_rate = Scalar::make((params.learning_rate).clone());
+        Differentiable::map2(
+            &theta,
+            &delta.map(&mut |s| s * learning_rate.clone()),
+            &|theta, delta| (*theta).clone() - (*delta).clone(),
+        )
     })
 }

 fn main() {
-    let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
-    let ys = [2.55, 2.1, 4.35, 10.2, 18.25];
+    let plane_xs = [
+        [1.0, 2.05],
+        [1.0, 3.0],
+        [2.0, 2.0],
+        [2.0, 3.91],
+        [3.0, 6.13],
+        [4.0, 8.09],
+    ];
+    let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];

     let hyper = GradientDescentHyper {
         learning_rate: NotNan::new(0.001).expect("not nan"),
@@ -64,48 +80,63 @@ fn main() {
     };

     let iterated = {
-        let xs = xs.map(|x| NotNan::new(x).expect("not nan"));
-        let ys = ys.map(|x| NotNan::new(x).expect("not nan"));
+        let xs = plane_xs.map(|x| {
+            [
+                NotNan::new(x[0]).expect("not nan"),
+                NotNan::new(x[1]).expect("not nan"),
+            ]
+        });
+        let ys = plane_ys.map(|x| NotNan::new(x).expect("not nan"));
         iterate(
             &|theta| {
                 gradient_descent_step(
                     &|x| {
-                        Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                            predict_quadratic,
-                            of_slice(&xs),
-                            of_slice(&ys),
-                            x,
-                        ))])
+                        RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                            l2_loss_2(
+                                predict_plane,
+                                RankedDifferentiable::of_slice_2::<_, 2>(&xs),
+                                RankedDifferentiable::of_slice(ys),
+                                x,
+                            ),
+                        )])
                     },
                     theta,
                     &hyper,
                 )
             },
-            of_slice(&[
-                NotNan::<f64>::zero(),
-                NotNan::<f64>::zero(),
-                NotNan::<f64>::zero(),
-            ]),
+            [
+                RankedDifferentiable::of_slice([NotNan::zero(), NotNan::zero()]).to_unranked(),
+                Differentiable::Scalar(Scalar::zero()),
+            ],
             hyper.iterations,
         )
     };

-    println!(
-        "After iteration: {:?}",
-        Differentiable::to_vector(iterated)
+    let [theta0, theta1] = iterated;
+
+    let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+    let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+
+    assert_eq!(
+        theta0
+            .to_vector()
             .into_iter()
-            .map(|x| to_scalar(x).real_part().into_inner())
-            .collect::<Vec<_>>()
+            .map(|x| x.to_scalar().real_part().into_inner())
+            .collect::<Vec<_>>(),
+        [3.97757644609063, 2.0496557321494446]
+    );
+    assert_eq!(
+        theta1.to_scalar().real_part().into_inner(),
+        5.786758464448078
     );
 }

 #[cfg(test)]
 mod tests {
     use super::*;
-    use arrayvec::ArrayVec;
     use little_learner::{
-        auto_diff::to_scalar,
-        loss::{predict_line_2, square},
+        auto_diff::grad,
+        loss::{l2_loss_2, predict_line_2, predict_line_2_unranked, predict_quadratic_unranked},
     };

     use crate::with_tensor::{l2_loss, predict_line};
@@ -116,9 +147,12 @@ mod tests {
         let ys = [1.8, 1.2, 4.2, 3.3];
         let loss = l2_loss_2(
             predict_line_2,
-            of_slice(&xs),
-            of_slice(&ys),
-            of_slice(&[0.0, 0.0]),
+            RankedDifferentiable::of_slice(&xs),
+            RankedDifferentiable::of_slice(&ys),
+            &[
+                RankedDifferentiable::of_scalar(Scalar::zero()),
+                RankedDifferentiable::of_scalar(Scalar::zero()),
+            ],
         );

         assert_eq!(*loss.real_part(), 33.21);
@@ -134,29 +168,39 @@ mod tests {

     #[test]
     fn grad_example() {
-        let input_vec = of_slice(&[NotNan::new(27.0).expect("not nan")]);
+        let input_vec = [Differentiable::Scalar(Scalar::make(
+            NotNan::new(27.0).expect("not nan"),
+        ))];

-        let grad: Vec<_> = Differentiable::to_vector(Differentiable::grad(
-            |x| Differentiable::map(x, &mut |x| square(&x)),
+        let grad: Vec<_> = grad(
+            |x| {
+                RankedDifferentiable::of_scalar(
+                    x[0].borrow_scalar().clone() * x[0].borrow_scalar().clone(),
+                )
+            },
             &input_vec,
-        ))
+        )
         .into_iter()
-        .map(|x| to_scalar(x).real_part().into_inner())
+        .map(|x| x.into_scalar().real_part().into_inner())
         .collect();
         assert_eq!(grad, [54.0]);
     }

     #[test]
     fn loss_gradient() {
-        let input_vec = of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]);
+        let zero = Scalar::<NotNan<f64>>::zero();
+        let input_vec = [
+            RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+            RankedDifferentiable::of_scalar(zero).to_unranked(),
+        ];
         let xs = [2.0, 1.0, 4.0, 3.0].map(|x| NotNan::new(x).expect("not nan"));
         let ys = [1.8, 1.2, 4.2, 3.3].map(|x| NotNan::new(x).expect("not nan"));
-        let grad = Differentiable::grad(
+        let grad = grad(
             |x| {
-                Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                    predict_line_2,
-                    of_slice(&xs),
-                    of_slice(&ys),
+                RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(l2_loss_2(
+                    predict_line_2_unranked,
+                    RankedDifferentiable::of_slice(&xs),
+                    RankedDifferentiable::of_slice(&ys),
                     x,
                 ))])
             },
@@ -164,9 +208,8 @@ mod tests {
         );

         assert_eq!(
-            Differentiable::to_vector(grad)
-                .into_iter()
-                .map(|x| *(to_scalar(x).real_part()))
+            grad.into_iter()
+                .map(|x| *(x.into_scalar().real_part()))
                 .collect::<Vec<_>>(),
             [-63.0, -21.0]
         );
@@ -174,13 +217,7 @@ mod tests {

     #[test]
     fn test_iterate() {
-        let f = |t: [i32; 3]| {
-            let mut vec = ArrayVec::<i32, 3>::new();
-            for i in t {
-                vec.push(i - 3);
-            }
-            vec.into_inner().unwrap()
-        };
+        let f = |t: [i32; 3]| t.map(|i| i - 3);
         assert_eq!(iterate(&f, [1, 2, 3], 5u32), [-14, -13, -12]);
     }

@@ -189,6 +226,8 @@ mod tests {
         let xs = [2.0, 1.0, 4.0, 3.0];
         let ys = [1.8, 1.2, 4.2, 3.3];

+        let zero = Scalar::<NotNan<f64>>::zero();
+
         let hyper = GradientDescentHyper {
             learning_rate: NotNan::new(0.01).expect("not nan"),
             iterations: 1000,
@@ -200,24 +239,29 @@ mod tests {
             &|theta| {
                 gradient_descent_step(
                     &|x| {
-                        Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                            predict_line_2,
-                            of_slice(&xs),
-                            of_slice(&ys),
-                            x,
-                        ))])
+                        RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                            l2_loss_2(
+                                predict_line_2_unranked,
+                                RankedDifferentiable::of_slice(&xs),
+                                RankedDifferentiable::of_slice(&ys),
+                                x,
+                            ),
+                        )])
                     },
                     theta,
                     &hyper,
                 )
             },
-            of_slice(&[NotNan::<f64>::zero(), NotNan::<f64>::zero()]),
+            [
+                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                RankedDifferentiable::of_scalar(zero).to_unranked(),
+            ],
             hyper.iterations,
         )
         };
-        let iterated = Differentiable::to_vector(iterated)
+        let iterated = iterated
             .into_iter()
-            .map(|x| to_scalar(x).real_part().into_inner())
+            .map(|x| x.into_scalar().real_part().into_inner())
             .collect::<Vec<_>>();

         assert_eq!(iterated, vec![1.0499993623489503, 0.0000018747718457656533]);
@@ -228,6 +272,8 @@ mod tests {
         let xs = [-1.0, 0.0, 1.0, 2.0, 3.0];
         let ys = [2.55, 2.1, 4.35, 10.2, 18.25];

+        let zero = Scalar::<NotNan<f64>>::zero();
+
         let hyper = GradientDescentHyper {
             learning_rate: NotNan::new(0.001).expect("not nan"),
             iterations: 1000,
@@ -240,35 +286,104 @@ mod tests {
             &|theta| {
                 gradient_descent_step(
                     &|x| {
-                        Differentiable::of_vector(vec![of_scalar(l2_loss_2(
-                            predict_quadratic,
-                            of_slice(&xs),
-                            of_slice(&ys),
-                            x,
-                        ))])
+                        RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                            l2_loss_2(
+                                predict_quadratic_unranked,
+                                RankedDifferentiable::of_slice(&xs),
+                                RankedDifferentiable::of_slice(&ys),
+                                x,
+                            ),
+                        )])
                     },
                     theta,
                     &hyper,
                 )
             },
-            of_slice(&[
-                NotNan::<f64>::zero(),
-                NotNan::<f64>::zero(),
-                NotNan::<f64>::zero(),
-            ]),
+            [
+                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                RankedDifferentiable::of_scalar(zero.clone()).to_unranked(),
+                RankedDifferentiable::of_scalar(zero).to_unranked(),
+            ],
             hyper.iterations,
         )
         };
-        let iterated = Differentiable::to_vector(iterated)
+        let iterated = iterated
             .into_iter()
-            .map(|x| to_scalar(x).real_part().into_inner())
+            .map(|x| x.into_scalar().real_part().into_inner())
             .collect::<Vec<_>>();

-        println!("{:?}", iterated);
-
         assert_eq!(
             iterated,
             [2.0546423148479684, 0.9928606519360353, 1.4787394427094362]
         );
     }
+
+    #[test]
+    fn optimise_plane() {
+        let plane_xs = [
+            [1.0, 2.05],
+            [1.0, 3.0],
+            [2.0, 2.0],
+            [2.0, 3.91],
+            [3.0, 6.13],
+            [4.0, 8.09],
+        ];
+        let plane_ys = [13.99, 15.99, 18.0, 22.4, 30.2, 37.94];
+
+        let hyper = GradientDescentHyper {
+            learning_rate: NotNan::new(0.001).expect("not nan"),
+            iterations: 1000,
+        };
+
+        let iterated = {
+            let xs = plane_xs.map(|x| {
+                [
+                    NotNan::new(x[0]).expect("not nan"),
+                    NotNan::new(x[1]).expect("not nan"),
+                ]
+            });
+            let ys = plane_ys.map(|x| NotNan::new(x).expect("not nan"));
+            iterate(
+                &|theta| {
+                    gradient_descent_step(
+                        &|x| {
+                            RankedDifferentiable::of_vector(vec![RankedDifferentiable::of_scalar(
+                                l2_loss_2(
+                                    predict_plane,
+                                    RankedDifferentiable::of_slice_2::<_, 2>(&xs),
+                                    RankedDifferentiable::of_slice(ys),
+                                    x,
+                                ),
+                            )])
+                        },
+                        theta,
+                        &hyper,
+                    )
+                },
+                [
+                    RankedDifferentiable::of_slice([NotNan::zero(), NotNan::zero()]).to_unranked(),
+                    Differentiable::Scalar(Scalar::zero()),
+                ],
+                hyper.iterations,
+            )
+        };
+
+        let [theta0, theta1] = iterated;
+
+        let theta0 = theta0.attach_rank::<1>().expect("rank 1 tensor");
+        let theta1 = theta1.attach_rank::<0>().expect("rank 0 tensor");
+
+        assert_eq!(
+            theta0
+                .to_vector()
+                .into_iter()
+                .map(|x| x.to_scalar().real_part().into_inner())
+                .collect::<Vec<_>>(),
+            [3.97757644609063, 2.0496557321494446]
+        );
+        assert_eq!(
+            theta1.to_scalar().real_part().into_inner(),
+            5.786758464448078
+        );
+    }
 }
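For reference, the update that `gradient_descent_step` applies to every scalar inside every parameter is ordinary gradient descent: `theta <- theta - learning_rate * dLoss/dtheta`. A hypothetical `f64` sketch:

fn scalar_step(theta: f64, gradient: f64, learning_rate: f64) -> f64 {
    theta - learning_rate * gradient
}

fn one_step() {
    // On f(theta) = theta^2 at theta = 3.0 the gradient is 6.0,
    // so a step with learning rate 0.5 lands exactly at the minimum.
    assert_eq!(scalar_step(3.0, 6.0, 0.5), 0.0);
}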