From f550ca4f8442738fe727e01366847d063952374d Mon Sep 17 00:00:00 2001 From: Smaug123 Date: Sun, 3 May 2020 13:37:34 +0100 Subject: [PATCH] Commits --- Git.Test/Git.Test.fsproj | 1 + Git.Test/TestCommit.fs | 47 ++++++++++ Git.Test/TestFromGitBook.fs | 94 +++++++++++++++++++- Git/Blob.fs | 2 - Git/Commit.fs | 171 ++++++++++++++++++++++++++++++++++++ Git/EncodedObject.fs | 9 +- Git/Git.fsproj | 4 +- Git/Hash.fs | 36 ++++++-- Git/Header.fs | 8 +- Git/Object.fs | 15 +++- Git/Parse.fs | 3 + Git/Stream.fs | 45 ++++++++++ Git/Tree.fs | 34 ++----- 13 files changed, 425 insertions(+), 44 deletions(-) create mode 100644 Git.Test/TestCommit.fs create mode 100644 Git/Commit.fs create mode 100644 Git/Parse.fs create mode 100644 Git/Stream.fs diff --git a/Git.Test/Git.Test.fsproj b/Git.Test/Git.Test.fsproj index b061d5d..7b9beee 100644 --- a/Git.Test/Git.Test.fsproj +++ b/Git.Test/Git.Test.fsproj @@ -21,6 +21,7 @@ + diff --git a/Git.Test/TestCommit.fs b/Git.Test/TestCommit.fs new file mode 100644 index 0000000..3278207 --- /dev/null +++ b/Git.Test/TestCommit.fs @@ -0,0 +1,47 @@ +namespace Git.Test + +open System.IO.Abstractions.TestingHelpers +open Git +open NUnit.Framework +open FsUnitTyped +open Microsoft.FSharp.Data.UnitSystems.SI.UnitNames + +[] +module TestCommit = + + [] + let ``Round-trip a commit`` () = + let fs = MockFileSystem () + let dir = fs.Path.GetTempFileName () + let versionDir = fs.DirectoryInfo.FromDirectoryName (dir + "_test") + versionDir.Create() + + let repo = match Repository.init versionDir with | Ok r -> r | Error e -> failwithf "Oh no: %+A" e + + let scott = + { + Name = "Scott Chacon" + Email = "schacon@gmail.com" + DateTimezone = "-0700" + Date = 1243040974 + } + + let commit1 = + { + Committer = scott + Author = scott + CommitMessage = "First commit\n" + Parents = [Hash.ofString "c7929fc1cc938780ffdd9f94e0d364e0ea74f210"] + Tree = Hash.ofString "d8329fc1cc938780ffdd9f94e0d364e0ea74f579" + } + |> Object.Commit + + let h = + EncodedObject.encode commit1 + |> EncodedObject.write repo + + let c = + EncodedObject.catFile repo h + |> EncodedObject.decode + + c |> shouldEqual commit1 diff --git a/Git.Test/TestFromGitBook.fs b/Git.Test/TestFromGitBook.fs index be20345..978932b 100644 --- a/Git.Test/TestFromGitBook.fs +++ b/Git.Test/TestFromGitBook.fs @@ -5,6 +5,7 @@ open System.IO open System.IO.Abstractions.TestingHelpers open NUnit.Framework open FsUnitTyped +open Microsoft.FSharp.Data.UnitSystems.SI.UnitNames open Git @@ -104,7 +105,6 @@ module TestFromGitBook = |> shouldEqual "version 2\n" | s -> failwithf "Oh no: +%A" s - // TODO - implement the staging area and then test it here // Add to the tree let tree1 = [ @@ -220,3 +220,95 @@ module TestFromGitBook = | s -> failwithf "Oh no: +%A" s // TODO: the section on commits + let scott = + { + Name = "Scott Chacon" + Email = "schacon@gmail.com" + DateTimezone = "-0700" + Date = 1243040974 + } + + let commit1 = + { + Committer = scott + Author = scott + CommitMessage = "First commit\n" + Parents = [] + Tree = tree1 + } + |> Object.Commit + + let c1Hash = + commit1 + |> EncodedObject.encode + |> EncodedObject.write repo + // For reasons I don't understand, `git` diverges from Pro Git at this point. + // Pro Git's version: "fdf4fc3344e67ab068f836878b6c4951e3b15f3d" + // `git` (version 2.26.1): + c1Hash + |> Hash.toString + |> shouldEqual "70d4408b5020e81d19906d6abdd87a73233ebf34" + + // Note that we can roundtrip (not done explicitly in the book): + EncodedObject.catFile repo c1Hash + |> EncodedObject.decode + |> shouldEqual commit1 + + let commit2 = + { + Committer = scott + Author = scott + CommitMessage = "Second commit\n" + Parents = [c1Hash] + Tree = tree2 + } + |> Object.Commit + let c2Hash = + commit2 + |> EncodedObject.encode + |> EncodedObject.write repo + c2Hash + |> Hash.toString + |> shouldEqual "1513b13a72f5277252cfce4ed0eda0620aca2f6a" + + EncodedObject.catFile repo c2Hash + |> EncodedObject.decode + |> shouldEqual commit2 + + let commit3 = + { + Committer = scott + Author = scott + CommitMessage = "Third commit\n" + Parents = [c2Hash] + Tree = tree3 + } + |> Object.Commit + let c3Hash = + commit3 + |> EncodedObject.encode + |> EncodedObject.write repo + c3Hash + |> Hash.toString + |> shouldEqual "95cce637b4e889eee8042515db402128bd62c0d2" + + EncodedObject.catFile repo c3Hash + |> EncodedObject.decode + |> shouldEqual commit3 + + objectsDir.EnumerateFiles ("*", SearchOption.AllDirectories) + |> Seq.map (fun f -> f.Directory.Name, f.Name) + |> Seq.toList + |> List.sort + |> shouldEqual [ + "01", "55eb4229851634a0f03eb265b69f5a2d56f341" // tree 2 + "15", "13b13a72f5277252cfce4ed0eda0620aca2f6a" // commit 2 + "1f", "7a7a472abf3dd9643fd615f6da379c4acb3e3a" // test.txt v2 + "3c", "4e9cd789d88d8d89c1073707c3585e41b0e614" // tree 3 + "70", "d4408b5020e81d19906d6abdd87a73233ebf34" // commit 1 + "83", "baae61804e65cc73a7201a7252750c76066a30" // test.txt v1 + "95", "cce637b4e889eee8042515db402128bd62c0d2" // commit 3 + "d6", "70460b4b4aece5915caf5c68d12f560a9fe3e4" // 'test content' + "d8", "329fc1cc938780ffdd9f94e0d364e0ea74f579" // tree 1 + "fa", "49b077972391ad58037050f2a75f74e3671e92" // new.txt + ] diff --git a/Git/Blob.fs b/Git/Blob.fs index ee2df27..e649130 100644 --- a/Git/Blob.fs +++ b/Git/Blob.fs @@ -1,7 +1,5 @@ namespace Git -open System.IO - [] module Blob = let encode (content : byte array) : byte array = content diff --git a/Git/Commit.fs b/Git/Commit.fs new file mode 100644 index 0000000..e0574b3 --- /dev/null +++ b/Git/Commit.fs @@ -0,0 +1,171 @@ +namespace Git + +open System +open System.IO +open Microsoft.FSharp.Data.UnitSystems.SI.UnitNames +open Git.Internals + +type Contributor = + { + Name : string + Email : string + Date : int + DateTimezone : string + } + override this.ToString () = + sprintf "%s <%s> %i %s" this.Name this.Email this.Date this.DateTimezone + +type CommitEntry = + { + Tree : Hash + Parents : Hash list + Committer : Contributor + Author : Contributor + CommitMessage : string + } + override this.ToString () = + sprintf + "tree %O\n%sauthor %O\ncommitter %O\n\n%s" + this.Tree + (this.Parents |> List.map (Hash.toString >> sprintf "parent %s\n") |> String.concat "\n") + this.Author + this.Committer + this.CommitMessage + +// TODO - implement signed commits too +[] +module Commit = + let encode (content : CommitEntry) : byte array = + if content.Author.Name.Contains '<' || content.Author.Name.Contains '\n' then + failwithf "Author name '%s' contains forbidden character" content.Author.Name + if content.Committer.Name.Contains '<' || content.Committer.Name.Contains '\n' then + failwithf "Committer name '%s' contains forbidden character" content.Committer.Name + if content.Author.Email.Contains '>' || content.Author.Email.Contains '\n' then + failwithf "Author email '%s' contains forbidden character" content.Author.Email + if content.Committer.Email.Contains '>' || content.Committer.Email.Contains '\n' then + failwithf "Committer email '%s' contains forbidden character" content.Committer.Email + seq { + yield sprintf "tree %s" (Hash.toString content.Tree) + yield! content.Parents |> List.map (Hash.toString >> sprintf "parent %s") |> Array.ofList + yield sprintf "author %s <%s> %i %s" content.Author.Name content.Author.Email content.Author.Date content.Author.DateTimezone + yield sprintf "committer %s <%s> %i %s" content.Committer.Name content.Committer.Email content.Committer.Date content.Committer.DateTimezone + yield sprintf "\n%s" content.CommitMessage + } + |> String.concat "\n" + |> fun s -> s.ToCharArray () + |> Array.map byte + + let private parseInt (chars : byte array) = + let rec acc (i : int) (soFar : int) = + if i = chars.Length then soFar else + if byte '0' <= chars.[i] && chars.[i] <= byte '9' then + acc (i + 1) (10 * soFar + int (chars.[i] - byte '0')) + else failwithf "non-digit character '%i' ('%c') at index %i" chars.[i] (char chars.[i]) i + acc 0 0 + + let decode (file : byte array) : CommitEntry = + use ms = new MemoryStream(file) + + let consumeWord (OneOf expecting) = + let word = Stream.consumeTo ms 32uy + match word with + | None -> + failwithf "Expected a word '%s' in a commit object, but stream ran out" (expecting |> String.concat "//") + | Some word -> + let word = + word + |> Array.map char + |> String + if not <| List.contains word expecting then + failwithf "Expected a word '%s' in a commit object, but got '%s'" (expecting |> String.concat "//") word + word + + let consumeHash (context : string) = + let hash = Stream.consumeTo ms 10uy + match hash with + | None -> failwithf "Stream ended before we could read hash in context '%s'." context + | Some hash -> + hash |> Hash.ofSpelling + + let consumeLabelledHash (expecting : OneOf) : string * Hash = + let w = consumeWord (expecting) + let h = consumeHash w + w, h + + let consumePerson (id : string) = + let name = Stream.consumeTo ms (byte '<') |> Option.map (Array.map char >> String) + match name with + | None -> + failwithf "No %s name present in commit object." id + | Some name -> + if name.[name.Length - 1] <> ' ' then + failwithf "Name of %s '%s' unexpectedly fails to end with a space" id name + let name = name.Substring (0, name.Length - 1) + + let email = Stream.consumeTo ms (byte '>') |> Option.map (Array.map char >> String) + match email with + | None -> + failwithf "No %s email present in commit object." id + | Some email -> + + let space = Stream.consumeTo ms 32uy + match space with + | None -> failwithf "Commit object ended after %s email" id + | Some s -> if s.Length <> 0 then failwithf "Expected a space immediately after %s email, got '%s'" id (s |> Array.map char |> String) + + let timestamp = Stream.consumeTo ms 32uy + match timestamp with + | None -> failwithf "Commit object ended before %s timestamp" id + | Some timestamp -> + let timestamp = parseInt timestamp * 1 + + let offset = Stream.consumeTo ms 10uy |> Option.map (Array.map char >> String) + match offset with + | None -> failwithf "Commit object ended before %s timezone" id + | Some offset -> + + { + Name = name + Email = email + Date = timestamp + DateTimezone = offset + } + + let treeWord, treeHash = consumeLabelledHash (OneOf ["tree"]) + if treeWord <> "tree" then + failwithf "Malformed tree indicator '%s'" treeWord + + let parents, author = + let rec consumeParentsAndAuthor (parents : Hash list) = + let w = consumeWord (OneOf ["author" ; "parent" ]) + if w = "parent" then + let parent = consumeHash "parent" + consumeParentsAndAuthor (parent :: parents) + elif w = "author" then + let author = consumePerson "author" + parents, author + else + failwithf "Expected author or parent, got '%s'" w + + consumeParentsAndAuthor [] + + let _ = consumeWord (OneOf ["committer"]) + let committer = consumePerson "committer" + + let trailingNewline = Stream.consumeTo ms 10uy + match trailingNewline with + | None -> failwith "Commit object ended at end of committer" + | Some s -> if s.Length <> 0 then failwithf "Expected an extra newline immediately after committer, got %s" (s |> Array.map char |> String) + + let message = Stream.consumeToEnd ms |> Array.map char |> String + //if message.[message.Length - 1] <> '\n' then + // failwithf "Expected commit message to end with newline, got '%c':\n%s" message.[message.Length - 1] message + //let message = message.Substring(0, message.Length - 1) + + { + Committer = committer + Author = author + CommitMessage = message + Tree = treeHash + Parents = parents + } diff --git a/Git/EncodedObject.fs b/Git/EncodedObject.fs index 756d894..37ccf12 100644 --- a/Git/EncodedObject.fs +++ b/Git/EncodedObject.fs @@ -17,6 +17,7 @@ module EncodedObject = match o with | Object.Blob c -> Blob.encode c | Object.Tree entries -> Tree.encode entries + | Object.Commit c -> Commit.encode c { Header = @@ -25,6 +26,8 @@ module EncodedObject = Header.Blob contents.Length | Object.Tree _ -> Header.Tree contents.Length + | Object.Commit _ -> + Header.Commit contents.Length Content = contents } @@ -36,6 +39,9 @@ module EncodedObject = | Header.Blob _ -> Blob.decode e.Content |> Object.Blob + | Header.Commit _ -> + Commit.decode e.Content + |> Object.Commit let hash (o : EncodedObject) : Hash = use hasher = SHA1.Create () @@ -50,7 +56,7 @@ module EncodedObject = |> Array.concat use ms = new MemoryStream(toWrite) - use ds = new Ionic.Zlib.ZlibStream(dest, CompressionMode.Compress) + use ds = new Ionic.Zlib.ZlibStream(dest, CompressionMode.Compress, CompressionLevel.Level0) ms.CopyTo ds /// Read the header of the stream seeked to the beginning of the content. @@ -82,6 +88,7 @@ module EncodedObject = match header with | Header.Blob i -> i | Header.Tree i -> i + | Header.Commit i -> i let result = { Header = header diff --git a/Git/Git.fsproj b/Git/Git.fsproj index 51ab3c1..0561995 100644 --- a/Git/Git.fsproj +++ b/Git/Git.fsproj @@ -5,19 +5,21 @@ + + + - diff --git a/Git/Hash.fs b/Git/Hash.fs index c6d0753..bb3c041 100644 --- a/Git/Hash.fs +++ b/Git/Hash.fs @@ -4,8 +4,15 @@ open System open System.Globalization open System.Text -[] -type Hash = Hash of byte list +type Hash = + | Hash of byte list + override this.ToString () = + match this with + | Hash h -> + let t = StringBuilder (List.length h * 2) + h + |> List.iter (fun b -> t.AppendFormat ("{0:x2}" , b) |> ignore) + t.ToString () [] module Hash = @@ -21,8 +28,23 @@ module Hash = b 0 |> ofBytes - let toString (Hash h) : string = - let t = StringBuilder (List.length h * 2) - h - |> List.iter (fun b -> t.AppendFormat ("{0:x2}" , b) |> ignore) - t.ToString () + // Given a byte array of *characters* spelling out e.g. 'b' 'd' '6' '3', return the hash this is spelling out. + let ofSpelling (input : byte array) : Hash = + let inline value (b : byte) = + let c = char b + if '0' <= c && c <= '9' then b - byte '0' + elif 'A' <= c && c <= 'F' then b - (byte 'A') + 10uy + elif 'a' <= c && c <= 'f' then b - (byte 'a') + 10uy + else failwithf "Byte '%i' ('%c') is not a hex digit" b (char b) + + let rec b (pos : int) = + seq { + if pos < input.Length then + yield value (input.[pos]) * 16uy + value input.[pos + 1] + yield! b (pos + 2) + } + b 0 + |> ofBytes + + let toString (h : Hash) : string = + h.ToString () \ No newline at end of file diff --git a/Git/Header.fs b/Git/Header.fs index 743c3a8..c29eab0 100644 --- a/Git/Header.fs +++ b/Git/Header.fs @@ -5,7 +5,7 @@ open System type Header = | Blob of int // length of content | Tree of int // length of content - // | Commit + | Commit of int // length of content // | Tag [] @@ -19,6 +19,8 @@ module internal Header = sprintf "blob %i" length | Header.Tree length -> sprintf "tree %i" length + | Header.Commit length -> + sprintf "commit %i" length [| s.ToCharArray () |> Array.map byte [| 0uy |] @@ -34,4 +36,8 @@ module internal Header = let number = s.[5..] |> Array.map char |> String |> Int32.Parse Header.Tree number |> Some + elif s.[0..5] = ("commit".ToCharArray () |> Array.map byte) then + let number = s.[7..] |> Array.map char |> String |> Int32.Parse + Header.Commit number + |> Some else None diff --git a/Git/Object.fs b/Git/Object.fs index dc384e7..5760b48 100644 --- a/Git/Object.fs +++ b/Git/Object.fs @@ -3,7 +3,16 @@ namespace Git type Object = | Blob of byte array | Tree of TreeEntry list + | Commit of CommitEntry -[] -module Object = - do () + override this.ToString () = + match this with + | Blob b -> + sprintf "blob: %+A" b + | Tree t -> + t + |> List.map (fun i -> i.ToString ()) + |> String.concat "\n" + |> sprintf "tree:\n%+A" + | Commit c -> + sprintf "commit:\n%O" c diff --git a/Git/Parse.fs b/Git/Parse.fs new file mode 100644 index 0000000..fd2632a --- /dev/null +++ b/Git/Parse.fs @@ -0,0 +1,3 @@ +namespace Git.Internals + +type OneOf = OneOf of string list \ No newline at end of file diff --git a/Git/Stream.fs b/Git/Stream.fs new file mode 100644 index 0000000..8439e55 --- /dev/null +++ b/Git/Stream.fs @@ -0,0 +1,45 @@ +namespace Git.Internals + +open System.IO + +[] +module internal Stream = + + /// Consume the stream until and including the first instance of `stopAt`. + /// Return None instead if the stream is already used up; throw if we hit the end of the stream + /// before hitting `stopAt`. + let consumeTo (b : Stream) (stopAt : byte) : byte array option = + let rec consumeTo () : byte seq = + seq { + let b = b.ReadByte () + if b < 0 then failwithf "Stream ended in the middle while consuming to '%i'." stopAt + if b <> int stopAt then + yield byte b + yield! consumeTo () + } + + // Read the first one to see if we can + let firstByte = b.ReadByte () + if firstByte < 0 then None else + + let firstByte = byte firstByte + if firstByte = stopAt then Array.empty |> Some + else + seq { + yield firstByte + yield! consumeTo () + } + |> Seq.toArray + |> Some + + /// Consume the first n bytes of the stream. Throw if the stream runs out first. + let consume (b : Stream) (n : int) : byte array = + let output = Array.zeroCreate n + let total = b.Read (output, 0, n) + if total <> n then failwithf "Reached the end of the stream while consuming %i bytes" n + output + + let consumeToEnd (b : MemoryStream) : byte array = + use newMs = new MemoryStream() + b.CopyTo(newMs) + newMs.ToArray () diff --git a/Git/Tree.fs b/Git/Tree.fs index eb7f855..cf5cd95 100644 --- a/Git/Tree.fs +++ b/Git/Tree.fs @@ -3,6 +3,7 @@ namespace Git open System open System.IO open System.Text +open Git.Internals type TreeEntry = { @@ -10,6 +11,8 @@ type TreeEntry = Name : string Hash : Hash } + override this.ToString () = + sprintf "%i %s %O" this.Mode this.Name this.Hash [] module Tree = @@ -34,42 +37,17 @@ module Tree = /// decode as a tree object. let decode (b : byte array) : TreeEntry list = use b = new MemoryStream(b) - let consumeTo (stopAt : byte) : byte array option = - let rec consumeTo () : byte seq = - seq { - let b = b.ReadByte () - if b < 0 then failwithf "Stream ended in the middle while consuming to '%i'." stopAt - if b <> int stopAt then - yield byte b - yield! consumeTo () - } - - // Read the first one to see if we can - let firstByte = b.ReadByte () - if firstByte < 0 then None else - seq { - yield byte firstByte - yield! consumeTo () - } - |> Seq.toArray - |> Some - - let consume (n : int) : byte array = - let output = Array.zeroCreate n - let total = b.Read (output, 0, n) - if total <> n then failwithf "Reached the end of the stream while consuming %i bytes" n - output let stripRow () : TreeEntry option = - let mode = consumeTo 32uy + let mode = Stream.consumeTo b 32uy match mode with | None -> None | Some mode -> - let name = consumeTo 0uy + let name = Stream.consumeTo b 0uy match name with | None -> failwith "Stream ended before we could consume a name" | Some name -> - let hash = consume 20 + let hash = Stream.consume b 20 { Mode = mode |> Array.map char |> String |> Int32.Parse Name = name |> Array.map char |> String