mirror of
https://github.com/Smaug123/managed-git
synced 2025-10-09 09:48:45 +00:00
Learn to read packfiles (#5)
This commit is contained in:
@@ -24,6 +24,7 @@
|
||||
<Compile Include="TestCommit.fs" />
|
||||
<Compile Include="TestObject.fs" />
|
||||
<Compile Include="TestLog.fs" />
|
||||
<Compile Include="TestPack.fs" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
|
@@ -36,6 +36,7 @@ module TestCommit =
|
||||
CommitMessage = "First commit\n"
|
||||
Parents = [ Hash.ofString "c7929fc1cc938780ffdd9f94e0d364e0ea74f210" ]
|
||||
Tree = Hash.ofString "d8329fc1cc938780ffdd9f94e0d364e0ea74f579"
|
||||
GpgSignature = None
|
||||
}
|
||||
|> Object.Commit
|
||||
|
||||
|
@@ -273,6 +273,7 @@ module TestFromGitBook =
|
||||
CommitMessage = "First commit\n"
|
||||
Parents = []
|
||||
Tree = tree1
|
||||
GpgSignature = None
|
||||
}
|
||||
|> Object.Commit
|
||||
|
||||
@@ -299,6 +300,7 @@ module TestFromGitBook =
|
||||
CommitMessage = "Second commit\n"
|
||||
Parents = [ c1Hash ]
|
||||
Tree = tree2
|
||||
GpgSignature = None
|
||||
}
|
||||
|> Object.Commit
|
||||
|
||||
@@ -322,6 +324,7 @@ module TestFromGitBook =
|
||||
CommitMessage = "Third commit\n"
|
||||
Parents = [ c2Hash ]
|
||||
Tree = tree3
|
||||
GpgSignature = None
|
||||
}
|
||||
|> Object.Commit
|
||||
|
||||
|
53
Git.Test/TestPack.fs
Normal file
53
Git.Test/TestPack.fs
Normal file
@@ -0,0 +1,53 @@
|
||||
namespace Git.Test
|
||||
|
||||
open System.IO.Abstractions
|
||||
open NUnit.Framework
|
||||
open FsUnitTyped
|
||||
open Git
|
||||
|
||||
[<TestFixture>]
|
||||
module TestPack =
|
||||
|
||||
[<Test>]
|
||||
[<Explicit "Hits a real filesystem, only intended to work while developing">]
|
||||
let ``Example`` () =
|
||||
let fs = FileSystem ()
|
||||
|
||||
let fi =
|
||||
fs.FileInfo.FromFileName
|
||||
"/Users/patrick/Documents/GitHub/stable-diffusion/.git/objects/pack/pack-871a8f18e20fa6104dbd769a07ca12f832048d00.pack"
|
||||
|
||||
let index =
|
||||
fs.FileInfo.FromFileName
|
||||
"/Users/patrick/Documents/GitHub/stable-diffusion/.git/objects/pack/pack-871a8f18e20fa6104dbd769a07ca12f832048d00.idx"
|
||||
|> PackFile.readIndex
|
||||
|
||||
let objects = PackFile.readAll fi index
|
||||
()
|
||||
|
||||
[<Test>]
|
||||
[<Explicit "Hits a real filesystem, only intended to work while developing">]
|
||||
let ``Look up a specific object`` () =
|
||||
let fs = FileSystem ()
|
||||
|
||||
let fi =
|
||||
fs.FileInfo.FromFileName
|
||||
"/Users/patrick/Documents/GitHub/stable-diffusion/.git/objects/pack/pack-871a8f18e20fa6104dbd769a07ca12f832048d00.pack"
|
||||
|
||||
let indexFile =
|
||||
fs.FileInfo.FromFileName
|
||||
"/Users/patrick/Documents/GitHub/stable-diffusion/.git/objects/pack/pack-871a8f18e20fa6104dbd769a07ca12f832048d00.idx"
|
||||
|
||||
let desiredObject = Hash.ofString "1c4bb25a779f34d86b2d90e584ac67af91bb1303"
|
||||
|
||||
let object, name, _metadata =
|
||||
PackFile.locateObject desiredObject indexFile fi
|
||||
|> Option.get
|
||||
|> function
|
||||
| PackObject.Object (Object.Blob b, name, metadata) -> b, name, metadata
|
||||
| _ -> failwith "unexpected"
|
||||
|
||||
name |> shouldEqual desiredObject
|
||||
|
||||
System.IO.File.WriteAllBytes ("/Users/patrick/Documents/GitHub/stable-diffusion/foo2.txt", object)
|
||||
()
|
@@ -116,6 +116,7 @@ module Utils =
|
||||
CommitMessage = "First commit\n"
|
||||
Parents = []
|
||||
Tree = tree1
|
||||
GpgSignature = None
|
||||
}
|
||||
|> Object.Commit
|
||||
|
||||
@@ -134,6 +135,7 @@ module Utils =
|
||||
Author = scott
|
||||
CommitMessage = "Second commit\n"
|
||||
Parents = [ c1Hash ]
|
||||
GpgSignature = None
|
||||
Tree = tree2
|
||||
}
|
||||
|> Object.Commit
|
||||
@@ -154,6 +156,7 @@ module Utils =
|
||||
CommitMessage = "Third commit\n"
|
||||
Parents = [ c2Hash ]
|
||||
Tree = tree3
|
||||
GpgSignature = None
|
||||
}
|
||||
|> Object.Commit
|
||||
|
||||
|
17
Git.Tool/Git.Tool.fsproj
Normal file
17
Git.Tool/Git.Tool.fsproj
Normal file
@@ -0,0 +1,17 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Compile Include="VerifyPack.fs" />
|
||||
<Compile Include="Program.fs" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Git\Git.fsproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
28
Git.Tool/Program.fs
Normal file
28
Git.Tool/Program.fs
Normal file
@@ -0,0 +1,28 @@
|
||||
namespace Git.Tool
|
||||
|
||||
open System
|
||||
open System.IO
|
||||
open System.IO.Abstractions
|
||||
open Git
|
||||
|
||||
module Program =
|
||||
|
||||
[<EntryPoint>]
|
||||
let main args =
|
||||
let fs = FileSystem ()
|
||||
|
||||
let repo =
|
||||
Directory.GetCurrentDirectory ()
|
||||
|> fs.DirectoryInfo.FromDirectoryName
|
||||
|> Repository.make
|
||||
|
||||
match repo with
|
||||
| None -> failwith "not in a git repo"
|
||||
| Some repo ->
|
||||
|
||||
match args with
|
||||
| [| "verify-pack" ; "-v" ; path |] ->
|
||||
let verification = VerifyPack.verify repo path
|
||||
printfn "%s" (string<PackVerification> verification)
|
||||
0
|
||||
| _ -> failwith "unrecognised args"
|
157
Git.Tool/VerifyPack.fs
Normal file
157
Git.Tool/VerifyPack.fs
Normal file
@@ -0,0 +1,157 @@
|
||||
namespace Git.Tool
|
||||
|
||||
open Git
|
||||
|
||||
type PackVerificationLine =
|
||||
{
|
||||
Object : Hash
|
||||
Type : ObjectType
|
||||
Metadata : PackObjectMetadata
|
||||
}
|
||||
|
||||
override this.ToString () =
|
||||
let typeString = string<ObjectType> this.Type
|
||||
|
||||
let padding =
|
||||
Array.create
|
||||
(ObjectType.Commit.ToString().Length
|
||||
- typeString.Length)
|
||||
" "
|
||||
|> String.concat ""
|
||||
|
||||
sprintf "%s %s%s %s" (Hash.toString this.Object) typeString padding (string<PackObjectMetadata> this.Metadata)
|
||||
|
||||
type PackVerificationLineDelta =
|
||||
{
|
||||
Details : PackVerificationLine
|
||||
Depth : int
|
||||
BaseSha : Hash
|
||||
}
|
||||
|
||||
override this.ToString () =
|
||||
sprintf "%s %i %s" (string<PackVerificationLine> this.Details) (this.Depth + 1) (Hash.toString this.BaseSha)
|
||||
|
||||
type PackVerification =
|
||||
{
|
||||
NonDeltaCount : int
|
||||
/// The zeroth entry of this array is the number of deltas with chain length 1, for example.
|
||||
/// The sixth entry of this array is the number of deltas with chain length 7, for example.
|
||||
/// The array is only as long as it needs to be, so it might not have any elements.
|
||||
ChainCounts : int[]
|
||||
Lines : Choice<PackVerificationLine, PackVerificationLineDelta> array
|
||||
}
|
||||
|
||||
override this.ToString () =
|
||||
seq {
|
||||
yield!
|
||||
this.Lines
|
||||
|> Seq.map (fun line ->
|
||||
match line with
|
||||
| Choice1Of2 line -> string<PackVerificationLine> line
|
||||
| Choice2Of2 line -> string<PackVerificationLineDelta> line
|
||||
)
|
||||
|
||||
yield sprintf "non delta: %i object%s" this.NonDeltaCount (if this.NonDeltaCount = 1 then "" else "s")
|
||||
|
||||
yield!
|
||||
this.ChainCounts
|
||||
|> Seq.mapi (fun index count ->
|
||||
sprintf "chain length = %i: %i object%s" (index + 1) count (if count = 1 then "" else "s")
|
||||
)
|
||||
}
|
||||
|> String.concat "\n"
|
||||
|
||||
[<RequireQualifiedAccess>]
|
||||
module VerifyPack =
|
||||
|
||||
/// The ID is e.g. "871a8f18e20fa6104dbd769a07ca12f832048d00"; so the pack file
|
||||
/// derived from the ID is `.git/objects/pack/pack-{id}.pack".
|
||||
let verify (repo : Repository) (id : string) : PackVerification =
|
||||
let fs = repo.Fs
|
||||
let packDir = fs.Path.Combine (Repository.gitDir(repo).FullName, "objects", "pack")
|
||||
|
||||
let index =
|
||||
fs.Path.Combine (packDir, sprintf "pack-%s.idx" id)
|
||||
|> fs.FileInfo.FromFileName
|
||||
|
||||
let packFile =
|
||||
fs.Path.Combine (packDir, sprintf "pack-%s.pack" id)
|
||||
|> fs.FileInfo.FromFileName
|
||||
|
||||
let allPacks =
|
||||
PackFile.readIndex index
|
||||
|> PackFile.readAll packFile
|
||||
|
||||
let rec baseObject (o : PackObject) =
|
||||
match o with
|
||||
| PackObject.Object (object, name, _) -> object, name, 0
|
||||
| PackObject.Delta (object, _, name, _) ->
|
||||
let object, _, depth = baseObject object
|
||||
object, name, depth + 1
|
||||
|
||||
let lines =
|
||||
allPacks
|
||||
|> Array.map (fun object ->
|
||||
match object with
|
||||
| PackObject.Object (object, name, metadata) ->
|
||||
let objectType = Object.getType object
|
||||
|
||||
{
|
||||
Object = name
|
||||
Type = objectType
|
||||
Metadata = metadata
|
||||
}
|
||||
|> Choice1Of2
|
||||
| PackObject.Delta (object, _diff, name, metadata) ->
|
||||
let fullyResolvedBase, fullyResolvedBaseName, depth = baseObject object
|
||||
let objectType = Object.getType fullyResolvedBase
|
||||
|
||||
{
|
||||
Details =
|
||||
{
|
||||
Object = name
|
||||
Type = objectType
|
||||
Metadata = metadata
|
||||
}
|
||||
Depth = depth
|
||||
BaseSha = fullyResolvedBaseName
|
||||
}
|
||||
|> Choice2Of2
|
||||
)
|
||||
|
||||
lines
|
||||
|> Array.sortInPlaceBy (
|
||||
function
|
||||
| Choice1Of2 l -> l.Metadata.OffsetInPackFile
|
||||
| Choice2Of2 l -> l.Details.Metadata.OffsetInPackFile
|
||||
)
|
||||
|
||||
// TODO(perf): everything from here onward is monstrously inefficient as a way of collecting chain counts
|
||||
let nonDeltaCount, chainCounts =
|
||||
((0, Map.empty), lines)
|
||||
||> Array.fold (fun (nonDeltaCount, chainCounts) line ->
|
||||
match line with
|
||||
| Choice1Of2 _ -> nonDeltaCount + 1, chainCounts
|
||||
| Choice2Of2 delta ->
|
||||
nonDeltaCount,
|
||||
Map.change
|
||||
delta.Depth
|
||||
(function
|
||||
| None -> Some 1
|
||||
| Some n -> Some (n + 1))
|
||||
chainCounts
|
||||
)
|
||||
|
||||
let maxChainLength = chainCounts |> Map.keys |> Seq.last
|
||||
|
||||
let chainCounts =
|
||||
fun length ->
|
||||
Map.tryFind length chainCounts
|
||||
|> Option.defaultValue 0
|
||||
|> Array.init (maxChainLength + 1) // for the 0 index
|
||||
|
||||
{
|
||||
NonDeltaCount = nonDeltaCount
|
||||
ChainCounts = chainCounts
|
||||
Lines = lines
|
||||
}
|
6
Git.sln
6
Git.sln
@@ -4,6 +4,8 @@ Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Git", "Git\Git.fsproj", "{0
|
||||
EndProject
|
||||
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Git.Test", "Git.Test\Git.Test.fsproj", "{CABAAA8F-186F-434A-A9F8-E847B69C3164}"
|
||||
EndProject
|
||||
Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Git.Tool", "Git.Tool\Git.Tool.fsproj", "{3874E18F-DD8F-4A67-854D-9E11B8C628FC}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -18,5 +20,9 @@ Global
|
||||
{CABAAA8F-186F-434A-A9F8-E847B69C3164}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{CABAAA8F-186F-434A-A9F8-E847B69C3164}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{CABAAA8F-186F-434A-A9F8-E847B69C3164}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{3874E18F-DD8F-4A67-854D-9E11B8C628FC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{3874E18F-DD8F-4A67-854D-9E11B8C628FC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{3874E18F-DD8F-4A67-854D-9E11B8C628FC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{3874E18F-DD8F-4A67-854D-9E11B8C628FC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
|
@@ -2,4 +2,5 @@
|
||||
<s:String x:Key="/Default/CodeInspection/Highlighting/InspectionSeverities/=FSharpInterpolatedString/@EntryIndexedValue">DO_NOT_SHOW</s:String>
|
||||
<s:String x:Key="/Default/CodeInspection/Highlighting/InspectionSeverities/=FSharpRedundantDotInIndexer/@EntryIndexedValue">DO_NOT_SHOW</s:String>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=Chacon/@EntryIndexedValue">True</s:Boolean>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=failwithf/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=failwithf/@EntryIndexedValue">True</s:Boolean>
|
||||
<s:Boolean x:Key="/Default/UserDictionary/Words/=gpgsig/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>
|
160
Git/Commit.fs
160
Git/Commit.fs
@@ -1,27 +1,15 @@
|
||||
namespace Git
|
||||
|
||||
open System
|
||||
open System.IO
|
||||
open System.Text
|
||||
open Microsoft.FSharp.Data.UnitSystems.SI.UnitNames
|
||||
open Git.Internals
|
||||
|
||||
type Contributor =
|
||||
{
|
||||
Name : string
|
||||
Email : string
|
||||
Date : int<second>
|
||||
DateTimezone : string
|
||||
}
|
||||
|
||||
override this.ToString () =
|
||||
sprintf "%s <%s> %i %s" this.Name this.Email this.Date this.DateTimezone
|
||||
|
||||
type CommitEntry =
|
||||
{
|
||||
Tree : Hash
|
||||
Parents : Hash list
|
||||
Committer : Contributor
|
||||
GpgSignature : string option
|
||||
Author : Contributor
|
||||
CommitMessage : string
|
||||
}
|
||||
@@ -37,7 +25,6 @@ type CommitEntry =
|
||||
this.Committer
|
||||
this.CommitMessage
|
||||
|
||||
// TODO - implement signed commits too
|
||||
[<RequireQualifiedAccess>]
|
||||
module Commit =
|
||||
|
||||
@@ -81,144 +68,69 @@ module Commit =
|
||||
// TODO: assumption that may not be compatible with Git: UTF8 is used for names, emails etc
|
||||
|> Encoding.UTF8.GetBytes
|
||||
|
||||
let private parseInt (chars : byte array) =
|
||||
let rec acc (i : int) (soFar : int) =
|
||||
if i = chars.Length then
|
||||
soFar
|
||||
else if byte '0' <= chars.[i] && chars.[i] <= byte '9' then
|
||||
acc (i + 1) (10 * soFar + int (chars.[i] - byte '0'))
|
||||
else
|
||||
failwithf "non-digit character '%i' ('%c') at index %i" chars.[i] (char chars.[i]) i
|
||||
|
||||
acc 0 0
|
||||
|
||||
let decode (file : byte array) : CommitEntry =
|
||||
use ms = new MemoryStream (file)
|
||||
|
||||
let consumeWord (OneOf expecting) =
|
||||
let word = Stream.consumeTo ms 32uy
|
||||
|
||||
match word with
|
||||
| None ->
|
||||
failwithf
|
||||
"Expected a word '%s' in a commit object, but stream ran out"
|
||||
(expecting |> String.concat "//")
|
||||
| Some word ->
|
||||
|
||||
let word = word |> Array.map char |> String
|
||||
|
||||
if not <| List.contains word expecting then
|
||||
failwithf "Expected a word '%s' in a commit object, but got '%s'" (expecting |> String.concat "//") word
|
||||
|
||||
word
|
||||
|
||||
let consumeHash (context : string) =
|
||||
let hash = Stream.consumeTo ms 10uy
|
||||
|
||||
match hash with
|
||||
| None -> failwithf "Stream ended before we could read hash in context '%s'." context
|
||||
| Some hash -> hash |> Hash.ofSpelling
|
||||
|
||||
let consumeLabelledHash (expecting : OneOf) : string * Hash =
|
||||
let w = consumeWord expecting
|
||||
let h = consumeHash w
|
||||
w, h
|
||||
|
||||
let consumePerson (id : string) =
|
||||
let name =
|
||||
Stream.consumeTo ms (byte '<')
|
||||
|> Option.map (Array.map char >> String)
|
||||
|
||||
match name with
|
||||
| None -> failwithf "No %s name present in commit object." id
|
||||
| Some name ->
|
||||
|
||||
if name.[name.Length - 1] <> ' ' then
|
||||
failwithf "Name of %s '%s' unexpectedly fails to end with a space" id name
|
||||
|
||||
let name = name.Substring (0, name.Length - 1)
|
||||
|
||||
let email =
|
||||
Stream.consumeTo ms (byte '>')
|
||||
|> Option.map (Array.map char >> String)
|
||||
|
||||
match email with
|
||||
| None -> failwithf "No %s email present in commit object." id
|
||||
| Some email ->
|
||||
|
||||
let space = Stream.consumeTo ms 32uy
|
||||
|
||||
match space with
|
||||
| None -> failwithf "Commit object ended after %s email" id
|
||||
| Some s ->
|
||||
if s.Length <> 0 then
|
||||
failwithf "Expected a space immediately after %s email, got '%s'" id (s |> Array.map char |> String)
|
||||
|
||||
let timestamp = Stream.consumeTo ms 32uy
|
||||
|
||||
match timestamp with
|
||||
| None -> failwithf "Commit object ended before %s timestamp" id
|
||||
| Some timestamp ->
|
||||
|
||||
let timestamp = parseInt timestamp * 1<second>
|
||||
|
||||
let offset =
|
||||
Stream.consumeTo ms 10uy
|
||||
|> Option.map (Array.map char >> String)
|
||||
|
||||
match offset with
|
||||
| None -> failwithf "Commit object ended before %s timezone" id
|
||||
| Some offset ->
|
||||
|
||||
{
|
||||
Name = name
|
||||
Email = email
|
||||
Date = timestamp
|
||||
DateTimezone = offset
|
||||
}
|
||||
|
||||
let treeWord, treeHash = consumeLabelledHash (OneOf [ "tree" ])
|
||||
let treeWord, treeHash = Parse.consumeLabelledHash "commit" (OneOf [ "tree" ]) ms
|
||||
|
||||
if treeWord <> "tree" then
|
||||
failwithf "Malformed tree indicator '%s'" treeWord
|
||||
|
||||
let parents, author =
|
||||
let rec consumeParentsAndAuthor (parents : Hash list) =
|
||||
let w = consumeWord (OneOf [ "author" ; "parent" ])
|
||||
let w = Parse.consumeWord "commit" (OneOf [ "author" ; "parent" ]) ms
|
||||
|
||||
if w = "parent" then
|
||||
let parent = consumeHash "parent"
|
||||
let parent = Parse.consumeHash "parent" ms
|
||||
consumeParentsAndAuthor (parent :: parents)
|
||||
elif w = "author" then
|
||||
let author = consumePerson "author"
|
||||
let author = Parse.consumePerson "author" ms
|
||||
parents, author
|
||||
else
|
||||
failwithf "Expected author or parent, got '%s'" w
|
||||
|
||||
consumeParentsAndAuthor []
|
||||
|
||||
let _ = consumeWord (OneOf [ "committer" ])
|
||||
let committer = consumePerson "committer"
|
||||
let _ = Parse.consumeWord "commit" (OneOf [ "committer" ]) ms
|
||||
let committer = Parse.consumePerson "committer" ms
|
||||
|
||||
let trailingNewline = Stream.consumeTo ms 10uy
|
||||
|
||||
match trailingNewline with
|
||||
| None -> failwith "Commit object ended at end of committer"
|
||||
| Some s ->
|
||||
if s.Length <> 0 then
|
||||
failwithf
|
||||
"Expected an extra newline immediately after committer, got %s"
|
||||
(s |> Array.map char |> String)
|
||||
let gpgSignature =
|
||||
match trailingNewline with
|
||||
| None -> failwith "Commit object ended at end of committer"
|
||||
| Some [||] -> None
|
||||
| Some data when data.[0..6] = Encoding.ASCII.GetBytes "gpgsig " ->
|
||||
let result = StringBuilder ()
|
||||
|
||||
let message = Stream.consumeToEnd ms |> Array.map char |> String
|
||||
//if message.[message.Length - 1] <> '\n' then
|
||||
// failwithf "Expected commit message to end with newline, got '%c':\n%s" message.[message.Length - 1] message
|
||||
//let message = message.Substring(0, message.Length - 1)
|
||||
result.Append (Encoding.ASCII.GetString data.[7..])
|
||||
|> ignore
|
||||
|
||||
result.Append '-' |> ignore
|
||||
|
||||
let remaining =
|
||||
match Stream.consumeTo ms (byte '-') with
|
||||
| None -> failwith "GPG signature unexpectedly did not terminate in '-' character"
|
||||
| Some s -> Encoding.ASCII.GetString s
|
||||
|
||||
result.Append remaining |> ignore
|
||||
|
||||
let trailer =
|
||||
match Stream.consumeTo ms 10uy with
|
||||
| None -> failwith "GPG signature was not followed by a newline"
|
||||
| Some s -> Encoding.ASCII.GetString s
|
||||
|
||||
result.Append trailer |> ignore
|
||||
result.ToString () |> Some
|
||||
| Some data -> failwithf "Unexpected trailer to committer, got %s" (Encoding.UTF8.GetString data)
|
||||
|
||||
let message = Stream.consumeToEnd ms |> Encoding.UTF8.GetString
|
||||
|
||||
{
|
||||
Committer = committer
|
||||
Author = author
|
||||
CommitMessage = message
|
||||
GpgSignature = gpgSignature
|
||||
Tree = treeHash
|
||||
Parents = parents
|
||||
}
|
||||
|
@@ -1,5 +1,7 @@
|
||||
namespace Git
|
||||
|
||||
open Microsoft.FSharp.Data.UnitSystems.SI.UnitNames
|
||||
|
||||
type BranchName =
|
||||
| BranchName of string
|
||||
|
||||
@@ -7,3 +9,27 @@ type BranchName =
|
||||
override this.ToString () =
|
||||
match this with
|
||||
| BranchName s -> s
|
||||
|
||||
type Contributor =
|
||||
{
|
||||
Name : string
|
||||
Email : string
|
||||
Date : int<second>
|
||||
DateTimezone : string
|
||||
}
|
||||
|
||||
override this.ToString () =
|
||||
sprintf "%s <%s> %i %s" this.Name this.Email this.Date this.DateTimezone
|
||||
|
||||
type ObjectType =
|
||||
| Commit
|
||||
| Blob
|
||||
| Tag
|
||||
| Tree
|
||||
|
||||
override this.ToString () =
|
||||
match this with
|
||||
| ObjectType.Commit -> "commit"
|
||||
| ObjectType.Blob -> "blob"
|
||||
| ObjectType.Tag -> "tag"
|
||||
| ObjectType.Tree -> "tree"
|
||||
|
@@ -18,21 +18,24 @@ module EncodedObject =
|
||||
| Object.Blob c -> Blob.encode c
|
||||
| Object.Tree entries -> Tree.encode entries
|
||||
| Object.Commit c -> Commit.encode c
|
||||
| Object.Tag t -> Tag.encode t
|
||||
|
||||
{
|
||||
Header =
|
||||
match o with
|
||||
| Object.Blob _ -> Header.Blob contents.Length
|
||||
| Object.Tree _ -> Header.Tree contents.Length
|
||||
| Object.Commit _ -> Header.Commit contents.Length
|
||||
| Object.Blob _ -> ObjectType.Blob, contents.Length
|
||||
| Object.Tree _ -> ObjectType.Tree, contents.Length
|
||||
| Object.Commit _ -> ObjectType.Commit, contents.Length
|
||||
| Object.Tag _ -> ObjectType.Tag, contents.Length
|
||||
Content = contents
|
||||
}
|
||||
|
||||
let decode (e : EncodedObject) : Git.Object =
|
||||
match e.Header with
|
||||
| Header.Tree _ -> Tree.decode e.Content |> Object.Tree
|
||||
| Header.Blob _ -> Blob.decode e.Content |> Object.Blob
|
||||
| Header.Commit _ -> Commit.decode e.Content |> Object.Commit
|
||||
| ObjectType.Tree, _ -> Tree.decode e.Content |> Object.Tree
|
||||
| ObjectType.Blob, _ -> Blob.decode e.Content |> Object.Blob
|
||||
| ObjectType.Commit, _ -> Commit.decode e.Content |> Object.Commit
|
||||
| ObjectType.Tag, _ -> Tag.decode e.Content |> Object.Tag
|
||||
|
||||
let hash (o : EncodedObject) : Hash =
|
||||
use hasher = SHA1.Create ()
|
||||
@@ -76,11 +79,7 @@ module EncodedObject =
|
||||
use r = new BinaryReader (ms)
|
||||
let header = consumeHeader r
|
||||
|
||||
let expectedLength =
|
||||
match header with
|
||||
| Header.Blob i -> i
|
||||
| Header.Tree i -> i
|
||||
| Header.Commit i -> i
|
||||
let expectedLength = snd header
|
||||
|
||||
let result =
|
||||
{
|
||||
|
@@ -3,19 +3,22 @@
|
||||
<PropertyGroup>
|
||||
<TargetFramework>netstandard2.0</TargetFramework>
|
||||
<DisableImplicitFSharpCoreReference>true</DisableImplicitFSharpCoreReference>
|
||||
<WarningsAsErrors>true</WarningsAsErrors>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Compile Include="Domain.fs" />
|
||||
<Compile Include="Hash.fs" />
|
||||
<Compile Include="Stream.fs" />
|
||||
<Compile Include="Parse.fs" />
|
||||
<Compile Include="Domain.fs" />
|
||||
<Compile Include="Header.fs" />
|
||||
<Compile Include="Repository.fs" />
|
||||
<Compile Include="Hash.fs" />
|
||||
<Compile Include="Tree.fs" />
|
||||
<Compile Include="Blob.fs" />
|
||||
<Compile Include="Commit.fs" />
|
||||
<Compile Include="Tag.fs" />
|
||||
<Compile Include="Object.fs" />
|
||||
<Compile Include="PackFile.fs" />
|
||||
<Compile Include="EncodedObject.fs" />
|
||||
<Compile Include="Reference.fs" />
|
||||
<Compile Include="SymbolicReference.fs" />
|
||||
@@ -23,6 +26,8 @@
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Crc32.NET" Version="1.2.0" />
|
||||
<PackageReference Include="SharpZipLib.NETStandard" Version="1.0.7" />
|
||||
<PackageReference Include="System.IO.Abstractions" Version="11.0.4" />
|
||||
<PackageReference Include="FSharp.Core" Version="4.3.4" />
|
||||
</ItemGroup>
|
||||
|
19
Git/Hash.fs
19
Git/Hash.fs
@@ -5,23 +5,24 @@ open System.Globalization
|
||||
open System.Text
|
||||
|
||||
type Hash =
|
||||
| Hash of byte list
|
||||
private
|
||||
| Hash of byte array
|
||||
|
||||
override this.ToString () =
|
||||
match this with
|
||||
| Hash h ->
|
||||
|
||||
let t = StringBuilder (List.length h * 2)
|
||||
let t = StringBuilder (h.Length * 2)
|
||||
|
||||
h
|
||||
|> List.iter (fun b -> t.AppendFormat ("{0:x2}", b) |> ignore)
|
||||
for b in h do
|
||||
t.AppendFormat ("{0:x2}", b) |> ignore
|
||||
|
||||
t.ToString ()
|
||||
|
||||
[<RequireQualifiedAccess>]
|
||||
module Hash =
|
||||
|
||||
let ofBytes s = s |> Seq.toList |> Hash
|
||||
let ofBytes (s : byte array) = s |> Hash
|
||||
|
||||
let ofString (s : string) : Hash =
|
||||
let rec b (pos : int) =
|
||||
@@ -31,7 +32,7 @@ module Hash =
|
||||
yield! b (pos + 2)
|
||||
}
|
||||
|
||||
b 0 |> ofBytes
|
||||
b 0 |> Seq.toArray |> ofBytes
|
||||
|
||||
// Given a byte array of *characters* spelling out e.g. 'b' 'd' '6' '3', return the hash this is spelling out.
|
||||
let ofSpelling (input : byte array) : Hash =
|
||||
@@ -54,6 +55,10 @@ module Hash =
|
||||
yield! b (pos + 2)
|
||||
}
|
||||
|
||||
b 0 |> ofBytes
|
||||
fun i ->
|
||||
value input.[2 * i] * 16uy
|
||||
+ value input.[2 * i + 1]
|
||||
|> Array.init (input.Length / 2)
|
||||
|> ofBytes
|
||||
|
||||
let toString (h : Hash) : string = h.ToString ()
|
||||
|
@@ -1,10 +1,7 @@
|
||||
namespace Git
|
||||
|
||||
type Header =
|
||||
| Blob of int // length of content
|
||||
| Tree of int // length of content
|
||||
| Commit of int // length of content
|
||||
// | Tag
|
||||
/// An object type and the length of its content.
|
||||
type internal Header = ObjectType * int
|
||||
|
||||
[<RequireQualifiedAccess>]
|
||||
module internal Header =
|
||||
@@ -20,9 +17,12 @@ module internal Header =
|
||||
let toBytes (h : Header) : byte array =
|
||||
let s =
|
||||
match h with
|
||||
| Header.Blob length -> sprintf "blob %i" length
|
||||
| Header.Tree length -> sprintf "tree %i" length
|
||||
| Header.Commit length -> sprintf "commit %i" length
|
||||
| ObjectType.Blob, length -> sprintf "blob %i" length
|
||||
| ObjectType.Tree, length -> sprintf "tree %i" length
|
||||
| ObjectType.Commit, length -> sprintf "commit %i" length
|
||||
| ObjectType.Tag, length ->
|
||||
// TODO - is this correct?
|
||||
sprintf "tag %i" length
|
||||
|
||||
// If perf critical, could optimise allocation here
|
||||
Array.append (System.Text.Encoding.ASCII.GetBytes s) [| 0uy |]
|
||||
@@ -41,7 +41,7 @@ module internal Header =
|
||||
&& s.[4] = 32uy
|
||||
then
|
||||
let number = parseIntFromAsciiBytes 5 s
|
||||
Header.Blob number |> Some
|
||||
(ObjectType.Blob, number) |> Some
|
||||
else
|
||||
None
|
||||
| 116uy ->
|
||||
@@ -53,7 +53,7 @@ module internal Header =
|
||||
&& s.[4] = 32uy
|
||||
then
|
||||
let number = parseIntFromAsciiBytes 5 s
|
||||
Header.Tree number |> Some
|
||||
(ObjectType.Tree, number) |> Some
|
||||
else
|
||||
None
|
||||
| 99uy ->
|
||||
@@ -68,7 +68,7 @@ module internal Header =
|
||||
&& s.[6] = 32uy
|
||||
then
|
||||
let number = parseIntFromAsciiBytes 7 s
|
||||
Header.Commit number |> Some
|
||||
(ObjectType.Commit, number) |> Some
|
||||
else
|
||||
None
|
||||
| _ -> None
|
||||
|
@@ -6,6 +6,7 @@ type Object =
|
||||
| Blob of byte array
|
||||
| Tree of TreeEntry list
|
||||
| Commit of CommitEntry
|
||||
| Tag of TagEntry
|
||||
|
||||
override this.ToString () =
|
||||
match this with
|
||||
@@ -16,6 +17,7 @@ type Object =
|
||||
|> String.concat "\n"
|
||||
|> sprintf "tree:\n%+A"
|
||||
| Commit c -> sprintf "commit:\n%O" c
|
||||
| Tag t -> sprintf "tag:\n%O" t
|
||||
|
||||
[<RequireQualifiedAccess>]
|
||||
module Object =
|
||||
@@ -63,3 +65,10 @@ module Object =
|
||||
|> Seq.map (fun i -> sprintf "%s%s" i.Directory.Name i.Name)
|
||||
|> Seq.map Hash.ofString
|
||||
|> List.ofSeq
|
||||
|
||||
let getType (o : Object) : ObjectType =
|
||||
match o with
|
||||
| Object.Blob _ -> ObjectType.Blob
|
||||
| Object.Tag _ -> ObjectType.Tag
|
||||
| Object.Tree _ -> ObjectType.Tree
|
||||
| Object.Commit _ -> ObjectType.Commit
|
||||
|
625
Git/PackFile.fs
Normal file
625
Git/PackFile.fs
Normal file
@@ -0,0 +1,625 @@
|
||||
namespace Git
|
||||
|
||||
open System
|
||||
open System.IO
|
||||
open System.IO.Abstractions
|
||||
open Git.Internals
|
||||
open Force.Crc32
|
||||
open ICSharpCode.SharpZipLib.Zip.Compression
|
||||
open ICSharpCode.SharpZipLib.Zip.Compression.Streams
|
||||
|
||||
type PackObjectType =
|
||||
| ObjCommit = 1uy
|
||||
| ObjTree = 2uy
|
||||
| ObjBlob = 3uy
|
||||
| ObjTag = 4uy
|
||||
/// Encodes the base object's offset in the pack file.
|
||||
| ObjOfsDelta = 6uy
|
||||
/// Encodes the name of the base object.
|
||||
| ObjRefDelta = 7uy
|
||||
|
||||
type private PackIndexOffset =
|
||||
| RawOffset of uint32
|
||||
/// This entry is indicating that the index's fifth layer contains the offset, and it's
|
||||
/// at this offset within the fifth layer.
|
||||
| LayerFiveEntry of uint32
|
||||
|
||||
type PackIndex =
|
||||
private
|
||||
{
|
||||
Names : byte[][]
|
||||
Offsets : uint64[]
|
||||
ObjectChecksums : uint32[]
|
||||
/// 20-byte checksum of entire pack file
|
||||
Checksum : byte[]
|
||||
}
|
||||
|
||||
type PackObjectMetadata =
|
||||
{
|
||||
SizeCompressed : uint64
|
||||
SizeUncompressed : uint64
|
||||
OffsetInPackFile : uint64
|
||||
}
|
||||
|
||||
override this.ToString () =
|
||||
sprintf "%i %i %i" this.SizeUncompressed this.SizeCompressed this.OffsetInPackFile
|
||||
|
||||
type PackObject =
|
||||
/// TODO: interpret the byte array as a delta. It's already been decompressed.
|
||||
| Delta of PackObject * byte array * Hash * PackObjectMetadata
|
||||
| Object of Git.Object * Hash * PackObjectMetadata
|
||||
|
||||
[<RequireQualifiedAccess>]
|
||||
module PackFile =
|
||||
|
||||
/// Returns the int, and the bytes read (for CRC32 purposes)
|
||||
let private readSizeEncodedInt (s : Stream) : uint64 * byte[] =
|
||||
let rec go (count : int) (bytes : ResizeArray<byte>) (ans : uint64) =
|
||||
let b = s.ReadByte ()
|
||||
|
||||
if b < 0 then
|
||||
failwith "File ended while reading size encoded int"
|
||||
|
||||
let ans = ans + (uint64 (b % 128) <<< (count * 7))
|
||||
bytes.Add (byte b)
|
||||
|
||||
if b >= 128 then
|
||||
go (count + 1) bytes ans
|
||||
else
|
||||
ans, bytes.ToArray ()
|
||||
|
||||
go 0 (ResizeArray ()) 0UL
|
||||
|
||||
let private readDeltaEncodedInt (s : Stream) : uint64 * byte[] =
|
||||
let rec go (count : int) (bytes : ResizeArray<byte>) (ans : uint64) =
|
||||
let b = s.ReadByte ()
|
||||
|
||||
if b < 0 then
|
||||
failwith "File ended while reading size encoded int"
|
||||
|
||||
let ans = (ans <<< 7) + uint64 (b % 128)
|
||||
bytes.Add (byte b)
|
||||
|
||||
if b >= 128 then
|
||||
go (count + 1) bytes ans
|
||||
else
|
||||
let bytes = bytes.ToArray ()
|
||||
let mutable ans = ans
|
||||
|
||||
for i in 1..count do
|
||||
ans <- ans + (1UL <<< (7 * i))
|
||||
|
||||
ans, bytes
|
||||
|
||||
go 0 (ResizeArray ()) 0UL
|
||||
|
||||
let private toUint (bytes : byte[]) : uint32 =
|
||||
let mutable ans = 0u
|
||||
|
||||
for b in bytes do
|
||||
ans <- ans * 256u + uint32 b
|
||||
|
||||
ans
|
||||
|
||||
let private toUint64 (bytes : byte[]) : uint64 =
|
||||
let mutable ans = 0uL
|
||||
|
||||
for b in bytes do
|
||||
ans <- ans * 256uL + uint64 b
|
||||
|
||||
ans
|
||||
|
||||
type private Preamble =
|
||||
/// 20-byte name
|
||||
| BaseObjectName of Hash
|
||||
| Offset of uint64
|
||||
|
||||
[<RequireQualifiedAccess>]
|
||||
type private ParsedPackObject =
|
||||
| Object of Git.Object
|
||||
| Delta of Preamble * data : byte[]
|
||||
|
||||
/// If this was the last object, i.e. if untilPos was None, returns the 20-byte footer.
|
||||
let private parseObject
|
||||
(untilPos : uint64 option)
|
||||
(expectedCrc : uint32)
|
||||
(s : Stream)
|
||||
: ParsedPackObject * PackObjectMetadata
|
||||
=
|
||||
let startingOffset = s.Position |> uint64
|
||||
let firstByte = s.ReadByte ()
|
||||
|
||||
if firstByte < 0 then
|
||||
failwith "expected to read an object, but file ended"
|
||||
|
||||
let firstByte = byte firstByte
|
||||
|
||||
let objectType =
|
||||
Microsoft.FSharp.Core.LanguagePrimitives.EnumOfValue<byte, PackObjectType> ((firstByte >>> 4) % 8uy)
|
||||
|
||||
let startSize = firstByte % 16uy
|
||||
|
||||
let size, header =
|
||||
if firstByte >= 128uy then
|
||||
let output, bytes = readSizeEncodedInt s
|
||||
(output <<< 4) + (uint64 startSize), [| yield firstByte ; yield! bytes |]
|
||||
else
|
||||
uint64 startSize, [| firstByte |]
|
||||
|
||||
let preamble =
|
||||
match objectType with
|
||||
| PackObjectType.ObjOfsDelta ->
|
||||
let offset, bytes = readDeltaEncodedInt s
|
||||
(Preamble.Offset offset, Some bytes) |> Some
|
||||
| PackObjectType.ObjRefDelta ->
|
||||
let name = Stream.consume s 20 |> Hash.ofBytes
|
||||
|
||||
(Preamble.BaseObjectName name, None) |> Some
|
||||
| _ -> None
|
||||
|
||||
let object =
|
||||
match untilPos with
|
||||
| None ->
|
||||
let finalObjectAndFooter = Stream.consumeToEnd s
|
||||
|
||||
finalObjectAndFooter.[0 .. finalObjectAndFooter.Length - 21]
|
||||
| Some untilPos ->
|
||||
let numToConsume =
|
||||
if untilPos < uint64 s.Position then
|
||||
failwith "Tried to consume into a negative stream offset"
|
||||
|
||||
untilPos - uint64 s.Position
|
||||
|
||||
if numToConsume > uint64 Int32.MaxValue then
|
||||
failwith "Internal error: object too large for this implementation to consume"
|
||||
|
||||
let numToConsume = int numToConsume
|
||||
|
||||
Stream.consume s numToConsume
|
||||
|
||||
// TODO - check CRCs, this is currently failing
|
||||
//let obtainedCrc = Crc32Algorithm.Compute (object)
|
||||
//if obtainedCrc <> expectedCrc then
|
||||
// failwithf "Compressed object had unexpected CRC. Expected: %i. Got: %i" expectedCrc obtainedCrc
|
||||
|
||||
use objectStream = new MemoryStream (object)
|
||||
use s = new InflaterInputStream (objectStream, Inflater ())
|
||||
use resultStream = new MemoryStream ()
|
||||
s.CopyTo resultStream
|
||||
let decompressedObject = resultStream.ToArray ()
|
||||
|
||||
if uint64 decompressedObject.LongLength <> size then
|
||||
failwithf "Object had unexpected length. Expected: %i. Got: %i" size decompressedObject.LongLength
|
||||
|
||||
let packObjectMetadata =
|
||||
{
|
||||
SizeCompressed =
|
||||
let size = uint64 object.LongLength + uint64 header.Length
|
||||
|
||||
match preamble with
|
||||
| None
|
||||
| Some (_, None) -> size
|
||||
| Some (_, Some bytes) -> size + uint64 bytes.LongLength
|
||||
SizeUncompressed = size
|
||||
OffsetInPackFile = startingOffset
|
||||
}
|
||||
|
||||
let toRet =
|
||||
match objectType, preamble with
|
||||
| PackObjectType.ObjBlob, None ->
|
||||
Object.Blob decompressedObject
|
||||
|> ParsedPackObject.Object
|
||||
| PackObjectType.ObjCommit, None ->
|
||||
Commit.decode decompressedObject
|
||||
|> Object.Commit
|
||||
|> ParsedPackObject.Object
|
||||
| PackObjectType.ObjTree, None ->
|
||||
Tree.decode decompressedObject
|
||||
|> Object.Tree
|
||||
|> ParsedPackObject.Object
|
||||
| PackObjectType.ObjOfsDelta, Some (preamble, _) -> ParsedPackObject.Delta (preamble, decompressedObject)
|
||||
| PackObjectType.ObjRefDelta, Some (preamble, _) -> ParsedPackObject.Delta (preamble, decompressedObject)
|
||||
| PackObjectType.ObjTag, None ->
|
||||
Tag.decode decompressedObject
|
||||
|> Object.Tag
|
||||
|> ParsedPackObject.Object
|
||||
| PackObjectType.ObjBlob, Some _
|
||||
| PackObjectType.ObjTag, Some _
|
||||
| PackObjectType.ObjTree, Some _
|
||||
| PackObjectType.ObjCommit, Some _ ->
|
||||
failwith "Logic error in this library, got a preamble for an unexpected object type"
|
||||
| PackObjectType.ObjRefDelta, None
|
||||
| PackObjectType.ObjOfsDelta, None ->
|
||||
failwith "Logic error in this library, got no preamble for a delta type"
|
||||
| _, _ -> failwith "Unexpected object type"
|
||||
|
||||
toRet, packObjectMetadata
|
||||
|
||||
type private HeaderMetadata =
|
||||
{
|
||||
Stream : Stream
|
||||
NumberOfObjects : uint32
|
||||
}
|
||||
|
||||
let private readAndValidateHeader (file : IFileInfo) : HeaderMetadata =
|
||||
let s = file.OpenRead ()
|
||||
let header = Stream.consume s 4
|
||||
|
||||
if header <> [| 80uy ; 65uy ; 67uy ; 75uy |] then
|
||||
// "PACK"
|
||||
failwithf "Invalid pack file header: %+A" header
|
||||
|
||||
let versionBytes = Stream.consume s 4
|
||||
let version = toUint versionBytes
|
||||
|
||||
if version <> 2u then
|
||||
failwithf "Unsupported pack file version %i" version
|
||||
|
||||
let objectNumBytes = Stream.consume s 4
|
||||
let numberOfObjects = toUint objectNumBytes
|
||||
|
||||
{
|
||||
Stream = s
|
||||
NumberOfObjects = numberOfObjects
|
||||
}
|
||||
|
||||
let private resolveDeltas (packs : (Hash * uint64 * (ParsedPackObject * PackObjectMetadata)) array) : PackObject[] =
|
||||
let packsByOffset =
|
||||
packs
|
||||
|> Seq.map (fun (hash, offset, data) -> offset, (hash, data))
|
||||
|> Map.ofSeq
|
||||
|
||||
let packsByHash =
|
||||
packs
|
||||
|> Seq.map (fun (hash, offset, data) -> hash, (offset, data))
|
||||
|> Map.ofSeq
|
||||
|
||||
let rec resolve (object : ParsedPackObject) (name : Hash) (metadata : PackObjectMetadata) : PackObject =
|
||||
match object with
|
||||
| ParsedPackObject.Object o -> PackObject.Object (o, name, metadata)
|
||||
| ParsedPackObject.Delta (deltaType, diff) ->
|
||||
match deltaType with
|
||||
| Preamble.BaseObjectName name ->
|
||||
let _, (derivedObject, derivedMetadata) =
|
||||
match Map.tryFind name packsByHash with
|
||||
| Some x -> x
|
||||
| None -> failwithf "Could not find object %s in pack file" (Hash.toString name)
|
||||
|
||||
(resolve derivedObject name derivedMetadata, diff, name, metadata)
|
||||
|> PackObject.Delta
|
||||
| Preamble.Offset offset ->
|
||||
let absolutePosition =
|
||||
if metadata.OffsetInPackFile < offset then
|
||||
failwith "tried to offset into a negative number"
|
||||
|
||||
metadata.OffsetInPackFile - offset
|
||||
|
||||
let derivedName, (derivedObject, derivedMetadata) =
|
||||
match Map.tryFind absolutePosition packsByOffset with
|
||||
| None ->
|
||||
failwithf
|
||||
"Unable to find object %s at absolute position %i (calculated as offset %i from position %i)"
|
||||
(Hash.toString name)
|
||||
absolutePosition
|
||||
offset
|
||||
metadata.OffsetInPackFile
|
||||
| Some (a, b) -> a, b
|
||||
|
||||
(resolve derivedObject derivedName derivedMetadata, diff, name, metadata)
|
||||
|> PackObject.Delta
|
||||
|
||||
packs
|
||||
|> Array.map (fun (name, _, (parsed, metadata)) -> resolve parsed name metadata)
|
||||
|
||||
let readAll (file : IFileInfo) (index : PackIndex) =
|
||||
let header = readAndValidateHeader file
|
||||
|
||||
use stream = header.Stream
|
||||
|
||||
let sortedObjectPositions = index.Offsets |> Array.sort
|
||||
|
||||
let packs =
|
||||
Array.zip3 index.ObjectChecksums index.Names index.Offsets
|
||||
|> Array.map (fun (crc, name, offset) ->
|
||||
let nextObjectIndex =
|
||||
// TODO probably binary search this, or maintain an incrementing
|
||||
// counter
|
||||
sortedObjectPositions
|
||||
|> Array.tryFindIndex (fun pos -> pos > offset)
|
||||
|
||||
// Account for the case where the index file contains garbage
|
||||
let startingIndex =
|
||||
match nextObjectIndex with
|
||||
| None -> sortedObjectPositions.[sortedObjectPositions.Length - 1]
|
||||
| Some 0 -> uint64 stream.Position
|
||||
| Some i -> sortedObjectPositions.[i - 1]
|
||||
|
||||
stream.Seek (int64 startingIndex, SeekOrigin.Begin)
|
||||
|> ignore
|
||||
|
||||
let nextObjectPosition =
|
||||
nextObjectIndex
|
||||
|> Option.map (fun i -> sortedObjectPositions.[i])
|
||||
|
||||
Hash.ofBytes name, startingIndex, parseObject nextObjectPosition crc stream
|
||||
)
|
||||
|
||||
resolveDeltas packs
|
||||
|
||||
type private Compare =
|
||||
| Less
|
||||
| Greater
|
||||
| Equal
|
||||
|
||||
let private binarySearch<'chop, 'a>
|
||||
(get : 'chop -> 'a)
|
||||
(compare : 'a -> 'a -> Compare)
|
||||
(mean : 'chop -> 'chop -> 'chop)
|
||||
(needle : 'a)
|
||||
: 'chop -> 'chop -> 'chop option
|
||||
=
|
||||
let rec go (startPoint : 'chop) (endPoint : 'chop) =
|
||||
let start = get startPoint
|
||||
|
||||
match compare start (get endPoint) with
|
||||
| Compare.Greater -> None
|
||||
| Compare.Equal ->
|
||||
if compare start needle = Compare.Equal then
|
||||
Some startPoint
|
||||
else
|
||||
None
|
||||
| Compare.Less ->
|
||||
|
||||
let mean = mean startPoint endPoint
|
||||
|
||||
match compare (get mean) needle with
|
||||
| Compare.Equal -> Some mean
|
||||
| Compare.Greater -> go startPoint mean
|
||||
| Compare.Less -> go mean endPoint
|
||||
|
||||
go
|
||||
|
||||
let private consumeOffset (s : Stream) : PackIndexOffset =
|
||||
let firstByte = s.ReadByte ()
|
||||
|
||||
if firstByte < 0 then
|
||||
failwith "expected to read an offset, but got end of file"
|
||||
|
||||
let firstByte = byte firstByte
|
||||
let remainingBytes = Stream.consume s 3
|
||||
|
||||
if firstByte >= 128uy then
|
||||
toUint remainingBytes
|
||||
+ ((uint32 (firstByte % 128uy)) <<< 24)
|
||||
|> PackIndexOffset.LayerFiveEntry
|
||||
else
|
||||
toUint remainingBytes
|
||||
+ ((uint32 firstByte) <<< 24)
|
||||
|> PackIndexOffset.RawOffset
|
||||
|
||||
let readIndex (file : IFileInfo) : PackIndex =
|
||||
use s = file.OpenRead ()
|
||||
let header = Stream.consume s 4
|
||||
|
||||
if header <> [| 255uy ; 116uy ; 79uy ; 99uy |] then
|
||||
failwithf "Invalid pack file header, may indicate unsupported version: %+A" header
|
||||
|
||||
let versionBytes = Stream.consume s 4
|
||||
let version = toUint versionBytes
|
||||
|
||||
if version <> 2u then
|
||||
failwithf "Unsupported pack index version %i" version
|
||||
|
||||
// Explanation from https://codewords.recurse.com/issues/three/unpacking-git-packfiles:
|
||||
// the N-th entry of this table records the number of objects in the corresponding pack,
|
||||
// the first byte of whose object name is less than or equal to N.
|
||||
// For example, if the first entry has value 4, then we eventually expect to find four objects in the
|
||||
// pack whose 20-byte object name (derived from SHA hashes) starts with 00.
|
||||
// Then if the second entry has value 4, we eventually expect to find no objects starting 01, because
|
||||
// the count is cumulative.
|
||||
let countsOfEachName =
|
||||
let arr = Array.zeroCreate<uint32> 256
|
||||
|
||||
for i in 0..255 do
|
||||
arr.[i] <- Stream.consume s 4 |> toUint
|
||||
|
||||
arr
|
||||
|
||||
let totalObjectNumber = Array.last countsOfEachName
|
||||
|
||||
if int64 totalObjectNumber > int64 Int32.MaxValue then
|
||||
failwithf
|
||||
"Internal error: we don't yet support pack files with more than %i entries (%s)"
|
||||
Int32.MaxValue
|
||||
file.FullName
|
||||
|
||||
let totalObjectNumber = int totalObjectNumber
|
||||
|
||||
let objectNames =
|
||||
fun _ -> Stream.consume s 20
|
||||
|> Array.init totalObjectNumber
|
||||
|
||||
let crc =
|
||||
fun _ -> Stream.consume s 4 |> toUint
|
||||
|> Array.init totalObjectNumber
|
||||
|
||||
let offsets =
|
||||
fun _ -> consumeOffset s
|
||||
|> Array.init totalObjectNumber
|
||||
|
||||
let bytesConsumedSoFar =
|
||||
// 4 for the header, 4 for the version, 256 * 4 for the counts, 20 * (that) for the names, 4 * (that)
|
||||
// for the crc, 4 * (that) for the small offsets.
|
||||
4
|
||||
+ 4
|
||||
+ 256 * 4
|
||||
+ 20 * totalObjectNumber
|
||||
+ 4 * totalObjectNumber
|
||||
+ 4 * totalObjectNumber
|
||||
|> uint64
|
||||
|
||||
// Fortuitously, 20 + 20 is the size of the trailer, and that's divisible by 8,
|
||||
// so we can just read in all remaining 8-byte chunks.
|
||||
let buffer = Array.zeroCreate<byte> 8
|
||||
let entries = ResizeArray<byte[]> ()
|
||||
|
||||
while s.Read (buffer, 0, 8) = 8 do
|
||||
entries.Add (buffer.Clone () |> unbox<byte[]>)
|
||||
|
||||
let checksumPackFile = Array.zeroCreate<byte> 20
|
||||
Array.Copy (entries.[entries.Count - 5], 0, checksumPackFile, 0, 8)
|
||||
Array.Copy (entries.[entries.Count - 4], 0, checksumPackFile, 8, 8)
|
||||
Array.Copy (entries.[entries.Count - 3], 0, checksumPackFile, 16, 4)
|
||||
let checksumIndexFile = Array.zeroCreate<byte> 20
|
||||
Array.Copy (entries.[entries.Count - 3], 4, checksumIndexFile, 0, 4)
|
||||
Array.Copy (entries.[entries.Count - 2], 0, checksumIndexFile, 4, 8)
|
||||
Array.Copy (entries.[entries.Count - 1], 0, checksumIndexFile, 12, 8)
|
||||
|
||||
// TODO: validate the index file checksum
|
||||
|
||||
let longEntries =
|
||||
fun i ->
|
||||
let rawOffset = entries.[i] |> toUint64
|
||||
rawOffset - bytesConsumedSoFar
|
||||
|> Array.init (entries.Count - 5)
|
||||
|
||||
let offsets =
|
||||
offsets
|
||||
|> Array.map (fun offset ->
|
||||
match offset with
|
||||
| PackIndexOffset.LayerFiveEntry i -> longEntries.[int i]
|
||||
| PackIndexOffset.RawOffset i -> uint64 i
|
||||
)
|
||||
|
||||
{
|
||||
Offsets = offsets
|
||||
Names = objectNames
|
||||
Checksum = checksumPackFile
|
||||
ObjectChecksums = crc
|
||||
}
|
||||
|
||||
/// The streams are expected to be seekable and readable.
|
||||
/// The packFile stream is expected to have a Length.
|
||||
let rec private locateObjectInStream
|
||||
(Hash object as hash)
|
||||
(packIndex : Stream)
|
||||
(packFile : Stream)
|
||||
: PackObject option
|
||||
=
|
||||
let header = Stream.consume packIndex 4
|
||||
|
||||
if header <> [| 255uy ; 116uy ; 79uy ; 99uy |] then
|
||||
failwithf "Invalid pack file header, may indicate unsupported version: %+A" header
|
||||
|
||||
let versionBytes = Stream.consume packIndex 4
|
||||
let version = toUint versionBytes
|
||||
|
||||
if version <> 2u then
|
||||
failwithf "Unsupported pack index version %i" version
|
||||
|
||||
let nameLookup = object.[0]
|
||||
|
||||
let startAtThisPrefix, endOfThisPrefix =
|
||||
if nameLookup = 0uy then
|
||||
0L, Stream.consume packIndex 4 |> toUint |> int64
|
||||
else
|
||||
packIndex.Seek ((int64 (nameLookup - 1uy)) * 4L, SeekOrigin.Current)
|
||||
|> ignore
|
||||
|
||||
let before = Stream.consume packIndex 4 |> toUint |> int64
|
||||
let after = Stream.consume packIndex 4 |> toUint |> int64
|
||||
before, after
|
||||
|
||||
let totalCount =
|
||||
packIndex.Seek (4L + 4L + 255L * 4L, SeekOrigin.Begin)
|
||||
|> ignore
|
||||
|
||||
Stream.consume packIndex 4 |> toUint |> int64
|
||||
|
||||
let comparisonMemo = System.Collections.Generic.Dictionary ()
|
||||
|
||||
let lookup (location : int64) : byte[] =
|
||||
match comparisonMemo.TryGetValue location with
|
||||
| true, v -> v
|
||||
| false, _ ->
|
||||
|
||||
packIndex.Seek (4L + 4L + 256L * 4L + location * 20L, SeekOrigin.Begin)
|
||||
|> ignore
|
||||
|
||||
let number = Stream.consume packIndex 20
|
||||
comparisonMemo.[location] <- number
|
||||
number
|
||||
|
||||
let compare (name1 : byte[]) (name2 : byte[]) : Compare =
|
||||
let rec go (i : int) =
|
||||
if i >= 20 then Compare.Equal
|
||||
else if name1.[i] < name2.[i] then Compare.Less
|
||||
elif name1.[i] > name2.[i] then Compare.Greater
|
||||
else go (i + 1)
|
||||
|
||||
go 0
|
||||
|
||||
let location =
|
||||
binarySearch lookup compare (fun x y -> (x + y) / 2L) object startAtThisPrefix endOfThisPrefix
|
||||
|
||||
match location with
|
||||
| None -> None
|
||||
| Some location ->
|
||||
|
||||
packIndex.Seek (
|
||||
4L
|
||||
+ 4L
|
||||
+ 256L * 4L
|
||||
+ totalCount * 24L
|
||||
+ location * 4L,
|
||||
SeekOrigin.Begin
|
||||
)
|
||||
|> ignore
|
||||
|
||||
let index = consumeOffset packIndex
|
||||
|
||||
let index =
|
||||
match index with
|
||||
| PackIndexOffset.RawOffset i -> int64 i
|
||||
| PackIndexOffset.LayerFiveEntry entry ->
|
||||
packIndex.Seek (
|
||||
4L
|
||||
+ 4L
|
||||
+ 256L * 4L
|
||||
+ totalCount * 28L
|
||||
+ (int64 entry) * 8L,
|
||||
SeekOrigin.Begin
|
||||
)
|
||||
|> ignore
|
||||
|
||||
Stream.consume packIndex 8 |> toUint64 |> int64
|
||||
|
||||
packFile.Seek (index, SeekOrigin.Begin) |> ignore
|
||||
|
||||
let object, metadata =
|
||||
// TODO constrain where we're reading to, and find the CRC
|
||||
parseObject (Some (uint64 packFile.Length)) 0u packFile
|
||||
|
||||
match object with
|
||||
| ParsedPackObject.Object o -> PackObject.Object (o, hash, metadata) |> Some
|
||||
| ParsedPackObject.Delta (preamble, data) ->
|
||||
|
||||
match preamble with
|
||||
| Preamble.BaseObjectName name ->
|
||||
let subObject = locateObjectInStream name packIndex packFile
|
||||
|
||||
match subObject with
|
||||
| None -> failwithf "Failed to find sub-object with name %s" (Hash.toString name)
|
||||
| Some subObject ->
|
||||
(subObject, data, hash, metadata)
|
||||
|> PackObject.Delta
|
||||
|> Some
|
||||
| Preamble.Offset offset ->
|
||||
(failwith "", data, hash, metadata)
|
||||
|> PackObject.Delta
|
||||
|> Some
|
||||
|
||||
let locateObject (h : Hash) (packIndex : IFileInfo) (packFile : IFileInfo) : PackObject option =
|
||||
use index = packIndex.OpenRead ()
|
||||
use file = packFile.OpenRead ()
|
||||
locateObjectInStream h index file
|
103
Git/Parse.fs
103
Git/Parse.fs
@@ -1,3 +1,106 @@
|
||||
namespace Git.Internals
|
||||
|
||||
open Git
|
||||
|
||||
open System.IO
|
||||
open System.Text
|
||||
open Microsoft.FSharp.Data.UnitSystems.SI.UnitNames
|
||||
|
||||
type OneOf = OneOf of string list
|
||||
|
||||
[<RequireQualifiedAccess>]
|
||||
module Parse =
|
||||
|
||||
let private parseInt (chars : byte array) =
|
||||
let rec acc (i : int) (soFar : int) =
|
||||
if i = chars.Length then
|
||||
soFar
|
||||
else if byte '0' <= chars.[i] && chars.[i] <= byte '9' then
|
||||
acc (i + 1) (10 * soFar + int (chars.[i] - byte '0'))
|
||||
else
|
||||
failwithf "non-digit character '%i' ('%c') at index %i" chars.[i] (char chars.[i]) i
|
||||
|
||||
acc 0 0
|
||||
|
||||
let consumeWord (context : string) (OneOf expecting) (s : Stream) =
|
||||
let word = Stream.consumeTo s 32uy
|
||||
|
||||
match word with
|
||||
| None ->
|
||||
failwithf
|
||||
"Expected a word '%s' in a %s object, but stream ran out"
|
||||
context
|
||||
(expecting |> String.concat "//")
|
||||
| Some word ->
|
||||
|
||||
let word = word |> Encoding.UTF8.GetString
|
||||
|
||||
if not <| List.contains word expecting then
|
||||
failwithf "Expected a word '%s' in a %s object, but got '%s'" (expecting |> String.concat "//") context word
|
||||
|
||||
word
|
||||
|
||||
let consumeHash (context : string) (s : Stream) =
|
||||
let hash = Stream.consumeTo s 10uy
|
||||
|
||||
match hash with
|
||||
| None -> failwithf "Stream ended before we could read hash in context '%s'." context
|
||||
| Some hash -> hash |> Hash.ofSpelling
|
||||
|
||||
let consumeLabelledHash (context : string) (expecting : OneOf) (s : Stream) : string * Hash =
|
||||
let w = consumeWord context expecting s
|
||||
let h = consumeHash (sprintf "%s: %s" context w) s
|
||||
w, h
|
||||
|
||||
let consumePerson (id : string) (s : Stream) =
|
||||
let name =
|
||||
Stream.consumeTo s (byte '<')
|
||||
|> Option.map Encoding.UTF8.GetString
|
||||
|
||||
match name with
|
||||
| None -> failwithf "No %s name present in object." id
|
||||
| Some name ->
|
||||
|
||||
if name.[name.Length - 1] <> ' ' then
|
||||
failwithf "Name of %s '%s' unexpectedly fails to end with a space" id name
|
||||
|
||||
let name = name.Substring (0, name.Length - 1)
|
||||
|
||||
let email =
|
||||
Stream.consumeTo s (byte '>')
|
||||
|> Option.map Encoding.UTF8.GetString
|
||||
|
||||
match email with
|
||||
| None -> failwithf "No %s email present in object." id
|
||||
| Some email ->
|
||||
|
||||
let space = Stream.consumeTo s 32uy
|
||||
|
||||
match space with
|
||||
| None -> failwithf "Object ended after %s email" id
|
||||
| Some s ->
|
||||
if s.Length <> 0 then
|
||||
failwithf "Expected a space immediately after %s email, got '%s'" id (Encoding.UTF32.GetString s)
|
||||
|
||||
let timestamp = Stream.consumeTo s 32uy
|
||||
|
||||
match timestamp with
|
||||
| None -> failwithf "Commit object ended before %s timestamp" id
|
||||
| Some timestamp ->
|
||||
|
||||
let timestamp = parseInt timestamp * 1<second>
|
||||
|
||||
let offset =
|
||||
Stream.consumeTo s 10uy
|
||||
|> Option.map Encoding.UTF8.GetString
|
||||
|
||||
match offset with
|
||||
| None -> failwithf "Commit object ended before %s timezone" id
|
||||
| Some offset ->
|
||||
|
||||
{
|
||||
Name = name
|
||||
Email = email
|
||||
Date = timestamp
|
||||
DateTimezone = offset
|
||||
}
|
||||
|
@@ -50,7 +50,7 @@ module internal Stream =
|
||||
|
||||
output
|
||||
|
||||
let consumeToEnd (b : MemoryStream) : byte array =
|
||||
let consumeToEnd (b : Stream) : byte array =
|
||||
use newMs = new MemoryStream ()
|
||||
b.CopyTo newMs
|
||||
newMs.ToArray ()
|
||||
|
88
Git/Tag.fs
Normal file
88
Git/Tag.fs
Normal file
@@ -0,0 +1,88 @@
|
||||
namespace Git
|
||||
|
||||
open System.IO
|
||||
open System.Text
|
||||
open Git.Internals
|
||||
|
||||
type TaggedObjectType =
|
||||
| Commit
|
||||
|
||||
override this.ToString () =
|
||||
match this with
|
||||
| TaggedObjectType.Commit -> "commit"
|
||||
|
||||
static member Parse (s : string) =
|
||||
match s with
|
||||
| "commit" -> TaggedObjectType.Commit
|
||||
| _ -> failwithf "Unrecognised tagged object type: %s" s
|
||||
|
||||
type TagEntry =
|
||||
{
|
||||
Object : Hash
|
||||
Type : TaggedObjectType
|
||||
Name : string
|
||||
Tagger : Contributor
|
||||
Message : string
|
||||
}
|
||||
|
||||
override this.ToString () =
|
||||
sprintf "object %O\ntype %O\ntag %s\ntagger %O\n\n%s" this.Object this.Type this.Name this.Tagger this.Message
|
||||
|
||||
|
||||
[<RequireQualifiedAccess>]
|
||||
module Tag =
|
||||
let encode (entry : TagEntry) : byte array =
|
||||
entry.ToString () |> Encoding.UTF8.GetBytes
|
||||
|
||||
let decode (file : byte array) : TagEntry =
|
||||
use ms = new MemoryStream (file)
|
||||
|
||||
let objectHash =
|
||||
Parse.consumeWord "tag" (OneOf [ "object" ]) ms
|
||||
|> ignore
|
||||
|
||||
match Stream.consumeTo ms (byte '\n') with
|
||||
| None -> failwith "Tag object should have had a newline in"
|
||||
| Some h -> h |> Hash.ofSpelling
|
||||
|
||||
let typeReferredTo =
|
||||
Parse.consumeWord "tag" (OneOf [ "type" ]) ms
|
||||
|> ignore
|
||||
|
||||
match Stream.consumeTo ms (byte '\n') with
|
||||
| None -> failwith "Tag type should have had a newline in"
|
||||
| Some h ->
|
||||
h
|
||||
|> Encoding.ASCII.GetString
|
||||
|> TaggedObjectType.Parse
|
||||
|
||||
let tagName =
|
||||
Parse.consumeWord "tag" (OneOf [ "tag" ]) ms
|
||||
|> ignore
|
||||
|
||||
match Stream.consumeTo ms (byte '\n') with
|
||||
| None -> failwith "Tag name should have had a newline in"
|
||||
| Some t -> t |> Encoding.ASCII.GetString
|
||||
|
||||
let tagger =
|
||||
Parse.consumeWord "tag" (OneOf [ "tagger" ]) ms
|
||||
|> ignore
|
||||
|
||||
Parse.consumePerson "tagger" ms
|
||||
|
||||
let trailingNewline = Stream.consumeTo ms 10uy
|
||||
|
||||
match trailingNewline with
|
||||
| None -> failwith "Tag unexpectedly ended before message"
|
||||
| Some [||] -> ()
|
||||
| Some arr -> failwithf "Unexpectedly received data in between tagger and message: %+A" arr
|
||||
|
||||
let message = Stream.consumeToEnd ms |> Encoding.UTF8.GetString
|
||||
|
||||
{
|
||||
Object = objectHash
|
||||
Type = typeReferredTo
|
||||
Name = tagName
|
||||
Tagger = tagger
|
||||
Message = message
|
||||
}
|
Reference in New Issue
Block a user