Speed up day 24 (#40)

This commit is contained in:
Patrick Stevens
2022-12-24 14:40:37 +00:00
committed by GitHub
parent 19fa3a96b0
commit 1a8ae7a362
4 changed files with 95 additions and 128 deletions

View File

@@ -1,6 +1,7 @@
namespace AdventOfCode2022
open System
open Microsoft.FSharp.NativeInterop
#if DEBUG
open Checked
@@ -47,76 +48,35 @@ module Day24 =
output.ToArray (), width, y
let moveBlizzards (width : int) (height : int) (board : Day24Board) : Day24Board =
#if DEBUG
let board =
{
Elements = board
Width = width
}
#else
use boardPtr = fixed board
let board =
{
Elements = boardPtr
Width = width
Length = width * height
}
#endif
let resultArr = Array.zeroCreate<byte> (width * height)
#if DEBUG
let result =
{
Elements = resultArr
Width = width
}
#else
use ptr = fixed resultArr
let result =
{
Elements = ptr
Width = width
Length = resultArr.Length
}
#endif
let moveBlizzards (width : int) (height : int) (board : Arr2D<Byte>) : unit =
for y = 1 to height - 2 do
for x = 1 to width - 2 do
let directions = Arr2D.get board x y
if directions % 2uy = 1uy then
let y = if y = 1 then height - 2 else y - 1
let prev = Arr2D.get result x y
Arr2D.set result x y (prev + 1uy)
let prev = Arr2D.get board x y
Arr2D.set board x y (prev + 16uy)
if (directions / 2uy) % 2uy = 1uy then
let y = if y = height - 2 then 1 else y + 1
let prev = Arr2D.get result x y
Arr2D.set result x y (prev + 2uy)
let prev = Arr2D.get board x y
Arr2D.set board x y (prev + 32uy)
if (directions / 4uy) % 2uy = 1uy then
let x = if x = 1 then width - 2 else x - 1
let prev = Arr2D.get result x y
Arr2D.set result x y (prev + 4uy)
let prev = Arr2D.get board x y
Arr2D.set board x y (prev + 64uy)
if (directions / 8uy) % 2uy = 1uy then
let x = if x = width - 2 then 1 else x + 1
let prev = Arr2D.get result x y
Arr2D.set result x y (prev + 8uy)
let prev = Arr2D.get board x y
Arr2D.set board x y (prev + 128uy)
resultArr
let boardAtTimeInner (store : Day24Board ResizeArray) (width : int) (height : int) (day : int) =
if store.Count > day then
store.[day]
else
for i = store.Count to day do
store.Add (moveBlizzards width height store.[i - 1])
store.[day]
for y = 1 to height - 2 do
for x = 1 to width - 2 do
let prev = Arr2D.get board x y
Arr2D.set board x y (prev >>> 4)
let inline coordToInt' (width : int) (x : int) (y : int) : int = x + y * width
let inline coordToInt (width : int) (coord : Coordinate) : int = coordToInt' width coord.X coord.Y
@@ -130,7 +90,7 @@ module Day24 =
}
/// The buffer is an array of at least 5 Coordinates, except it's had coordToInt called on it.
let availableIndividualMoves
let inline populateAvailableMoves
(buffer : int[])
(width : int)
(height : int)
@@ -167,42 +127,12 @@ module Day24 =
bufLen
/// The buffer is an array of at least 5 Coordinates, except it's had coordToInt called on it.
let inline populateAvailableMoves
(width : int)
(height : int)
(boardsStore : Day24Board ResizeArray)
(buffer : int[])
(timeStep : int)
(currPos : Coordinate)
: int
=
let board = boardAtTimeInner boardsStore width height (timeStep + 1)
#if DEBUG
let board =
{
Elements = board
Width = width
}
#else
use ptr = fixed board
let board =
{
Elements = ptr
Width = width
Length = width * height
}
#endif
availableIndividualMoves buffer width height currPos board
let inline goFrom
(start : Coordinate)
(dest : Coordinate)
(width : int)
([<InlineIfLambda>] populateAvailableMoves : int[] -> int -> Coordinate -> int)
(height : int)
(board : Arr2D<byte>)
(timeStep : int)
=
let mutable buffer = ResizeArray ()
@@ -211,21 +141,34 @@ module Day24 =
let dest = coordToInt width dest
let rec go (timeStep : int) (toExplore : int ResizeArray) =
if toExplore.Contains dest then
moveBlizzards width height board
if toExplore.BinarySearch dest >= 0 then
timeStep + 1
else
buffer.Clear ()
for currPos in toExplore do
let bufLen = populateAvailableMoves movesBuffer timeStep (intToCoord width currPos)
do
let bufLen =
populateAvailableMoves movesBuffer width height (intToCoord width toExplore.[0]) board
for move = 0 to bufLen - 1 do
let move = movesBuffer.[move]
buffer.Add move
if not (buffer.Contains move) then
for currPosIndex = 1 to toExplore.Count - 1 do
let currPos = toExplore.[currPosIndex]
if toExplore.[currPosIndex - 1] <> currPos then
let bufLen =
populateAvailableMoves movesBuffer width height (intToCoord width currPos) board
for move = 0 to bufLen - 1 do
let move = movesBuffer.[move]
buffer.Add move
buffer.Sort ()
let continueWith = buffer
buffer <- toExplore
@@ -248,6 +191,7 @@ module Day24 =
Y = height - 2
}
width
height
let goToStart width height =
goFrom
@@ -260,25 +204,48 @@ module Day24 =
Y = 1
}
width
height
let part1 (lines : StringSplitEnumerator) : int =
let board, width, height = parse lines
#if DEBUG
let board =
{
Elements = board
Width = width
}
#else
use ptr = fixed board
let store = ResizeArray ()
store.Add board
let board =
{
Elements = ptr
Width = width
Length = width * height
}
#endif
let availableMoves = populateAvailableMoves width height store
goToEnd width height availableMoves 0
goToEnd width height board 0
let part2 (lines : StringSplitEnumerator) : int =
let board, width, height = parse lines
#if DEBUG
let board =
{
Elements = board
Width = width
}
#else
use ptr = fixed board
let store = ResizeArray ()
store.Add board
let board =
{
Elements = ptr
Width = width
Length = width * height
}
#endif
let availableMoves = populateAvailableMoves width height store
let toEnd = goToEnd width height availableMoves 0
let backToStart = goToStart width height availableMoves toEnd
goToEnd width height availableMoves backToStart
let toEnd = goToEnd width height board 0
let backToStart = goToStart width height board toEnd
goToEnd width height board backToStart

View File

@@ -10,11 +10,11 @@ Apple M1 Max, 1 CPU, 10 logical and 10 physical cores
```
| Method | Day | IsPartOne | Mean | Error | StdDev |
|---------- |---- |---------- |-------------:|------------:|------------:|
| **Benchmark** | **21** | **False** | **640.9 μs** | **1.31 μs** | **1.23 μs** |
| **Benchmark** | **21** | **True** | **579.0 μs** | **8.23 μs** | **7.70 μs** |
| **Benchmark** | **22** | **False** | **326.6 μs** | **2.11 μs** | **1.97 μs** |
| **Benchmark** | **22** | **True** | **217.7 μs** | **1.04 μs** | **0.97 μs** |
| **Benchmark** | **23** | **False** | **318,993.7 μs** | **4,929.31 μs** | **4,610.88 μs** |
| **Benchmark** | **23** | **True** | **2,714.7 μs** | **4.32 μs** | **3.61 μs** |
| **Benchmark** | **24** | **False** | **65,379.9 μs** | **159.24 μs** | **124.32 μs** |
| **Benchmark** | **24** | **True** | **21,290.9 μs** | **34.03 μs** | **28.41 μs** |
| **Benchmark** | **21** | **False** | **641.1 μs** | **1.80 μs** | **1.69 μs** |
| **Benchmark** | **21** | **True** | **580.3 μs** | **6.82 μs** | **6.05 μs** |
| **Benchmark** | **22** | **False** | **323.9 μs** | **0.66 μs** | **0.52 μs** |
| **Benchmark** | **22** | **True** | **218.4 μs** | **4.35 μs** | **4.28 μs** |
| **Benchmark** | **23** | **False** | **318,156.3 μs** | **4,910.78 μs** | **4,593.55 μs** |
| **Benchmark** | **23** | **True** | **2,684.9 μs** | **5.12 μs** | **4.27 μs** |
| **Benchmark** | **24** | **False** | **47,410.3 μs** | **63.64 μs** | **56.42 μs** |
| **Benchmark** | **24** | **True** | **15,613.9 μs** | **81.16 μs** | **71.95 μs** |

View File

@@ -1,9 +1,9 @@
Method,Job,AnalyzeLaunchVariance,EvaluateOverhead,MaxAbsoluteError,MaxRelativeError,MinInvokeCount,MinIterationTime,OutlierMode,Affinity,EnvironmentVariables,Jit,Platform,PowerPlanMode,Runtime,AllowVeryLargeObjects,Concurrent,CpuGroups,Force,HeapAffinitizeMask,HeapCount,NoAffinitize,RetainVm,Server,Arguments,BuildConfiguration,Clock,EngineFactory,NuGetReferences,Toolchain,IsMutator,InvocationCount,IterationCount,IterationTime,LaunchCount,MaxIterationCount,MaxWarmupIterationCount,MemoryRandomization,MinIterationCount,MinWarmupIterationCount,RunStrategy,UnrollFactor,WarmupCount,Day,IsPartOne,Mean,Error,StdDev
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,21,False,640.9 μs,1.31 μs,1.23 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,21,True,579.0 μs,8.23 μs,7.70 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,22,False,326.6 μs,2.11 μs,1.97 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,22,True,217.7 μs,1.04 μs,0.97 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,23,False,"318,993.7 μs","4,929.31 μs","4,610.88 μs"
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,23,True,"2,714.7 μs",4.32 μs,3.61 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,24,False,"65,379.9 μs",159.24 μs,124.32 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,24,True,"21,290.9 μs",34.03 μs,28.41 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,21,False,641.1 μs,1.80 μs,1.69 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,21,True,580.3 μs,6.82 μs,6.05 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,22,False,323.9 μs,0.66 μs,0.52 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,22,True,218.4 μs,4.35 μs,4.28 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,23,False,"318,156.3 μs","4,910.78 μs","4,593.55 μs"
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,23,True,"2,684.9 μs",5.12 μs,4.27 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,24,False,"47,410.3 μs",63.64 μs,56.42 μs
Benchmark,DefaultJob,False,Default,Default,Default,Default,Default,Default,0000000000,Empty,RyuJit,Arm64,8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c,.NET 7.0,False,True,False,True,Default,Default,False,False,False,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,Default,16,Default,24,True,"15,613.9 μs",81.16 μs,71.95 μs
1 Method Job AnalyzeLaunchVariance EvaluateOverhead MaxAbsoluteError MaxRelativeError MinInvokeCount MinIterationTime OutlierMode Affinity EnvironmentVariables Jit Platform PowerPlanMode Runtime AllowVeryLargeObjects Concurrent CpuGroups Force HeapAffinitizeMask HeapCount NoAffinitize RetainVm Server Arguments BuildConfiguration Clock EngineFactory NuGetReferences Toolchain IsMutator InvocationCount IterationCount IterationTime LaunchCount MaxIterationCount MaxWarmupIterationCount MemoryRandomization MinIterationCount MinWarmupIterationCount RunStrategy UnrollFactor WarmupCount Day IsPartOne Mean Error StdDev
2 Benchmark DefaultJob False Default Default Default Default Default Default 0000000000 Empty RyuJit Arm64 8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c .NET 7.0 False True False True Default Default False False False Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default 16 Default 21 False 640.9 μs 641.1 μs 1.31 μs 1.80 μs 1.23 μs 1.69 μs
3 Benchmark DefaultJob False Default Default Default Default Default Default 0000000000 Empty RyuJit Arm64 8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c .NET 7.0 False True False True Default Default False False False Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default 16 Default 21 True 579.0 μs 580.3 μs 8.23 μs 6.82 μs 7.70 μs 6.05 μs
4 Benchmark DefaultJob False Default Default Default Default Default Default 0000000000 Empty RyuJit Arm64 8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c .NET 7.0 False True False True Default Default False False False Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default 16 Default 22 False 326.6 μs 323.9 μs 2.11 μs 0.66 μs 1.97 μs 0.52 μs
5 Benchmark DefaultJob False Default Default Default Default Default Default 0000000000 Empty RyuJit Arm64 8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c .NET 7.0 False True False True Default Default False False False Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default 16 Default 22 True 217.7 μs 218.4 μs 1.04 μs 4.35 μs 0.97 μs 4.28 μs
6 Benchmark DefaultJob False Default Default Default Default Default Default 0000000000 Empty RyuJit Arm64 8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c .NET 7.0 False True False True Default Default False False False Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default 16 Default 23 False 318,993.7 μs 318,156.3 μs 4,929.31 μs 4,910.78 μs 4,610.88 μs 4,593.55 μs
7 Benchmark DefaultJob False Default Default Default Default Default Default 0000000000 Empty RyuJit Arm64 8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c .NET 7.0 False True False True Default Default False False False Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default 16 Default 23 True 2,714.7 μs 2,684.9 μs 4.32 μs 5.12 μs 3.61 μs 4.27 μs
8 Benchmark DefaultJob False Default Default Default Default Default Default 0000000000 Empty RyuJit Arm64 8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c .NET 7.0 False True False True Default Default False False False Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default 16 Default 24 False 65,379.9 μs 47,410.3 μs 159.24 μs 63.64 μs 124.32 μs 56.42 μs
9 Benchmark DefaultJob False Default Default Default Default Default Default 0000000000 Empty RyuJit Arm64 8c5e7fda-e8bf-4a96-9a85-a6e23a8c635c .NET 7.0 False True False True Default Default False False False Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default Default 16 Default 24 True 21,290.9 μs 15,613.9 μs 34.03 μs 81.16 μs 28.41 μs 71.95 μs

View File

@@ -2,7 +2,7 @@
<html lang='en'>
<head>
<meta charset='utf-8' />
<title>AdventOfCode2022.App.Benchmark21To25-20221224-133430</title>
<title>AdventOfCode2022.App.Benchmark21To25-20221224-141907</title>
<style type="text/css">
table { border-collapse: collapse; display: block; width: 100%; overflow: auto; }
@@ -24,14 +24,14 @@ Apple M1 Max, 1 CPU, 10 logical and 10 physical cores
<table>
<thead><tr><th>Method</th><th>Day</th><th>IsPartOne</th><th> Mean</th><th>Error</th><th>StdDev</th>
</tr>
</thead><tbody><tr><td>Benchmark</td><td>21</td><td>False</td><td>640.9 &mu;s</td><td>1.31 &mu;s</td><td>1.23 &mu;s</td>
</tr><tr><td>Benchmark</td><td>21</td><td>True</td><td>579.0 &mu;s</td><td>8.23 &mu;s</td><td>7.70 &mu;s</td>
</tr><tr><td>Benchmark</td><td>22</td><td>False</td><td>326.6 &mu;s</td><td>2.11 &mu;s</td><td>1.97 &mu;s</td>
</tr><tr><td>Benchmark</td><td>22</td><td>True</td><td>217.7 &mu;s</td><td>1.04 &mu;s</td><td>0.97 &mu;s</td>
</tr><tr><td>Benchmark</td><td>23</td><td>False</td><td>318,993.7 &mu;s</td><td>4,929.31 &mu;s</td><td>4,610.88 &mu;s</td>
</tr><tr><td>Benchmark</td><td>23</td><td>True</td><td>2,714.7 &mu;s</td><td>4.32 &mu;s</td><td>3.61 &mu;s</td>
</tr><tr><td>Benchmark</td><td>24</td><td>False</td><td>65,379.9 &mu;s</td><td>159.24 &mu;s</td><td>124.32 &mu;s</td>
</tr><tr><td>Benchmark</td><td>24</td><td>True</td><td>21,290.9 &mu;s</td><td>34.03 &mu;s</td><td>28.41 &mu;s</td>
</thead><tbody><tr><td>Benchmark</td><td>21</td><td>False</td><td>641.1 &mu;s</td><td>1.80 &mu;s</td><td>1.69 &mu;s</td>
</tr><tr><td>Benchmark</td><td>21</td><td>True</td><td>580.3 &mu;s</td><td>6.82 &mu;s</td><td>6.05 &mu;s</td>
</tr><tr><td>Benchmark</td><td>22</td><td>False</td><td>323.9 &mu;s</td><td>0.66 &mu;s</td><td>0.52 &mu;s</td>
</tr><tr><td>Benchmark</td><td>22</td><td>True</td><td>218.4 &mu;s</td><td>4.35 &mu;s</td><td>4.28 &mu;s</td>
</tr><tr><td>Benchmark</td><td>23</td><td>False</td><td>318,156.3 &mu;s</td><td>4,910.78 &mu;s</td><td>4,593.55 &mu;s</td>
</tr><tr><td>Benchmark</td><td>23</td><td>True</td><td>2,684.9 &mu;s</td><td>5.12 &mu;s</td><td>4.27 &mu;s</td>
</tr><tr><td>Benchmark</td><td>24</td><td>False</td><td>47,410.3 &mu;s</td><td>63.64 &mu;s</td><td>56.42 &mu;s</td>
</tr><tr><td>Benchmark</td><td>24</td><td>True</td><td>15,613.9 &mu;s</td><td>81.16 &mu;s</td><td>71.95 &mu;s</td>
</tr></tbody></table>
</body>
</html>