From 4e44d695bc656fdea347602764512526137c4af3 Mon Sep 17 00:00:00 2001 From: Patrick Stevens <3138005+Smaug123@users.noreply.github.com> Date: Sun, 2 Mar 2025 22:49:31 +0000 Subject: [PATCH] Implement `ret` (#4) --- WoofWare.PawPrint/AbstractMachine.fs | 266 ++++++++++++++++++++------- WoofWare.PawPrint/IlOp.fs | 121 +++++++++++- 2 files changed, 315 insertions(+), 72 deletions(-) diff --git a/WoofWare.PawPrint/AbstractMachine.fs b/WoofWare.PawPrint/AbstractMachine.fs index 865c80c..ee1ee36 100644 --- a/WoofWare.PawPrint/AbstractMachine.fs +++ b/WoofWare.PawPrint/AbstractMachine.fs @@ -116,15 +116,14 @@ type EvalStack = type MethodReturnState = { JumpTo : MethodState - /// A stack of the types we're initialising. - /// Once we perform the jump, we're back in the context of the top one of these. - WasInitialising : (TypeDefinitionHandle * AssemblyName) list + WasInitialising : (TypeDefinitionHandle * AssemblyName) option } and MethodState = { // TODO: local variables are initialised to 0 if the localsinit flag is set for the method LocalVariables : CliObject ImmutableArray + /// Index into the stream of IL bytes. 
IlOpIndex : int EvaluationStack : EvalStack Arguments : CliObject ImmutableArray @@ -137,7 +136,9 @@ and MethodState = static member advanceProgramCounter (state : MethodState) = { state with - IlOpIndex = state.IlOpIndex + 1 + IlOpIndex = + state.IlOpIndex + + (IlOp.NumberOfBytes state.ExecutingMethod.Locations.[state.IlOpIndex]) } static member loadArgument (index : int) (state : MethodState) : MethodState = @@ -246,10 +247,38 @@ type TypeInitState = /// Tracks the initialization state of types across assemblies type TypeInitTable = ImmutableDictionary +[] +module TypeInitTable = + let beginInitialising + (thread : ThreadId) + (typeDef : TypeDefinitionHandle * AssemblyName) + (t : TypeInitTable) + : TypeInitTable + = + match t.TryGetValue typeDef with + | false, _ -> t.Add (typeDef, TypeInitState.InProgress thread) + | true, v -> failwith "Logic error: tried initialising a type which has already started initialising" + + let markInitialised + (thread : ThreadId) + (typeDef : TypeDefinitionHandle * AssemblyName) + (t : TypeInitTable) + : TypeInitTable + = + match t.TryGetValue typeDef with + | false, _ -> failwith "Logic error: completing initialisation of a type which never started initialising" + | true, TypeInitState.Initialized -> + failwith "Logic error: completing initialisation of a type which has already finished initialising" + | true, TypeInitState.InProgress thread2 -> + if thread <> thread2 then + failwith + "Logic error: completed initialisation of a type on a different thread to the one which started it!" + else + t.SetItem (typeDef, TypeInitState.Initialized) + type IlMachineState = { NextThreadId : int - EvalStacks : Map // CallStack : StackFrame list /// Multiple managed heaps are allowed, but we hopefully only need one. 
ManagedHeap : ManagedHeap @@ -486,7 +515,8 @@ module IlMachineState = let state = { state with TypeInitTable = - state.TypeInitTable.Add ((typeDefHandle, assemblyName), TypeInitState.InProgress currentThread) + state.TypeInitTable + |> TypeInitTable.beginInitialising currentThread (typeDefHandle, assemblyName) } // Check if the type has a base type that needs initialization @@ -521,9 +551,10 @@ module IlMachineState = resolveType loggerFactory typeReferenceHandle (state.ActiveAssembly currentThread) state logger.LogDebug ( - "Resolved base type of {TypeDefNamespace}.{TypeDefName} to this assembly, typeref, {BaseTypeNamespace}.{BaseTypeName}", + "Resolved base type of {TypeDefNamespace}.{TypeDefName} to a typeref in assembly {ResolvedAssemblyName}, {BaseTypeNamespace}.{BaseTypeName}", typeDef.Namespace, typeDef.Name, + assy.Name.Name, targetType.Namespace, targetType.Name ) @@ -545,7 +576,9 @@ module IlMachineState = match cctor with | Some ctorMethod -> - // Call the class constructor! + // Call the class constructor! Note that we *don't* use `callMethodInActiveAssembly`, because that + // performs class loading, but we're already in the middle of loading this class. + // TODO: factor out the common bit. 
let currentThreadState = state.ThreadState.[currentThread] let newMethodState = @@ -553,8 +586,8 @@ module IlMachineState = ctorMethod (Some { - JumpTo = currentThreadState.MethodState - WasInitialising = [ typeDefHandle, assemblyName ] + JumpTo = currentThreadState.MethodState |> MethodState.advanceProgramCounter + WasInitialising = Some (typeDefHandle, assemblyName) }) { state with @@ -573,9 +606,8 @@ module IlMachineState = let state = { state with TypeInitTable = - let key = typeDefHandle, assemblyName - assert (state.TypeInitTable.ContainsKey key) - state.TypeInitTable.SetItem (key, TypeInitState.Initialized) + state.TypeInitTable + |> TypeInitTable.markInitialised currentThread (typeDefHandle, assemblyName) } // Restore original assembly context if needed @@ -583,12 +615,64 @@ module IlMachineState = |> fst |> NothingToDo + let callMethodInActiveAssembly + (loggerFactory : ILoggerFactory) + (thread : ThreadId) + (methodToCall : WoofWare.PawPrint.MethodInfo) + (state : IlMachineState) + : IlMachineState * WhatWeDid + = + let threadState = state.ThreadState.[thread] + + match state.TypeInitTable.TryGetValue methodToCall.DeclaringType with + | false, _ -> + match + loadClass loggerFactory (fst methodToCall.DeclaringType) (snd methodToCall.DeclaringType) thread state + with + | NothingToDo state -> + // TODO: factor this out, it's the same as the Initialized flow + let newThreadState = + { threadState with + MethodState = + MethodState.Empty + methodToCall + (Some + { + JumpTo = threadState.MethodState |> MethodState.advanceProgramCounter + WasInitialising = None + }) + } + + { state with + ThreadState = state.ThreadState |> Map.add thread newThreadState + }, + WhatWeDid.Executed + | FirstLoadThis state -> state, WhatWeDid.SuspendedForClassInit + | true, TypeInitState.Initialized -> + let newThreadState = + { threadState with + MethodState = + MethodState.Empty + methodToCall + (Some + { + JumpTo = threadState.MethodState |> MethodState.advanceProgramCounter + 
WasInitialising = None + }) + } + + { state with + ThreadState = state.ThreadState |> Map.add thread newThreadState + }, + WhatWeDid.Executed + | true, InProgress threadId -> state, WhatWeDid.BlockedOnClassInit threadId + + let initial (dotnetRuntimeDirs : ImmutableArray) (entryAssembly : DumpedAssembly) : IlMachineState = let assyName = entryAssembly.ThisAssemblyDefinition.Name { NextThreadId = 0 - EvalStacks = Map.empty // CallStack = [] ManagedHeap = ManagedHeap.Empty ThreadState = Map.empty @@ -613,7 +697,6 @@ module IlMachineState = let newState = { state with NextThreadId = state.NextThreadId + 1 - EvalStacks = state.EvalStacks |> Map.add thread EvalStack.Empty ThreadState = state.ThreadState |> Map.add thread (ThreadState.New newThreadAssy newThreadState) @@ -629,16 +712,40 @@ module IlMachineState = ManagedHeap = heap } - let pushToStack (o : CliObject) (thread : ThreadId) (state : IlMachineState) = + let pushToEvalStack (o : CliObject) (thread : ThreadId) (state : IlMachineState) = { state with - EvalStacks = - state.EvalStacks + ThreadState = + state.ThreadState |> Map.change thread - (fun s -> - match s with - | None -> failwith "tried to push to stack of nonexistent thread" - | Some stack -> EvalStack.Push o stack |> Some + (fun threadState -> + match threadState with + | None -> failwith "Logic error: tried to push to stack of a nonexistent thread" + | Some threadState -> + { threadState with + ThreadState.MethodState.EvaluationStack = + threadState.MethodState.EvaluationStack |> EvalStack.Push o + } + |> Some + ) + } + + // TODO: which stack do we actually want to push to? 
+ let pushToStackCoerced (o : EvalStackValue) (targetType : TypeDefn) (thread : ThreadId) (state : IlMachineState) = + { state with + ThreadState = + state.ThreadState + |> Map.change + thread + (fun threadState -> + match threadState with + | None -> failwith "Logic error: tried to push to stack of a nonexistent thread" + | Some threadState -> + { threadState with + ThreadState.MethodState.EvaluationStack = + threadState.MethodState.EvaluationStack |> EvalStack.Push (failwith "TODO") + } + |> Some ) } @@ -690,41 +797,6 @@ module IlMachineState = ) } - let callMethodInActiveAssembly - (loggerFactory : ILoggerFactory) - (thread : ThreadId) - (methodToCall : WoofWare.PawPrint.MethodInfo) - (state : IlMachineState) - : IlMachineState * WhatWeDid - = - let threadState = state.ThreadState.[thread] - - match state.TypeInitTable.TryGetValue methodToCall.DeclaringType with - | false, _ -> - match - loadClass loggerFactory (fst methodToCall.DeclaringType) (snd methodToCall.DeclaringType) thread state - with - | NothingToDo state -> state, WhatWeDid.SuspendedForClassInit - | FirstLoadThis state -> state, WhatWeDid.SuspendedForClassInit - | true, TypeInitState.Initialized -> - let newThreadState = - { threadState with - MethodState = - MethodState.Empty - methodToCall - (Some - { - JumpTo = threadState.MethodState - WasInitialising = [] - }) - } - - { state with - ThreadState = state.ThreadState |> Map.add thread newThreadState - }, - WhatWeDid.Executed - | true, InProgress threadId -> state, WhatWeDid.BlockedOnClassInit threadId - [] module AbstractMachine = type private Dummy = class end @@ -763,7 +835,51 @@ module AbstractMachine = | Ldloc_3 -> failwith "todo" | Pop -> failwith "todo" | Dup -> failwith "todo" - | Ret -> failwith "todo" + | Ret -> + let threadStateAtEndOfMethod = state.ThreadState.[currentThread] + + let returnState = + match threadStateAtEndOfMethod.MethodState.ReturnState with + | None -> failwith "Program finished execution?" 
+ | Some returnState -> returnState + + let state = + match returnState.WasInitialising with + | None -> state + | Some finishedInitialising -> + { state with + TypeInitTable = + state.TypeInitTable + |> TypeInitTable.markInitialised currentThread finishedInitialising + } + + // Return to previous stack frame + let state = + { state with + ThreadState = + state.ThreadState + |> Map.add + currentThread + { threadStateAtEndOfMethod with + MethodState = returnState.JumpTo + } + } + + match threadStateAtEndOfMethod.MethodState.EvaluationStack.Values with + | [] -> + // no return value + state, WhatWeDid.Executed + | [ retVal ] -> + let retType = + threadStateAtEndOfMethod.MethodState.ExecutingMethod.Signature.ReturnType + + state + |> IlMachineState.pushToStackCoerced retVal retType currentThread + |> Tuple.withRight WhatWeDid.Executed + | vals -> + failwith + "Unexpected interpretation result has a local evaluation stack with more than one element on RET" + | LdcI4_0 -> failwith "todo" | LdcI4_1 -> failwith "todo" | LdcI4_2 -> failwith "todo" @@ -967,16 +1083,21 @@ module AbstractMachine = | MetadataToken.MethodDef defn -> state, (state.ActiveAssembly thread).Methods.[defn] | k -> failwith $"Unrecognised kind: %O{k}" - IlMachineState.callMethodInActiveAssembly loggerFactory thread methodToCall state + state.WithThreadSwitchedToAssembly (snd methodToCall.DeclaringType) thread + |> fst + |> IlMachineState.callMethodInActiveAssembly loggerFactory thread methodToCall | Callvirt -> failwith "todo" | Castclass -> failwith "todo" | Newobj -> + // TODO: allocate the object, and pass it as the first argument to the constructor. Check the rest of what + // newobj is supposed to do, and do it. 
let state, assy, ctor = match metadataToken with | MethodDef md -> let activeAssy = state.ActiveAssembly thread - state, activeAssy.Name, activeAssy.Methods.[md] + let method = activeAssy.Methods.[md] + state, activeAssy.Name, method | MemberReference mr -> resolveMember loggerFactory (state.ActiveAssembly thread) mr state | x -> failwith $"Unexpected metadata token for constructor: %O{x}" @@ -1015,14 +1136,17 @@ module AbstractMachine = if TypeDefn.isManaged field.Signature then match state.Statics.TryGetValue ((field.DeclaringType, activeAssy.Name)) with | true, v -> - IlMachineState.pushToStack (CliObject.Basic (BasicCliObject.PointerType (Some v))) thread state + IlMachineState.pushToEvalStack + (CliObject.Basic (BasicCliObject.PointerType (Some v))) + thread + state |> IlMachineState.advanceProgramCounter thread |> Tuple.withRight WhatWeDid.Executed | false, _ -> let allocation, state = state |> IlMachineState.allocate (failwith "") state - |> IlMachineState.pushToStack + |> IlMachineState.pushToEvalStack (CliObject.Basic (BasicCliObject.PointerType (Some allocation))) thread |> Tuple.withRight WhatWeDid.Executed @@ -1064,18 +1188,14 @@ module AbstractMachine = | true, v -> v, state let state = - IlMachineState.pushToStack + IlMachineState.pushToEvalStack (CliObject.Basic (BasicCliObject.ObjectReference (Some addressToLoad))) thread state - // +1 for the opcode, +4 for the bytes of the handle. - // TODO: some opcodes are multiple bytes! Should deal with that. 
- let mutable state = state - for i = 0 to 4 do - state <- IlMachineState.advanceProgramCounter thread state - - state, WhatWeDid.Executed + state + |> IlMachineState.advanceProgramCounter thread + |> Tuple.withRight WhatWeDid.Executed let executeOneStep (loggerFactory : ILoggerFactory) @@ -1086,11 +1206,15 @@ module AbstractMachine = let logger = loggerFactory.CreateLogger typeof.DeclaringType let instruction = state.ThreadState.[thread].MethodState + match instruction.ExecutingMethod.Locations.TryGetValue instruction.IlOpIndex with + | false, _ -> failwith "Wanted to execute a nonexistent instruction" + | true, executingInstruction -> + logger.LogInformation ( "Executing one step (index {ExecutingIlOpIndex} in method {ExecutingMethodName}): {ExecutingIlOp}", instruction.IlOpIndex, instruction.ExecutingMethod.Name, - instruction.ExecutingMethod.Locations.[instruction.IlOpIndex] + executingInstruction ) match instruction.ExecutingMethod.Locations.[instruction.IlOpIndex] with diff --git a/WoofWare.PawPrint/IlOp.fs b/WoofWare.PawPrint/IlOp.fs index a8f910f..12c14a7 100644 --- a/WoofWare.PawPrint/IlOp.fs +++ b/WoofWare.PawPrint/IlOp.fs @@ -239,6 +239,26 @@ type NullaryIlOp = | Readonly | Refanytype + /// The number of bytes this instruction takes in memory. + static member NumberOfBytes (op : NullaryIlOp) : int = + match op with + | Arglist + | Ceq + | Cgt + | Cgt_un + | Clt + | Clt_un + | Localloc + | Endfilter + | Volatile + | Tail + | Cpblk + | Initblk + | Rethrow + | Refanytype + | Readonly -> 2 + | _ -> 1 + type UnaryConstIlOp = | Stloc of uint16 | Stloc_s of int8 @@ -287,6 +307,57 @@ type UnaryConstIlOp = | Ldloca of uint16 | Ldarg of uint16 + /// The number of bytes this instruction takes in memory, including its constant argument that is inline in the + /// byte stream. 
+    static member NumberOfBytes (op : UnaryConstIlOp) : int =
+        match op with
+        | Ldarg _uint16
+        | Ldarga _uint16
+        | Starg _uint16
+        | Ldloc _uint16
+        | Ldloca _uint16
+        | Stloc _uint16 -> 2 + 2 // Two-byte opcode + two-byte argument
+        | Ldarg_s _
+        | Ldarga_s _
+        | Starg_s _
+        | Ldloc_s _
+        | Ldloca_s _
+        | Stloc_s _
+        | Ldc_I4_s _
+        | Br_s _
+        | Brfalse_s _
+        | Brtrue_s _
+        | Beq_s _
+        | Blt_s _
+        | Ble_s _
+        | Bgt_s _
+        | Bge_s _
+        | Bne_un_s _
+        | Bge_un_s _
+        | Bgt_un_s _
+        | Ble_un_s _
+        | Blt_un_s _
+        | Leave_s _ -> 1 + 1 // One-byte opcode + one-byte argument
+        | Unaligned _ -> 2 + 1 // Two-byte opcode (0xFE 0x12 prefix) + one-byte alignment argument
+        | Ldc_I8 _ -> 1 + 8 // One-byte opcode + 8-byte argument
+        | Ldc_I4 _
+        | Br _
+        | Brfalse _
+        | Brtrue _
+        | Beq _
+        | Blt _
+        | Ble _
+        | Bgt _
+        | Bge _
+        | Bne_un _
+        | Bge_un _
+        | Bgt_un _
+        | Ble_un _
+        | Blt_un _
+        | Leave _ -> 1 + 4 // One-byte opcode + 4-byte argument
+        | Ldc_R4 _ -> 1 + 4 // One-byte opcode + 4-byte argument
+        | Ldc_R8 _ -> 1 + 8 // One-byte opcode + 8-byte argument
+
 type UnaryMetadataTokenIlOp =
     | Call
     | Calli
@@ -320,7 +391,47 @@ type UnaryMetadataTokenIlOp =
     | Refanyval
     | Jmp
 
-type UnaryStringTokenIlOp = | Ldstr
+    /// The number of bytes this instruction takes in memory, including its metadata token argument.
+ static member NumberOfBytes (op : UnaryMetadataTokenIlOp) : int = + match op with + | Ldftn + | Ldvirtftn + | Initobj + | Constrained + | Sizeof -> 2 + 4 // Two-byte opcode + 4-byte token + | Call + | Calli + | Callvirt + | Castclass + | Newobj + | Newarr + | Box + | Ldelema + | Isinst + | Stfld + | Stsfld + | Ldfld + | Ldflda + | Ldsfld + | Ldsflda + | Unbox_Any + | Stelem + | Ldelem + | Stobj + | Ldtoken + | Cpobj + | Ldobj + | Unbox + | Mkrefany + | Refanyval + | Jmp -> 1 + 4 // One-byte opcode + 4-byte token + +type UnaryStringTokenIlOp = + | Ldstr + + static member NumberOfBytes (op : UnaryStringTokenIlOp) : int = + match op with + | Ldstr -> 1 + 4 type IlOp = | Nullary of NullaryIlOp @@ -330,3 +441,11 @@ type IlOp = | Switch of int32 ImmutableArray static member Format (opCode : IlOp) (offset : int) : string = $" IL_%04X{offset}: %-20O{opCode}" + + static member NumberOfBytes (op : IlOp) = + match op with + | Nullary op -> NullaryIlOp.NumberOfBytes op + | UnaryConst op -> UnaryConstIlOp.NumberOfBytes op + | UnaryMetadataToken (op, _) -> UnaryMetadataTokenIlOp.NumberOfBytes op + | UnaryStringToken (op, _) -> UnaryStringTokenIlOp.NumberOfBytes op + | Switch arr -> 1 + 4 + arr.Length * 4