mirror of
https://github.com/Smaug123/ClassicalCiphers.jl
synced 2025-10-05 17:38:48 +00:00
Merge monoalphabetic cracking
This commit is contained in:
23
README.md
23
README.md
@@ -77,6 +77,29 @@ Notice that `encrypt_monoalphabetic` *does not* convert its input to uppercase
|
||||
when a Dict key is supplied.
|
||||
It simply makes all specified changes, and leaves the rest of the string unchanged.
|
||||
|
||||
Cracking a cipher:
|
||||
```julia
|
||||
crack_monoalphabetic(str, chatty=0, rounds=10)
|
||||
# outputs (key, decrypted_string)
|
||||
```
|
||||
|
||||
The various optional arguments to `crack_monoalphabetic` are:
|
||||
|
||||
* `starting_key=""`, which when specified (for example, as "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
|
||||
starts the simulation at the given key. The default causes it to start with the most
|
||||
common characters being decrypted to the most common English characters.
|
||||
* `min_temp=0.0001`, which is the temperature at which we stop the simulation.
|
||||
* `temp_factor=0.97`, which is the factor by which the temperature decreases each step.
|
||||
* `chatty=0`, which can be set to 1 to print whenever the key is updated, or 2 to print
|
||||
whenever any new key is considered.
|
||||
* `rounds=1`, which sets the number of repetitions we perform. Each round starts with the
|
||||
best key we've found so far.
|
||||
* `acceptance_prob=((e, ep, t) -> ep>e ? 1 : exp(-(e-ep)/t))`, which is the probability
|
||||
with which we accept a new key of fitness `ep`, given that the current key has fitness `e`,
|
||||
at temperature t.
|
||||
|
||||
The simulation is set up to start each round off at a successively higher temperature: round `r` of `rounds` begins at temperature `10^((r-1)/rounds)`.
|
||||
|
||||
### Vigenère cipher
|
||||
|
||||
Encrypt the text "Hello, World!" with a Vigenère cipher of key "ab":
|
||||
|
@@ -8,7 +8,7 @@ include("caesar.jl")
|
||||
include("vigenere.jl")
|
||||
include("solitaire.jl")
|
||||
|
||||
export encrypt_monoalphabetic, decrypt_monoalphabetic,
|
||||
export encrypt_monoalphabetic, decrypt_monoalphabetic, crack_monoalphabetic,
|
||||
encrypt_caesar, decrypt_caesar, crack_caesar,
|
||||
encrypt_vigenere, decrypt_vigenere,
|
||||
encrypt_solitaire, decrypt_solitaire,
|
||||
|
@@ -64,13 +64,35 @@ Performs a trigram analysis on the input string, to determine how close it
|
||||
is to English. That is, splits the input string into groups of three letters,
|
||||
and assigns a score based on the frequency of the trigrams in true English.
|
||||
"""
|
||||
function string_fitness(input; alreadystripped=false)
    # Score how English-like `input` is: sum the trigram_fitnesses entry of every
    # overlapping three-letter window, divide by the number of letters, and
    # return the log of that. Higher is better.
    #
    # alreadystripped=true skips the letters_only pass, for callers that have
    # already reduced the text to letters.
    str = uppercase(alreadystripped ? input : letters_only(input))

    # With fewer than three letters there is no trigram to score. Returning
    # -Inf here matches what log(0/n) gives for one or two letters, and avoids
    # log(0/0) == NaN for empty input (NaN compares false against everything,
    # which would stall any optimiser that ranks keys by this score).
    if length(str) < 3
        return -Inf
    end

    total = 0
    for i in 1:(length(str) - 2)
        # Unknown trigrams contribute nothing.
        # NOTE(review): the range slice assumes one byte per letter; confirm
        # that letters_only only ever yields ASCII letters.
        total += get(trigram_fitnesses, str[i:i+2], 0)
    end

    return log(total / length(str))
end
|
||||
|
||||
"""
|
||||
Finds the frequencies of all characters in the input string, returning a Dict
|
||||
of 'a' => 4, for instance. Uppercase characters are considered distinct from lowercase.
|
||||
"""
|
||||
function frequencies(input)
    # Tally how often each character occurs in `input`. Case is not folded, so
    # 'a' and 'A' are separate keys. Characters that never occur have no entry.
    counts = Dict{Char, Integer}()
    for c in input
        # get(..., 0) + 1 makes a character's first occurrence count as 1.
        counts[c] = get(counts, c, 0) + 1
    end
    return counts
end
|
@@ -47,3 +47,125 @@ function decrypt_monoalphabetic(ciphertext, key::AbstractString)
|
||||
dict = [(a => Char(96 + search(lowercase(key), a))) for a in lowercase(key)]
|
||||
encrypt_monoalphabetic(lowercase(ciphertext), dict)
|
||||
end
|
||||
|
||||
# Cracking
|
||||
|
||||
# The method we use for cracking is simulated annealing.
|
||||
|
||||
"""
|
||||
swap_two(string) swaps two of the characters of the input string, at random.
|
||||
The characters are guaranteed to be at different positions, though "aa" would be
|
||||
'swapped' to "aa".
|
||||
"""
|
||||
function swap_two(str)
    # Return a copy of `str` with the characters at two distinct, randomly
    # chosen positions exchanged.
    #
    # The swap is done on a collected vector of characters rather than by
    # indexing the string, so multi-byte (non-ASCII) characters are handled
    # correctly.
    chars = collect(str)
    n = length(chars)

    # Two distinct positions cannot exist in a shorter input; fail loudly
    # instead of looping forever while waiting for unequal indices.
    if n < 2
        throw(ArgumentError("swap_two needs at least two characters, got $(n)"))
    end

    # Redraw until the two positions differ.
    indices = rand(1:n, 2)
    while indices[1] == indices[2]
        indices = rand(1:n, 2)
    end

    held = chars[indices[1]]
    chars[indices[1]] = chars[indices[2]]
    chars[indices[2]] = held

    return join(chars, "")
end
|
||||
|
||||
"""
|
||||
crack_monoalphabetic cracks the given ciphertext which was encrypted by the monoalphabetic
|
||||
substitution cipher.
|
||||
Possible arguments include:
|
||||
starting_key="", which when specified (for example, as "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
|
||||
starts the simulation at the given key. The default causes it to start with the most
|
||||
common characters being decrypted to the most common English characters.
|
||||
min_temp=0.0001, which is the temperature at which we stop the simulation.
|
||||
temp_factor=0.97, which is the factor by which the temperature decreases each step.
|
||||
chatty=0, which can be set to 1 to print whenever the key is updated, or 2 to print
|
||||
whenever any new key is considered.
|
||||
rounds=1, which sets the number of repetitions we perform. Each round starts with the
|
||||
best key we've found so far.
|
||||
acceptance_prob=((e, ep, t) -> ep>e ? 1 : exp(-(e-ep)/t)), which is the probability
|
||||
with which we accept a new key of fitness ep, given that the current key has fitness e,
|
||||
at temperature t.
|
||||
"""
|
||||
function crack_monoalphabetic(ciphertext; starting_key="",
                              min_temp=0.0001, temp_factor=0.97,
                              acceptance_prob=((e,ep,t) -> ep > e ? 1. : exp(-(e-ep)/t)),
                              chatty=0,
                              rounds=1)
    # Simulated annealing over substitution keys. Returns the tuple
    # (key, decrypted_string) for the fittest key seen in any round.

    if starting_key == ""
        # No key supplied: guess one by pairing the most frequent ciphertext
        # letters with the most frequent English letters.
        commonest = "ETAOINSHRDLUMCYWFGBPVKZJXQ"
        freqs = frequencies(uppercase(letters_only(ciphertext)))
        # Make sure all 26 letters are present so the key is complete.
        for c in 'A':'Z'
            if !haskey(freqs, c)
                freqs[c] = 0
            end
        end

        # (letter => count) pairs, most frequent first.
        freqs_input = sort(collect(freqs), by = tuple -> last(tuple), rev=true)

        # Slot k of the key holds the ciphertext letter guessed to stand for
        # the k-th letter of the alphabet (Int('A') - 64 == 1).
        start_key = ['a' for c in 1:26]
        for i in 1:26
            start_key[Int(commonest[i])-64] = freqs_input[i][1]
        end

        key = join(start_key, "")
    else
        key = starting_key
    end

    if chatty > 1
        println("Starting key: $(key)")
    end

    # Fitness is always measured on the letters-only text; the decrypt that is
    # reported back uses the full ciphertext.
    stripped_ciphertext = letters_only(ciphertext)
    # alreadystripped=true so the starting fitness is computed exactly like the
    # neighbour fitnesses it is compared against below.
    fitness = string_fitness(decrypt_monoalphabetic(stripped_ciphertext, key),
                             alreadystripped=true)
    total_best_fitness = fitness
    total_best_key = key
    total_best_decrypt = decrypt_monoalphabetic(ciphertext, key)

    for roundcount in 1:rounds
        # Round r starts at temperature 10^((r-1)/rounds): 1 for the first
        # round, rising towards 10 for later rounds.
        temp = 10^((roundcount-1)/rounds)

        while temp > min_temp
            # Try more candidate keys per step as the temperature falls,
            # capped at 10.
            for i in 1:round(Int, min(ceil(1/temp), 10))
                neighbour = swap_two(key)
                new_fitness = string_fitness(
                    decrypt_monoalphabetic(stripped_ciphertext, neighbour),
                    alreadystripped=true)

                # Remember the best key ever seen, whether or not the walk
                # moves to it.
                if new_fitness > total_best_fitness
                    total_best_fitness = new_fitness
                    total_best_key = neighbour
                    total_best_decrypt = decrypt_monoalphabetic(ciphertext, total_best_key)
                end

                threshold = rand()
                # Evaluate once and reuse for both the logging and the decision.
                prob = acceptance_prob(fitness, new_fitness, temp)

                if chatty >= 2
                    println("Current fitness: $(fitness)")
                    println("New fitness: $(new_fitness)")
                    println("Acceptance probability: $(prob)")
                    println("Threshold: $(threshold)")
                end

                if prob >= threshold
                    if chatty >= 1
                        println("$(key) -> $(neighbour), threshold $(threshold), ",
                                "temperature $(temp), fitness $(new_fitness), prob $(prob)")
                    end
                    fitness = new_fitness
                    key = neighbour
                end
            end

            temp = temp * temp_factor

            if chatty >= 2
                println("----")
            end
        end

        # The next round restarts from the best key found so far.
        key = total_best_key
        fitness = total_best_fitness
    end

    if chatty >= 1
        println("Best was $(total_best_key) at $(total_best_fitness)")
        println(total_best_decrypt)
    end

    # total_best_decrypt is kept in step with total_best_key, so there is no
    # need to decrypt again here.
    return (total_best_key, total_best_decrypt)
end
|
||||
|
||||
|
Reference in New Issue
Block a user