Merge monoalphabetic cracking

This commit is contained in:
Smaug123
2016-01-04 21:03:26 +00:00
4 changed files with 171 additions and 4 deletions

View File

@@ -77,6 +77,29 @@ Notice that `encrypt_monoalphabetic` *does not* convert its input to uppercase
when a Dict key is supplied.
It simply makes all specified changes, and leaves the rest of the string unchanged.
Cracking a cipher:
```julia
crack_monoalphabetic(str, chatty=0, rounds=10)
# outputs (key, decrypted_string)
```
The various optional arguments to `crack_monoalphabetic` are:
* `starting_key=""`, which when specified (for example, as "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
starts the simulation at the given key. The default causes it to start with the most
common characters being decrypted to the most common English characters.
* `min_temp=0.0001`, which is the temperature at which we stop the simulation.
* `temp_factor=0.97`, which is the factor by which the temperature decreases each step.
* `chatty=0`, which can be set to 1 to print whenever the key is updated, or 2 to print
whenever any new key is considered.
* `rounds=1`, which sets the number of repetitions we perform. Each round starts with the
best key we've found so far.
* `acceptance_prob=((e, ep, t) -> ep>e ? 1 : exp(-(e-ep)/t))`, which is the probability
with which we accept a new key of fitness `ep`, given that the current key has fitness `e`,
at temperature `t`.
The simulation is set up to start each round off at a successively lower temperature.
### Vigenère cipher
Encrypt the text "Hello, World!" with a Vigenère cipher of key "ab":

View File

@@ -8,7 +8,7 @@ include("caesar.jl")
include("vigenere.jl")
include("solitaire.jl")
export encrypt_monoalphabetic, decrypt_monoalphabetic,
export encrypt_monoalphabetic, decrypt_monoalphabetic, crack_monoalphabetic,
encrypt_caesar, decrypt_caesar, crack_caesar,
encrypt_vigenere, decrypt_vigenere,
encrypt_solitaire, decrypt_solitaire,

View File

@@ -64,13 +64,35 @@ Performs a trigram analysis on the input string, to determine how close it
is to English. That is, splits the input string into groups of three letters,
and assigns a score based on the frequency of the trigrams in true English.
"""
function string_fitness(input)
str = uppercase(letters_only(input))
function string_fitness(input; alreadystripped=false)
    # Callers that have already reduced the text with letters_only can pass
    # alreadystripped=true to skip doing it a second time.
    text = uppercase(alreadystripped ? input : letters_only(input))

    # Add up the score of every window of three consecutive characters;
    # trigrams missing from the trigram_fitnesses table contribute nothing.
    total = 0
    for stop in 3:length(text)
        total += get(trigram_fitnesses, text[stop-2:stop], 0)
    end

    # Scale by the text length and move to a log scale.
    return log(total / length(text))
end
"""
Finds the frequencies of all characters in the input string, returning a Dict
of 'a' => 4, for instance. Uppercase characters are considered distinct from lowercase.
"""
function frequencies(input)
    # The value type stays Integer so that the returned Dict has the same type
    # callers have always received.
    counts = Dict{Char, Integer}()
    for ch in input
        # A character seen for the first time starts from 0 and is bumped to 1,
        # so every occurrence (including the first) is counted.
        counts[ch] = get(counts, ch, 0) + 1
    end
    return counts
end

View File

@@ -47,3 +47,125 @@ function decrypt_monoalphabetic(ciphertext, key::AbstractString)
dict = [(a => Char(96 + search(lowercase(key), a))) for a in lowercase(key)]
encrypt_monoalphabetic(lowercase(ciphertext), dict)
end
# Cracking
# The method we use for cracking is simulated annealing.
"""
swap_two(string) swaps two of the characters of the input string, at random.
The characters are guaranteed to be at different positions, though "aa" would be
'swapped' to "aa".
"""
function swap_two(str)
    # Work on a vector of characters so that positions are character positions,
    # which keeps multi-byte (non-ASCII) strings safe to index.
    chars = collect(str)
    n = length(chars)

    # With fewer than two characters there is no pair of distinct positions to
    # swap; hand the text back unchanged instead of searching forever.
    if n < 2
        return join(chars)
    end

    # Choose an ordered pair of distinct positions uniformly at random: the
    # second draw is taken from the n-1 remaining slots and shifted past the
    # first position, so no retry loop is needed.
    first_pos = rand(1:n)
    second_pos = rand(1:n-1)
    if second_pos >= first_pos
        second_pos += 1
    end

    chars[first_pos], chars[second_pos] = chars[second_pos], chars[first_pos]
    return join(chars)
end
"""
crack_monoalphabetic cracks the given ciphertext which was encrypted by the monoalphabetic
substitution cipher.
Possible arguments include:
starting_key="", which when specified (for example, as "ABCDEFGHIJKLMNOPQRSTUVWXYZ"),
starts the simulation at the given key. The default causes it to start with the most
common characters being decrypted to the most common English characters.
min_temp=0.0001, which is the temperature at which we stop the simulation.
temp_factor=0.97, which is the factor by which the temperature decreases each step.
chatty=0, which can be set to 1 to print whenever the key is updated, or 2 to print
whenever any new key is considered.
rounds=1, which sets the number of repetitions we perform. Each round starts with the
best key we've found so far.
acceptance_prob=((e, ep, t) -> ep>e ? 1 : exp(-(e-ep)/t)), which is the probability
with which we accept new key of fitness ep, given that the current key has fitness e,
at temperature t.
"""
function crack_monoalphabetic(ciphertext; starting_key="",
                              min_temp=0.0001, temp_factor=0.97,
                              acceptance_prob=((e,ep,t) -> ep > e ? 1. : exp(-(e-ep)/t)),
                              chatty=0,
                              rounds=1)
    # Returns (key, decrypted_string) for the fittest key seen during the search.
    if starting_key == ""
        # No key supplied: build one by frequency analysis.
        # English letters, most common first.
        commonest = "ETAOINSHRDLUMCYWFGBPVKZJXQ"
        freqs = frequencies(uppercase(letters_only(ciphertext)))
        # Give every letter an entry so that at least 26 candidates get ranked.
        for c in 'A':'Z'
            get!(freqs, c, 0)
        end
        freqs_input = sort(collect(freqs), by = pair -> last(pair), rev=true)
        start_key = ['a' for c in 1:26]
        for i in 1:26
            # Int(letter) - 64 is the letter's position in the alphabet; that slot of
            # the key receives the i-th most frequent ciphertext character.
            start_key[Int(commonest[i])-64] = freqs_input[i][1]
        end
        key = join(start_key, "")
    else
        key = starting_key
    end

    if chatty > 1
        println("Starting key: $(key)")
    end

    # Fitness is always measured on the stripped text; the full ciphertext is only
    # decrypted when a new overall best key turns up.
    stripped_ciphertext = letters_only(ciphertext)
    fitness = string_fitness(decrypt_monoalphabetic(stripped_ciphertext, key),
                             alreadystripped=true)
    total_best_fitness = fitness
    total_best_key = key
    total_best_decrypt = decrypt_monoalphabetic(ciphertext, key)

    for roundcount in 1:rounds
        # NOTE(review): this starts round 1 at temperature 1 and later rounds at
        # progressively higher temperatures (approaching 10), whereas the README says
        # each round starts successively lower -- confirm which is intended.
        temp = 10^((roundcount-1)/rounds)
        while temp > min_temp
            # Try more candidate keys per temperature step as it cools, capped at 10.
            for i in 1:round(Int, min(ceil(1/temp), 10))
                neighbour = swap_two(key)
                new_fitness = string_fitness(
                    decrypt_monoalphabetic(stripped_ciphertext, neighbour),
                    alreadystripped=true)

                # Remember the best key ever seen, whether or not we move to it.
                if new_fitness > total_best_fitness
                    total_best_fitness = new_fitness
                    total_best_key = neighbour
                    total_best_decrypt = decrypt_monoalphabetic(ciphertext, total_best_key)
                end

                threshold = rand()
                # Evaluate the acceptance probability exactly once, so the value that
                # is printed is the value that decides the move.
                prob = acceptance_prob(fitness, new_fitness, temp)

                if chatty >= 2
                    println("Current fitness: $(fitness)")
                    println("New fitness: $(new_fitness)")
                    println("Acceptance probability: $(prob)")
                    println("Threshold: $(threshold)")
                end

                if prob >= threshold
                    if chatty >= 1
                        println("$(key) -> $(neighbour), threshold $(threshold), temperature $(temp), fitness $(new_fitness), prob $(prob)")
                    end
                    fitness = new_fitness
                    key = neighbour
                end
            end

            temp = temp * temp_factor
            if chatty >= 2
                println("----")
            end
        end

        # The next round resumes from the best key found so far.
        key = total_best_key
        fitness = total_best_fitness
    end

    if chatty >= 1
        println("Best was $(total_best_key) at $(total_best_fitness)")
        println(total_best_decrypt)
    end

    # total_best_decrypt is always the decryption under total_best_key, so there is
    # no need to decrypt the ciphertext again here.
    return (total_best_key, total_best_decrypt)
end