Freq
February 5, 2016
This is easy. We split the input into blocks of the requested size, then use uniq-c from the Standard Prelude to count them:
(define (freq str . block)
(let ((block (if (pair? block) (car block) 1))
(len (string-length str)))
(if (< len block) (list)
(let loop ((k 0) (txt (list)))
(if (<= k (- len block))
(loop (+ k 1) (cons (substring str k (+ k block)) txt))
(let ((txt (sort string<? txt)))
(uniq-c string=? txt)))))))
Here’s an example:
> (define cryptogram (string-append
"V MVXEGC NK V RGIIZH HYZ IGXDK PZQ "
"YNK QLMCGIIV HYGX AYG KQX NK KYNXNXO "
"VXD HZXAK NA MVUE AYG LNXQAG NA "
"MGONXK AZ CVNX LVCE AHVNX"))
> (sort (lambda (a b) (< (cdr b) (cdr a))) (freq cryptogram 1))
((" " . 26) ("X" . 12) ("N" . 11) ("G" . 9) ("V" . 9)
("A" . 8) ("K" . 8) ("Y" . 6) ("H" . 5) ("I" . 5) ("Z" . 5)
("C" . 4) ("M" . 4) ("Q" . 4) ("E" . 3) ("L" . 3) ("D" . 2)
("O" . 2) ("P" . 1) ("R" . 1) ("U" . 1))
> (sort (lambda (a b) (< (cdr b) (cdr a))) (freq cryptogram 2))
(("K " . 6) ("NX" . 6) (" A" . 4) (" N" . 4) (" H" . 3)
(" M" . 3) ("G " . 3) ("NK" . 3) ("V " . 3) ("X " . 3)
("YG" . 3) (" K" . 2) (" L" . 2) (" V" . 2) ("A " . 2)
("AY" . 2) ("E " . 2) ("GI" . 2) ("GX" . 2) ("HY" . 2)
("II" . 2) ("MV" . 2) ("NA" . 2) ("VN" . 2) ("VX" . 2)
("XD" . 2) ("YN" . 2) ("Z " . 2) (" C" . 1) (" I" . 1)
(" P" . 1) (" Q" . 1) (" R" . 1) (" Y" . 1) ("AG" . 1)
("AH" . 1) ("AK" . 1) ("AZ" . 1) ("C " . 1) ("CE" . 1)
("CG" . 1) ("CV" . 1) ("D " . 1) ("DK" . 1) ("EG" . 1)
("GC" . 1) ("GO" . 1) ("H " . 1) ("HV" . 1) ("HZ" . 1)
("IG" . 1) ("IV" . 1) ("IZ" . 1) ("KQ" . 1) ("KY" . 1)
("LM" . 1) ("LN" . 1) ("LV" . 1) ("MC" . 1) ("MG" . 1)
("O " . 1) ("ON" . 1) ("PZ" . 1) ("Q " . 1) ("QA" . 1)
("QL" . 1) ("QX" . 1) ("RG" . 1) ("UE" . 1) ("VC" . 1)
("VU" . 1) ("XA" . 1) ("XE" . 1) ("XK" . 1) ("XN" . 1)
("XO" . 1) ("XQ" . 1) ("YZ" . 1) ("ZH" . 1) ("ZQ" . 1)
("ZX" . 1))
> (sort (lambda (a b) (< (cdr b) (cdr a))) (freq cryptogram 3))
(("NK " . 3) (" AY" . 2) (" HY" . 2) (" MV" . 2) (" NA" . 2)
(" NK" . 2) ("A M" . 2) ("AYG" . 2) ("E A" . 2) ("GII" . 2)
("NA " . 2) ("VNX" . 2) ("YG " . 2) (" AH" . 1) (" AZ" . 1)
(" CV" . 1) (" HZ" . 1) (" IG" . 1) (" KQ" . 1) (" KY" . 1)
(" LN" . 1) (" LV" . 1) (" MG" . 1) (" PZ" . 1) (" QL" . 1)
(" RG" . 1) (" V " . 1) (" VX" . 1) (" YN" . 1) ("AG " . 1)
("AHV" . 1) ("AK " . 1) ("AZ " . 1) ("C N" . 1) ("CE " . 1)
("CGI" . 1) ("CVN" . 1) ("D H" . 1) ("DK " . 1) ("EGC" . 1)
("G K" . 1) ("G L" . 1) ("G N" . 1) ("GC " . 1) ("GON" . 1)
("GX " . 1) ("GXD" . 1) ("H H" . 1) ("HVN" . 1) ("HYG" . 1)
("HYZ" . 1) ("HZX" . 1) ("IGX" . 1) ("IIV" . 1) ("IIZ" . 1)
("IV " . 1) ("IZH" . 1) ("K A" . 1) ("K K" . 1) ("K N" . 1)
("K P" . 1) ("K Q" . 1) ("K V" . 1) ("KQX" . 1) ("KYN" . 1)
("LMC" . 1) ("LNX" . 1) ("LVC" . 1) ("MCG" . 1) ("MGO" . 1)
("MVU" . 1) ("MVX" . 1) ("NX " . 1) ("NXK" . 1) ("NXN" . 1)
("NXO" . 1) ("NXQ" . 1) ("O V" . 1) ("ONX" . 1) ("PZQ" . 1)
("Q Y" . 1) ("QAG" . 1) ("QLM" . 1) ("QX " . 1) ("RGI" . 1)
("UE " . 1) ("V H" . 1) ("V M" . 1) ("V R" . 1) ("VCE" . 1)
("VUE" . 1) ("VXD" . 1) ("VXE" . 1) ("X A" . 1) ("X L" . 1)
("X N" . 1) ("XAK" . 1) ("XD " . 1) ("XDK" . 1) ("XEG" . 1)
("XK " . 1) ("XNX" . 1) ("XO " . 1) ("XQA" . 1) ("YGX" . 1)
("YNK" . 1) ("YNX" . 1) ("YZ " . 1) ("Z C" . 1) ("Z I" . 1)
("ZH " . 1) ("ZQ " . 1) ("ZXA" . 1))
You can run the program at http://ideone.com/MAI3nb, where you’ll find that the parameters to the sort command are reversed.
Python Counter does the job.
from collections import Counter
s = "V MVXEGC NK V RGIIZH HYZ IGXDK PZQ YNK QLMCGIIV HYGX AYG KQX NK KYNXNXO VXD HZXAK NA MVUE AYG LNXQAG NA MGONXK AZ CVNX. LVCE AHVNX"
unigraphs = Counter(s)
del unigraphs[‘ ‘]
digraphs = Counter([s[i:i+2] for i in range(len(s)-1) if ‘ ‘ not in s[i:i+2]])
trigraphs = Counter([s[i:i+3] for i in range(len(s)-2) if ‘ ‘ not in s[i:i+3]])
print unigraphs.most_common(5)
print digraphs.most_common(5)
print trigraphs.most_common(5)
# output
# [(‘X’, 12), (‘N’, 11), (‘G’, 9), (‘V’, 9), (‘A’, 8)]
# [(‘NX’, 6), (‘YG’, 3), (‘NK’, 3), (‘HY’, 2), (‘YN’, 2)]
# [(‘VNX’, 2), (‘AYG’, 2), (‘GII’, 2), (‘VUE’, 1), (‘YNK’, 1)]
Python’s strings have this nifty pair of methods to build and apply translation tables. I had
forgotten what the message was (though I quickly remembered it was by Mark Twain) so
I had a chance to actually work it out (trying to not remember that it was by Mark Twain).
All I used was the short words, especially the
combination NK V, and after a promising start, the most frequent unigram heuristic. At
that point, clear patterns started to emerge and the message revealed itself. The last
translation steps are commented out in order to show that stage of the output.
There’s a typo in HZXAK.
s = """V MVXEGC NK V RGIIZH HYZ IGXDK PZQ YNK QLMCGIIV HYGX AYG KQX NK KYNXNXO VXD HZXAK NA MVUE AYG LNXQAG NA MGONXK AZ CVNX. LVCE AHVNX""" from collections import Counter unigraphs = Counter(c for c in s if not c == " ") digraphs = Counter(c + d for c, d in zip(s, s[1:])) trigraphs = Counter(c + d + e for c, d, e in zip(s, s[1:], s[2:])) print("1-grams:", *unigraphs.most_common(7)) print("2-grams:", *digraphs.most_common(6)) print("3-grams:", *trigraphs.most_common(5)) print("1-words:", *(w for w in s.split() if len(w) == 1)) print("2-words:", *(w for w in s.split() if len(w) == 2)) print("3-words:", *(w for w in s.split() if len(w) == 3)) print() print(s) print() print(s .translate(str.maketrans("V", "a")) # "NK V" -> "?? a", "?? i" .translate(str.maketrans("NK", "is")) # "NK V" -> "in, if, is, it" "a" .translate(str.maketrans("NA", "it")) # looks promising? go with this .translate(str.maketrans("AZ", "to")) .translate(str.maketrans("XG", "ne")) # "etaoin" are supposed to be the most frequent letters, "taoi" # are assigned above, "XG" -> "en" led to "aeD" and "Caie" but # "XG" -> "ne" worked, as follows .translate(str.maketrans("MeOins to Cain", "begins to rain")) # .translate(str.maketrans("tYe", "the")) # .translate(str.maketrans("Hho", "who")) # .translate(str.maketrans("ReIIow", "fellow")) # .translate(str.maketrans("LarE twain", "mark twain")) # .translate(str.maketrans("lenDs", "lends")) # .translate(str.maketrans("PoQ", "you")) # .translate(str.maketrans("baUk", "back")) .translate(str.maketrans("", "")))The output at the point when the message began to reveal itself, before I wrote
the commented-out translation steps:
1-grams: ('X', 12) ('N', 11) ('G', 9) ('V', 9) ('K', 8) ('A', 8) ('Y', 6) 2-grams: ('NX', 6) ('K ', 5) (' A', 4) (' N', 4) ('NK', 3) (' H', 3) 3-grams: ('A M', 2) ('VNX', 2) (' HY', 2) ('AYG', 2) (' AY', 2) 1-words: V V 2-words: NK NK NA NA AZ 3-words: HYZ PZQ YNK AYG KQX VXD AYG V MVXEGC NK V RGIIZH HYZ IGXDK PZQ YNK QLMCGIIV HYGX AYG KQX NK KYNXNXO VXD HZXAK NA MVUE AYG LNXQAG NA MGONXK AZ CVNX. LVCE AHVNX a banEer is a ReIIoH HYo IenDs PoQ Yis QLbreIIa HYen tYe sQn is sYining anD Honts it baUE tYe LinQte it begins to rain. LarE tHain