Creation

March 3, 2009

We first save the cipher-text as a string:

(define cipher-text
  (list->string (map integer->char
    '(14 11 78 17 27 12 ... 4 26 12 28 7 93))))

Since we know the password is fixed and has length n, we know that every nth character in the plain-text has been enciphered with the same character of the password. Thus, our strategy is to break down the cipher-text into n stripes, assume that the most frequently-occurring character in each stripe represents the space character, and use xor to find the password character for that stripe. We try various n in turn until one works, giving us sensible English-language text.

(define (try-pass n text)
   (let loop ((i 0) (pwd '()))
     (if (= i n)
         (list->string (reverse pwd))
         (loop (+ i 1) (cons (char-xor #\space
           (caar (freq (extract n i text)))) pwd)))))

Try-pass uses extract to get a stripe, freq to find the most common character, and char-xor to perform the xor operation:

(define (char-xor a b)
  (define (xor a b)
    (cond ((eq? a b) 0)
          ((zero? a) b)
          ((zero? b) a)
          (else (+ (if (eq? (even? a) (even? b)) 0 1)
                   (* (xor (quotient a 2) (quotient b 2)) 2)))))
  (integer->char (xor (char->integer a) (char->integer b))))

(define (extract span mod text)
  (let loop ((i 0) (ts '()))
    (cond ((= (string-length text) i)
            (list->string (reverse ts)))
          ((= (modulo i span) mod)
            (let ((t (string-ref text i)))
              (loop (+ i 1) (cons t ts))))
          (else (loop (+ i 1) ts)))))

(define (freq text)
  (if (string=? "" text) '()
    (let ((txt (sort charlist text))))
      (let loop ((txt (cdr txt)) (prev (car txt)) (cnt 1) (freqs '()))
        (cond ((null? txt)
                (sort (lambda (x y) (> (cdr x) (cdr y)))
                  (cons (cons prev cnt) freqs)))
              ((char=? (car txt) prev)
                (loop (cdr txt) prev (+ cnt 1) freqs))
              (else (loop (cdr txt) (car txt) 1
                          (cons (cons prev cnt) freqs))))))))

All that’s left is to apply the password to the cipher-text to read the plain-text:

(define (crypt password text)
  (define (cycle xs) (set-cdr! (last-pair xs) xs) xs)
  (let loop ((pwd (cycle (string->list password))) (txt (string->list text)) (out '()))
    (if (null? txt) (list->string (reverse out))
      (loop (cdr pwd) (cdr txt) (cons (char-xor (car pwd) (car txt)) out)))))

We try several password lengths, in turn:

> (try-pass 1 cipher-text)
"e"
> (try-pass 2 cipher-text)
"ee"
> (try-pass 3 cipher-text)
"ess"
> (try-pass 4 cipher-text)
"see "
> (try-pass 5 cipher-text)
"seees"
> (try-pass 6 cipher-text)
"eseess"
> (try-pass 7 cipher-text)
"Genesis"

And that works. The password is “Genesis” and the plain-text is the story of Creation from the beginning of the Holy Bible:

> (crypt "Genesis" cipher-text)
"In the beginning ... creation."

Note that the same procedure that performs encryption also performs decryption, since xor is a reflexive operator; thus the cipher-text was created like this:

> (define cipher-text (crypt "Genesis" plain-text))

The decryption program is available at http://programmingpraxis.codepad.org/N8fi45eN.

About these ads

Pages: 1 2 3

8 Responses to “Creation”

  1. Jebb said

    Can’t believe nobody else commented on this… That one was a lot of fun!
    I took a different approach from the proposed Scheme solution, went for a dictionary attack:

    #!/usr/bin/python
    
    def read_cipher(textFile):
    	'''Takes the name of the file containing the encrypted text as argument,
    	returns the list of ascii ordinals'''
    	f = open(textFile, 'r')
    	returnList = [int(a) for a in f.read().split()]
    	f.close()
    	return returnList
    
    def decrypt(ordList, password):
    	'''This function XORs a text with the code (both as list of ordinals!!)
    	and returns the encoded/decoded list of ordinals'''
    	from itertools import cycle
    	decryptedList = [a^ord(b) for a,b in zip(ordList, cycle(password))]
    	return decryptedList
    
    def make_attack_dictionary(dictFile):
    	f = open(dictFile,'r')
    	returnDict = attack_dictionary(f.read().split())
    	f.close()
    	return returnDict
    
    def attack_dictionary(rawDict):
    	'''Takes a list of words as argument, and produces an attack dictionary
    	from it: with added case variations (also capitalizes the words that
    	only exist as small case in the initial list), and sorted by length'''
    	from string import capitalize
    	# Generate a dict of already capitalized words in the raw dictionary
    	capWords = {}
    	for word in rawDict:
    		if word == capitalize(word):
    			capWords[word] = 1
    	# Now capitalize the rest of them; this works in O(n) because capWords
    	# is a dict
    	complementDict = [capitalize(word) for word in rawDict if not word in capWords]
    	# Finally, merge and sort the attack dictionary by word length
    	return sorted(rawDict + complementDict, key=len)
    
    def words_found(text, dictionary):
    	'''Returns the number of known words (from dictionary) found in text'''
    	from re import findall
    	wordCount = 0
    	for word in findall(r'\w\w+', text): 
    		if word in dictionary:
    			wordCount +=1
    	return wordCount
    
    def attack(cipherText):
    	'''Performs a dictionary attack on an encrypted text, passed as a
    	list of integers'''
    	attackDict = make_attack_dictionary('/usr/share/dict/words')
    # We'll also build a dict purely for performance reasons
    	compareDict = {}
    	for word in attackDict:
    		compareDict[word] = 1
    # Tweak these parameters for the dictionary attack
    	attackDepth = 50
    	attackLengthMax = 8
    	attackThreshold = 6
    # Now try all passwords of length <= attackLengthMax,
    # scanning the first attackDepth characters in cipherText
    # and print something when finding more than attackThreshold known words
    	for password in attackDict:
    		if len(password) > attackLengthMax: break
    		decryptedText = "".join([chr(a) 
    				for a in decrypt(cipherText[:attackDepth], password)])
    		wordsFound = words_found(decryptedText, compareDict)
    		if wordsFound >= attackThreshold:
    			print '%d matches found with %s' % (wordsFound, password)
    
    if __name__ == '__main__':
    	cipherText = read_cipher('text')
    	attack(cipherText)
    	print 'Password to try?'
    	password = raw_input()
    	print "".join([chr(a) for a in decrypt(cipherText, password)])
    

    Basically, I start by building an attack dictionary, sorted by length. Then I brute-force my way through this attack dictionary, trying to decrypt the beginning of the encrypted text using each one of the possible passwords, and I count the number of real words I can find in the decrypted version.

    Of course it could probably be automated completely by simply returning the password yielding the highest number of real words, but it works for me like that. Running it as it is shown here (scanning the first 50 letters of the encrypted text, and trying passwords up to 8 letters long), it shows me the list of following possible keys (only showing the keys producing 6 or more known words):
    6 matches found with imber
    6 matches found with infer
    6 matches found with inker
    6 matches found with ceresin
    6 matches found with chooser
    6 matches found with geronto
    6 matches found with oenolin
    6 matches found with Allower
    6 matches found with Ceresin
    6 matches found with Chooser
    6 matches found with Foresin
    8 matches found with Genesis
    6 matches found with Geronto
    6 matches found with Getaway
    6 matches found with Subjoin
    6 matches found with Thrower
    6 matches found with Vetiver

  2. Graham said

    I followed the ideas of the solution here, but wished to automate the process
    a bit more. I had my code try different passwords (given by assuming that space
    is the most common character), checking those words against membership in my
    system’s dict file. It outputs all possibilities, so it still requires a human
    eye to sort through the gobbledygook.

    #!/usr/bin/env python
    
    from collections import Counter
    from pprint import pprint
    
    def split_n(seq, n):
        """Split sequence into groupings by every nth item"""
        return (seq[i::n] for i in xrange(n))
    
    def most_freq(seq):
        """Most frequent item in sequence"""
        return Counter(seq).most_common(1)[0][0]
    
    def pass_n(c_text, n):
        """Assume that most frequent item is a space (with ord 32); return a
        password of length n from those most frequent items in """
        most_freqs = (most_freq(seq) for seq in split_n(c_text, n))
        return ''.join(chr(x ^ 32) for x in most_freqs)
    
    def decipher(c_text, password):
        """Given a password, use it to give clear text"""
        return ''.join(chr(x ^ ord(y)) for (x, y) in zip(c_text,
            password * (len(c_text) / len(password))))
    
    def decrypt(c_text, dictionary):
        """Given a dictionary of words, try successive ns in pass_n until the
        password is in dictionary. Then, return deciphered text via decipher."""
        possibles = []
        for n in xrange(len(max(dictionary, key=lambda word: len(word)))):
            password = pass_n(c_text, n)
            if password.lower() in dictionary:
                possibles.append((n, password, decipher(c_text, password)))
        return possibles
    
    if __name__ == "__main__":
        with open("creation_ctext.txt") as C_FILE:  # Numbers in single line file
            C_TEXT = [int(x) for x in C_FILE.read().split()]
            with open("/usr/share/dict/web2") as DICT_FILE:
                DICTIONARY = set(line.strip() for line in DICT_FILE.readlines())
                pprint(decrypt(C_TEXT, DICTIONARY))
    
  3. programmingpraxis said

    If you are really interested in automating this program fully, Google for ‘Kasiski examination’ and go from there. Like a lot of these things, it’s named for the wrong man; Charles Babbage figured it out twenty years before Kasiski.

  4. Graham said

    Interesting stuff! It’s reminiscent of some of Simon Singh’s “The Code Book,” a decent source and fun read.

  5. kawas said

    Like others I want to automate the process as much as possible.
    So my slightly different implementation returns a password and a “confidence number” based on the frequencies of “” and “e”.
    Then we can search for all passwords given a maximum length and a confidence threshold.

    Clojure code:

    (defn read-cipher [filepath]
      (map #(Integer/parseInt %) (seq (.split (slurp filepath) "\\s+"))))
    
    (defn stripe [text n]
      (for [i (range 0 (count text) n)] (nth text i)))
    
    (defn rsort-freq [text]
      (reverse (sort-by second (frequencies text))))
    
    (defn char-xor [ord1 ord2]
      (char (bit-xor ord1 ord2)))
    
    (defn guess-pass [text nchar]
      (loop [text text pwd [] same 0]
        (if (= nchar (count pwd))
          (vector (apply str pwd) (float (/ same nchar)))
          (let [strp (stripe text nchar)
                [[ord1 _] [ord2 _]] (take 2 (rsort-freq strp))
                letter1 (char-xor (int \space) ord1)
                letter2 (char-xor (int \e) ord2)
                same (if (= letter1 letter2) (inc same) same)]
            (recur (next text) (conj pwd letter1) same)))))
    
    (defn search-pass [filepath max-nchar confidence]
      (let [text (read-cipher filepath)]
        (filter #(< confidence (second %))
          (map #(guess-pass text %) (range 1 (inc max-nchar))))))
    

    For example, we can search for all passwords less than 20 chars with at least 80% confidence:

    user=> (search-pass "praxis/crypted.txt" 20 0.8)
    (["Genesis" 0.85714287])
    

    This one is funny:

    user=> (search-pass "praxis/crypted.txt" 20 0.6)
    (["ess" 0.6666667] ["Genesis" 0.85714287] ["GenesisGenesis" 0.78571427])
    
  6. […] In their book Software Tools, Brian Kernighan and P. J. Plauger describe a simple command for encrypting files. It works by xor-ing each byte of a file with a byte of the key, extending the key cyclically until it is the same length as the text. The xor operation is symmetric, so only one program is needed to perform both encryption and decryption. This isn’t a particularly secure encryption algorithm; we showed how to break it in one of our earliest exercises. […]

  7. simonliu2008 said

    I had the similar ideas with Graham, and I improve my script when I saw Graham’s since I am not familiar with Python.

    from collections import Counter

    import string

    def crack(text, n):
    pwd = []

    for seq in [text[i::n] for i in xrange(n)]:
    p = chr(ord(‘ ‘) ^ int(Counter(seq).most_common(1)[0][0]))
    if is_pwd_char(p):
    pwd.append(p)
    else:
    return None

    return pwd

    def is_pwd_char(ch):
    return ch in string.digits or ch in string.ascii_letters

    def decrypt(text, pwd):
    return [chr(ord(pwd[i % len(pwd)]) ^ int(x)) for (i, x) in enumerate(text)]

    if __name__ == ‘__main__':
    with open(‘/tmp/cipher-text’) as f:
    text = f.read().split()
    for i in range(1, 21):
    pwd = crack(text, i)
    if pwd != None:
    print ‘decrypt with pwd: %s’ % pwd
    print ”.join(decrypt(text, pwd))

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

Follow

Get every new post delivered to your Inbox.

Join 643 other followers

%d bloggers like this: