Monte Carlo Factorization

June 19, 2009

Though the explanation is lengthy, the code is agreeably short:

(define (factor n . c)
  (define (f x c) (modulo (+ (* x x) c) n))
  (let ((c (if (pair? c) (car c) 1)))
    (let loop ((x 2) (y 2) (d 1))
      (cond ((= d 1)
              (let ((x (f x c)) (y (f (f y c) c)))
                (loop x y (gcd (- x y) n))))
            ((= d n) (factor n (+ c 1)))
            (else d)))))

Factor finds a single factor of n. The pseudo-random sequence is generated by f. The main loop starts with x = y = 2; it loops if the greatest common divisor d is 1, restarts with the next c if d is n, and otherwise reports d as a factor of n.

Pollard’s method works only if n is composite, and the factor function finds only a single factor. Factors, shown below, finds all the factors of n; factors is recursive, stopping when n is prime:

(define (factors n)
  (sort < (let fact ((n n) (fs '()))
    (cond ((= n 1) fs)
          ((even? n) (fact (/ n 2) (cons 2 fs)))
          ((prime? n) (cons n fs))
          (else (let ((f (factor n)))
                  (append fs (fact f '()) (fact (/ n f) '()))))))))

Factors uses the prime? function, and its companion check?, from the exercise on the Rabin-Miller primality checker. The factors of 298 – 1 = 316912650057057350374175801343 are 3, 43, 127, 4363953127297 and 4432676798593. You can run the program at http://programmingpraxis.codepad.org/4mQExWYY.

Pages: 1 2

5 Responses to “Monte Carlo Factorization”

  1. […] Praxis – Monte Carlo factorization By Remco Niemeijer In today’s Programming Praxis problem we have to implement John Pollard’s factorization algorithm. Our […]

  2. Remco Niemeijer said

    My Haskell solution (see http://bonsaicode.wordpress.com/2009/06/19/programming-praxis-monte-carlo-factorization/ for a version with comments):

    import Control.Arrow
    import Data.Bits
    import Data.List
    import System.Random

    isPrime :: Integer -> StdGen -> Bool
    isPrime n g =
    let (s, d) = (length *** head) . span even $ iterate (`div` 2) (n-1)
    xs = map (expm n d) . take 50 $ randomRs (2, n – 2) g
    in all (\x -> elem x [1, n – 1] ||
    any (== n-1) (take s $ iterate (expm n 2) x)) xs

    expm :: Integer -> Integer -> Integer -> Integer
    expm m e b = foldl’ (\r (b’, _) -> mod (r * b’) m) 1 .
    filter (flip testBit 0 . snd) .
    zip (iterate (flip mod m . (^ 2)) b) $
    takeWhile (> 0) $ iterate (`shiftR` 1) e

    factor :: Integer -> Integer -> Integer
    factor c n = factor’ 2 2 1 where
    f x = mod (x * x + c) n
    factor’ x y 1 = factor’ x’ y’ (gcd (x’ – y’) n) where
    (x’, y’) = (f x, f $ f y)
    factor’ _ _ d = if d == n then factor (c + 1) n else d

    factors :: Integer -> StdGen -> [Integer]
    factors n g = sort $ fs n where
    fs x | even x = 2 : fs (div x 2)
    | isPrime x g = [x]
    | otherwise = f : fs (div x f) where f = factor 1 x

    main :: IO ()
    main = print . factors (2^98 – 1) =<< getStdGen [/sourcecode]

  3. Here’s my attempt in Python. A couple of issues in the code remain. The factors that it discovers aren’t guaranteed to be prime. I cribbed the Miller-Rabin test from one of the python code repositories. And, I don’t really understand exactly how this works. :-) Back to the reference books.

    #!/usr/bin/env python
    
    #
    # a basic implementation of the Pollard rho factorization
    # Written by Mark VandeWettering <markv@pixar.com>
    #
    
    import sys
    import locale
    import random
    
    class FactorError(Exception):
        def __init__(self, value):
            self.value = value
        def __str__(self):
            return repr(self.value)
    
    def miller_rabin_pass(a, n):
        d = n - 1
        s = 0
        while d % 2 == 0:
            d >>= 1
            s += 1
    
        a_to_power = pow(a, d, n)
        if a_to_power == 1:
            return True
        for i in xrange(s-1):
            if a_to_power == n - 1:
                return True
            a_to_power = (a_to_power * a_to_power) % n
        return a_to_power == n - 1
    
    def isprime(n):
        for repeat in xrange(20):
            a = 0
            while a == 0:
                a = random.randrange(n)
            if not miller_rabin_pass(a, n):
                return False
        return True
    
    def gcd(a, b):
        while b != 0:
            a, b = b, a%b
        return a
    
    def findfactor(n):
        for c in range(1, 50):
            x = y = random.randint(1, n-1)
            x = (x * x + c) % n
            y = (y * y + c) % n
            y = (y * y + c) % n
            while True:
                t = gcd(n, abs(x-y))
                if t == 1:
                    x = (x * x + c) % n
                    y = (y * y + c) % n
                    y = (y * y + c) % n
                elif t == n:
                    break
                else:
                    return t
        raise FactorError("couldn't find a factor.")
    
    def factor(n):
        r = []
        while True:
            if isprime(n):
                r.append(n)
                break
            try:
                f = findfactor(n)
                r.append(f)
                n = n / f
            except FactorError, msg:
                r.append(n)
                break
        r.sort()
        return r
    
    def doit(n):
            flist = factor(n)
            print locale.format("%d", n, True), "="
            for f in flist:
                    print "\t%s" % locale.format("%d", f, True)
    
    locale.setlocale(locale.LC_ALL, "")
    
    doit(2**98-1)
    
    
  4. Okay, I fixed a couple of things, and extended the program a tiny bit. It now is a numeric calculator of sorts. It’s not industrial strength or anything, but you can basically type any python numeric expression, and it will use eval() (at least with a predefined environment) to evaluate the number. I’ve also predefined a couple of built in functions. prime(n) will return an n digit prime. rsa(n) will return an rsa key which is the combination of two n/2 digit primes. factor(n) factors n. I’ve also added code to do some trial division as well, to get rid of small factors, and it collapses multiple occurrences of a factor (instead of printing 128 copies of 2 when factoring 2^128, it outputs “2**128”).

    #!/usr/bin/env python
    
    #
    # a basic implementation of the Pollard rho factorization
    # Written by Mark VandeWettering <markv@pixar.com>
    #
    
    import sys
    import locale
    import random
    try:
        import readline
    except ImportError, msg:
        print msg
        print "Line editing disabled."
    
    
    # an inefficient but straightforward way to find primes...
    
    def primes(n):
        primes = [2]
        for x in range(3, n, 2):
            prime = True
            for p in primes:
                if p * p > n:
                    break
                if x % p == 0:
                    # it's composite..
                    prime = False
                    break
            if prime:
                primes.append(x)
        return primes
    
    class FactorError(Exception):
        def __init__(self, value):
            self.value = value
        def __str__(self):
            return repr(self.value)
    
    def miller_rabin_pass(a, n):
        d = n - 1
        s = 0
        while d % 2 == 0:
            d >>= 1
            s += 1
    
        a_to_power = pow(a, d, n)
        if a_to_power == 1:
            return True
        for i in xrange(s-1):
            if a_to_power == n - 1:
                return True
            a_to_power = (a_to_power * a_to_power) % n
        return a_to_power == n - 1
    
    def isprime(n):
        for repeat in xrange(20):
            a = 0
            while a == 0:
                a = random.randrange(n)
            if not miller_rabin_pass(a, n):
                return False
        return True
    
    def gcd(a, b):
        while b != 0:
            a, b = b, a%b
        return a
    
    def findfactor(n):
        for c in range(1, 10):
            x = y = random.randint(1, n-1)
            x = (x * x + c) % n
            y = (y * y + c) % n
            y = (y * y + c) % n
            while True:
                t = gcd(n, abs(x-y))
                if t == 1:
                    x = (x * x + c) % n
                    y = (y * y + c) % n
                    y = (y * y + c) % n
                elif t == n:
                    break
                else:
                    return t
        raise FactorError("couldn't find a factor.")
    
    def factor(n):
        r = []
        for p in primes(10000):
            while n % p == 0:
                r.append(p)
                n = n / p
        if n == 1:
            return r
        while True:
            if isprime(n):
                r.append(n)
                break
            try:
                f = findfactor(n)
                r.append(f)
                n = n / f
            except FactorError, msg:
                r.append(n)
                break
        r.sort()
        return r
    
    # this function would be easier to write recursively, but
    # python isn't good at tail recursion, so in theory, it could
    # fail.  Too bad.
    
    def shorten(flist):
        slist = []
        idx = 0
        while flist[idx:] != []:
            hd = flist[idx]
            idx = idx + 1
            exp = 1
            while flist[idx:] != [] and flist[idx] == hd:
                exp = exp + 1
                idx = idx + 1
            if exp > 1:
                    slist.append(locale.format("%d", hd, True) + "**"+str(exp))
            else:
                    slist.append(locale.format("%d", hd, True))
        return slist
    
    def factorit(n):
            flist = factor(n)
            print locale.format("%d", n, True), "="
            for f in shorten(flist):
                    print "\t%s" % f
    
    locale.setlocale(locale.LC_ALL, "")
    
    import string
    
    def prime(n):
            "generate an n bit prime"
            while True:
                    x = int(''.join([random.choice(string.digits) for i in range(n)]))
                    if isprime(x):
                            return x
    
    def rsapair(n):
            return prime(n//2)*prime(n//2)
    
    env = { "prime" : prime,
            "rsa" : rsapair,
            "factor" : factorit}
    
    while True:
            try:
                    num = raw_input("enter a python numexpr >> ")
                    n = eval(num, env)
                    if n:
                            print n
            except NameError, msg:
                    print >> sys.stderr, msg
            except SyntaxError, msg:
                    print >> sys.stderr, msg
            except KeyboardInterrupt, msg:
                    print >> sys.stderr, "**Interrupted**"
                    continue
            except EOFError, msg:
                    print
                    break
    
  5. programmingpraxis said

    Instead of eval, you might want to build your own calculator. See the very first Programming Praxis exercise for an RPN calculator.

Leave a comment