Identifying Anagrams

April 28, 2015

Our first solution sorts the words and compares their “signatures:”

(define (anagram1 str1 str2)
  (and (not (string=? str1 str2))
       (equal? (sort charlist str1))
               (sort charlist str2)))))

> (anagram1 "DEPOSIT" "DOPIEST")
#t
> (anagram1 "STAR" "MOON")
#f
> (anagram1 "ZEBRA" "ZEBRA")
#f

Wherever there is a sorting solution to a problem, there is also generally a searching solution also. Here, we increment an array of character counts when reading the first string, decrement it when reading the second string, and check that all the counts are zero:

(define (anagram2 str1 str2)
  (let ((counts (make-vector 256 0)))
    (define (add idx n)
      (vector-set! counts idx
        (+ (vector-ref counts idx) n)))
    (do ((cs (string->list str1) (cdr cs))) ((null? cs))
      (add (char->integer (car cs)) 1))
    (do ((cs (string->list str2) (cdr cs))) ((null? cs))
      (add (char->integer (car cs)) -1))
    (let loop ((i 65))
      (cond ((= i 91) (not (string=? str1 str2)))
            ((not (zero? (vector-ref counts i))) #f)
            (else (loop (+ i 1)))))))

> (anagram2 "DEPOSIT" "DOPIEST")
#t
> (anagram2 "STAR" "MOON")
#f
> (anagram2 "ZEBRA" "ZEBRA")
#f

Our third solution maps each letter to a prime number, takes the product of the primes in each word, and compares them; beware of overflow in languages that don’t provide big integers natively:

(define (anagram3 str1 str2)
  (let ((letters (vector 2 3 5 7 11 13 17 19 23 29 31
         37 41 43 47 53 59 61 67 71 73 79 83 89 97 101)))
    (define (lookup c)
      (vector-ref letters (- (char->integer c) 65)))
    (and (not (string=? str1 str2))
         (= (apply * (map lookup (string->list str1)))
            (apply * (map lookup (string->list str2)))))))

> (anagram3 "DEPOSIT" "DOPIEST")
#t
> (anagram3 "STAR" "MOON")
#f
> (anagram3 "ZEBRA" "ZEBRA")
#f

The first solution has time complexity O(n log n) for the sort; the other two are both O(n). But given its simplicity, I would probably prefer the first solution over the other two, changing to the second solution only if the first solution proved to be a bottleneck on my program. You can run the program at http://ideone.com/p1SAvl.

Advertisement

Pages: 1 2

14 Responses to “Identifying Anagrams”

  1. Haven’t done the prime number version but the first two were the solutions I went for – both in perl tho’

    sub m1 {
      my( $a,$b ) = @_;
      return (join q(), sort split q(),$a) eq (join q(), sort split q(),$b) ? 1 : 0
    }
    
    sub m2 {
      my($a,$b)=@_;
      my %x;
      $x{$_}++ foreach split q(), $a;
      $x{$_}-- foreach split q(), $b;
      return 0 if grep { $_ } values %x;
      return 1;
    }
    
    print m1(@ARGV),"\n";
    print m2(@ARGV),"\n";
    
  2. Paul said

    In Python.

    from collections import Counter
    
    def is_anagram(a, b):
        return sorted(a) == sorted(b)
    
    def is_anagram(a, b):
        return Counter(a) == Counter(b)
    
  3. Rutger said

    Same solutions as Paul in Python. Added additional method using mapping onto primes and taking the product.

    from operator import mul
    
    enough_primes_to_index = [num for num in range(3, 4000, 2) if all(num % x for x in range(3, int(1 + num**0.5), 2))]
    
    def is_anagram_3(a, b):
    	return reduce(mul, [enough_primes_to_index[ord(x)] for x in a]) == reduce(mul, [enough_primes_to_index[ord(x)] for x in b])
    
    
  4. svenningsson said

    First version creates a bag out of each string and compares the bags for equality. The second solution removes all characters in the first string from the second string and checks that it results in the empty string, and vice versa.

    import Data.List
    import qualified Data.Map as M
    
    anagram1 s1 s2 = toBag s1 == toBag s2
      where toBag = foldr (\c -> M.insertWith (+) c 1) M.empty
    
    anagram2 s1 s2 = null (s1 \\ s2) && null (s2 \\ s1)
    
  5. matthew said

    Just one solution for the moment: a variant on the sorting method using two heaps. We save on work in the case that the two strings aren’t anagrams:

    #include <algorithm>
    #include <iostream>
    #include <string.h>
    
    bool isanag(char *a, char *b) {
      size_t n = strlen(a);
      if (n != strlen(b)) return false;
      std::make_heap(a,a+n);
      std::make_heap(b,b+n);
      for ( ; n > 0; n--) {
        if (a[0] != b[0]) return false;
        std::pop_heap(a,a+n);
        std::pop_heap(b,b+n);
      }
      return true;
    }
    
    int main(int argc, char *argv[]) {
      std::cout << isanag(argv[1],argv[2]) << "\n";
    }
    
  6. matthew said

    Here’s another one: generate all possible anagrams of a, and see if any are equal to b:

    #include <algorithm>
    #include <iostream>
    #include <string.h>
    
    bool isanag(char *a, const char *b) {
      size_t n = strlen(a);
      std::sort(a,a+n);
      while(true) {
        if (strcmp(a,b) == 0) return true;
        if (!std::next_permutation(a,a+n)) return false;
      }
    }
    
    int main(int argc, char *argv[]) {
      std::cout << isanag(argv[1],argv[2]) << "\n";
    }
    

    Not the most efficient way of solving the problem, but has a pleasant simplicity about it (and isanag only destructively modifies one of its arguments now).

  7. matthew said

    Last one, pack the counts into a single 64 bit number. There are only 2 bits per character so eg. “AAAAC” is considered an anagram of “BBBBB”, but we always correctly detect a true anagram:

    #include <iostream>
    #include <stdint.h>
    
    typedef uint64_t T;
    T sign(char *a) {
      T s = 0;
      for ( ; *a; a++) {
        s += T(1)<<(2*(*a-'A'));
      }
      return s;
    }
    
    int main(int argc, char *argv[]) {
      std::cout << (sign(argv[1]) == sign(argv[2])) << "\n";
    }
    
  8. John Cowan said

    Your second solution doesn’t work too well on a Unicode system: you’d need an array of size 1,114,112. However, a hash table or similar map from characters to integers is the same in spirit.

  9. fisherro said

    #include <map>
    #include <string>
    #include <algorithm>
    #include <iostream>

    void normalize(std::string& s)
    {
    s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), ::isspace));
    s.erase(std::find_if_not(s.rbegin(), s.rend(), ::isspace).base(), s.end());
    std::transform(s.begin(), s.end(), s.begin(), ::toupper);
    }

    bool common(std::string& a, std::string& b)
    {
    normalize(a);
    normalize(b);
    if (a.size() != b.size()) return false;
    if (a == b) return false;
    return true;
    }

    std::map<char, int> analyze(std::string word)
    {
    std::map<char, int> data;
    for (auto c: word) {
    auto iter = data.find(c);
    if (data.end() == iter) data[c] = 1;
    else ++(iter->second);
    }
    return data;
    }

    bool are_anagrams1(std::string a, std::string b)
    {
    if (!common(a, b)) return false;
    return analyze(a) == analyze(b);
    }

    bool are_anagrams2(std::string a, std::string b)
    {
    if (!common(a, b)) return false;
    std::sort(a.begin(), a.end());
    std::sort(b.begin(), b.end());
    return a == b;
    }

    void test(const std::string& a, const std::string& b, std::ostream& out)
    {
    out << a << " and " << b << ": " <<
    are_anagrams1(a, b) << ", " <<
    are_anagrams2(a, b) << ‘\n’;
    }

    int main(int argc, char** argv)
    {
    std::cout.setf(std::ios_base::boolalpha);
    test("deposit ", " dopiest", std::cout);
    test("STOP", "pots", std::cout);
    test("rite", "write", std::cout);
    test("right", "write", std::cout);
    test("same", "same", std::cout);
    }

  10. fisherro said

    Sorry for the bad formatting in my previous post. Is there a way to delete it?

    #include <map>
    #include <string>
    #include <algorithm>
    #include <iostream>
    
    void normalize(std::string& s)
    {
        s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), ::isspace));
        s.erase(std::find_if_not(s.rbegin(), s.rend(), ::isspace).base(), s.end());
        std::transform(s.begin(), s.end(), s.begin(), ::toupper);
    }
    
    bool common(std::string& a, std::string& b)
    {
        normalize(a);
        normalize(b);
        if (a.size() != b.size()) return false;
        if (a == b) return false;
        return true;
    }
    
    std::map<char, int> analyze(std::string word)
    {
        std::map<char, int> data;
        for (auto c: word) {
            auto iter = data.find(c);
            if (data.end() == iter) data[c] = 1;
            else ++(iter->second);
        }
        return data;
    }
    
    bool are_anagrams1(std::string a, std::string b)
    {
        if (!common(a, b)) return false;
        return analyze(a) == analyze(b);
    }
    
    bool are_anagrams2(std::string a, std::string b)
    {
        if (!common(a, b)) return false;
        std::sort(a.begin(), a.end());
        std::sort(b.begin(), b.end());
        return a == b;
    }
    
    void test(const std::string& a, const std::string& b, std::ostream& out)
    {
        out << a << " and " << b << ": " <<
            are_anagrams1(a, b) << ", " <<
            are_anagrams2(a, b) << '\n';
    }
    
    int main(int argc, char** argv)
    {
        std::cout.setf(std::ios_base::boolalpha);
        test("deposit ", " dopiest", std::cout);
        test("STOP", "pots", std::cout);
        test("rite", "write", std::cout);
        test("right", "write", std::cout);
        test("same", "same", std::cout);
    }
    
  11. Scott said
    
    def anagram?(a, b)
        dict = ('a'..'z').each_with_object({}) { |v,h| h[v] = 0 }
        a.split('').each{|let| dict[let] += 1}
        b.split('').each{|let| dict[let] -= 1}
        dict.values.select{|v| v != 0}.empty?
    end
    
    a, b = 'stop', 'pots'
    
    puts anagram?(a, b) # Approach 1
    puts a.chars.sort.join == b.chars.sort.join # Approach 2
    
    
  12. My discussion and solution in Java here http://www.capacode.com/?p=7

  13. mcmillhj said
    fun merge lt (xs, ys) = let
        fun loop(out, [], []) = List.rev out
          | loop(out, x::xs, []) = loop (x::out, xs, [])
          | loop(out, [], y::ys) = loop (y::out, [], ys)
          | loop(out, left as x::xs, right as y::ys) =
                    if lt (x, y) then loop (x::out, xs, right)
                    else loop (y::out, left, ys)
    in
        loop([], xs, ys)
    end
    
    fun mergesort lt xs = let
        val merge' = merge lt
        (* splits a list into two semi-equal halves in Linear time *)
        fun split ns = let 
            fun loop([], xs, ys) = (xs, ys)
              | loop(x::y::zs, xs, ys) = loop(zs, x::xs, y::ys)
              | loop(x::[], xs, ys) = loop([], x::xs, ys)
        in
            loop(List.rev(ns), [], [])
        end      
        fun ms []  = []
          | ms [x] = [x]
          | ms xs = let
              val (l, r) = split xs
          in
              merge'(ms l, ms r)
          end
    in
        ms xs
    end
    
    fun anagram1 a b = if a = b then true else let
        val a_list = explode a
        val b_list = explode b
        val ms     = mergesort (op <) 
    in
        (ms a_list) = (ms b_list)
    end
    
    fun anagram2 a b = let
        val array = Array.tabulate(128, fn _ => 0)
    in
        CharVector.app (fn char => Array.update (array, ord char, Array.sub (array, ord char) + 1)) a;
        CharVector.app (fn char => Array.update (array, ord char, Array.sub (array, ord char) - 1)) b;
        Array.all (fn e => e = 0) array
    end
    
  14. wert310 said
    import Data.List
    
    isAnagram s1 s2 = and $ map (`elem` s1) s2
    isAnagram' s1 s2 = (sort s1) == (sort s2)
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: