Identifying Anagrams

April 28, 2015

Two words are anagrams if they consist of the same letters, with the same number of occurrences, in a different order. For instance, DEPOSIT and DOPIEST are anagrams (aren’t you glad you know that), and OPTS, POTS, TOPS and STOP form an anagram class.

Your task is to write a program that takes two strings as input and determines whether or not they are anagrams; you may assume that the strings consist of only the letters A through Z in upper case. You must provide at least two different algorithms that work in fundamentally different ways. When you are finished, you are welcome to read or run a suggested solution, or to post your own solution or discuss the exercise in the comments below.

Posted by programmingpraxis

Filed in Exercises

14 Comments »

14 Responses to “Identifying Anagrams”

James Curtis-Smith said

April 28, 2015 at 9:12 AM

Haven’t done the prime number version but the first two were the solutions I went for – both in perl tho’

sub m1 {
  my( $a,$b ) = @_;
  return (join q(), sort split q(),$a) eq (join q(), sort split q(),$b) ? 1 : 0
}

sub m2 {
  my($a,$b)=@_;
  my %x;
  $x{$_}++ foreach split q(), $a;
  $x{$_}-- foreach split q(), $b;
  return 0 if grep { $_ } values %x;
  return 1;
}

print m1(@ARGV),"\n";
print m2(@ARGV),"\n";

Paul said

April 28, 2015 at 10:47 AM

In Python.

from collections import Counter

def is_anagram(a, b):
    return sorted(a) == sorted(b)

def is_anagram(a, b):
    return Counter(a) == Counter(b)

Rutger said

April 28, 2015 at 1:52 PM

Same solutions as Paul in Python. Added additional method using mapping onto primes and taking the product.

from operator import mul

enough_primes_to_index = [num for num in range(3, 4000, 2) if all(num % x for x in range(3, int(1 + num**0.5), 2))]

def is_anagram_3(a, b):
	return reduce(mul, [enough_primes_to_index[ord(x)] for x in a]) == reduce(mul, [enough_primes_to_index[ord(x)] for x in b])

svenningsson said
April 28, 2015 at 3:53 PM
First version creates a bag out of each string and compares the bags for equality. The second solution removes all characters in the first string from the second string and checks that it results in the empty string, and vice versa.
```
import Data.List
import qualified Data.Map as M

anagram1 s1 s2 = toBag s1 == toBag s2
  where toBag = foldr (\c -> M.insertWith (+) c 1) M.empty

anagram2 s1 s2 = null (s1 \\ s2) && null (s2 \\ s1)
```

matthew said

April 28, 2015 at 9:27 PM

Just one solution for the moment: a variant on the sorting method using two heaps. We save on work in the case that the two strings aren’t anagrams:

#include <algorithm>
#include <iostream>
#include <string.h>

bool isanag(char *a, char *b) {
  size_t n = strlen(a);
  if (n != strlen(b)) return false;
  std::make_heap(a,a+n);
  std::make_heap(b,b+n);
  for ( ; n > 0; n--) {
    if (a[0] != b[0]) return false;
    std::pop_heap(a,a+n);
    std::pop_heap(b,b+n);
  }
  return true;
}

int main(int argc, char *argv[]) {
  std::cout << isanag(argv[1],argv[2]) << "\n";
}

matthew said

April 28, 2015 at 9:54 PM

Here’s another one: generate all possible anagrams of a, and see if any are equal to b:

#include <algorithm>
#include <iostream>
#include <string.h>

bool isanag(char *a, const char *b) {
  size_t n = strlen(a);
  std::sort(a,a+n);
  while(true) {
    if (strcmp(a,b) == 0) return true;
    if (!std::next_permutation(a,a+n)) return false;
  }
}

int main(int argc, char *argv[]) {
  std::cout << isanag(argv[1],argv[2]) << "\n";
}

Not the most efficient way of solving the problem, but has a pleasant simplicity about it (and isanag only destructively modifies one of its arguments now).

matthew said

April 28, 2015 at 10:48 PM

Last one, pack the counts into a single 64 bit number. There are only 2 bits per character so eg. “AAAAC” is considered an anagram of “BBBBB”, but we always correctly detect a true anagram:

#include <iostream>
#include <stdint.h>

typedef uint64_t T;
T sign(char *a) {
  T s = 0;
  for ( ; *a; a++) {
    s += T(1)<<(2*(*a-'A'));
  }
  return s;
}

int main(int argc, char *argv[]) {
  std::cout << (sign(argv[1]) == sign(argv[2])) << "\n";
}

John Cowan said
April 29, 2015 at 2:30 AM
Your second solution doesn’t work too well on a Unicode system: you’d need an array of size 1,114,112. However, a hash table or similar map from characters to integers is the same in spirit.
fisherro said
April 29, 2015 at 2:34 PM
#include <map>
#include <string>
#include <algorithm>
#include <iostream>

void normalize(std::string& s)
{
s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), ::isspace));
s.erase(std::find_if_not(s.rbegin(), s.rend(), ::isspace).base(), s.end());
std::transform(s.begin(), s.end(), s.begin(), ::toupper);
}

bool common(std::string& a, std::string& b)
{
normalize(a);
normalize(b);
if (a.size() != b.size()) return false;
if (a == b) return false;
return true;
}

std::map<char, int> analyze(std::string word)
{
std::map<char, int> data;
for (auto c: word) {
auto iter = data.find(c);
if (data.end() == iter) data[c] = 1;
else ++(iter->second);
}
return data;
}

bool are_anagrams1(std::string a, std::string b)
{
if (!common(a, b)) return false;
return analyze(a) == analyze(b);
}

bool are_anagrams2(std::string a, std::string b)
{
if (!common(a, b)) return false;
std::sort(a.begin(), a.end());
std::sort(b.begin(), b.end());
return a == b;
}

void test(const std::string& a, const std::string& b, std::ostream& out)
{
out << a << " and " << b << ": " <<
are_anagrams1(a, b) << ", " <<
are_anagrams2(a, b) << ‘\n’;
}

int main(int argc, char** argv)
{
std::cout.setf(std::ios_base::boolalpha);
test("deposit ", " dopiest", std::cout);
test("STOP", "pots", std::cout);
test("rite", "write", std::cout);
test("right", "write", std::cout);
test("same", "same", std::cout);
}

fisherro said

April 29, 2015 at 2:41 PM

Sorry for the bad formatting in my previous post. Is there a way to delete it?

#include <map>
#include <string>
#include <algorithm>
#include <iostream>

void normalize(std::string& s)
{
    s.erase(s.begin(), std::find_if_not(s.begin(), s.end(), ::isspace));
    s.erase(std::find_if_not(s.rbegin(), s.rend(), ::isspace).base(), s.end());
    std::transform(s.begin(), s.end(), s.begin(), ::toupper);
}

bool common(std::string& a, std::string& b)
{
    normalize(a);
    normalize(b);
    if (a.size() != b.size()) return false;
    if (a == b) return false;
    return true;
}

std::map<char, int> analyze(std::string word)
{
    std::map<char, int> data;
    for (auto c: word) {
        auto iter = data.find(c);
        if (data.end() == iter) data[c] = 1;
        else ++(iter->second);
    }
    return data;
}

bool are_anagrams1(std::string a, std::string b)
{
    if (!common(a, b)) return false;
    return analyze(a) == analyze(b);
}

bool are_anagrams2(std::string a, std::string b)
{
    if (!common(a, b)) return false;
    std::sort(a.begin(), a.end());
    std::sort(b.begin(), b.end());
    return a == b;
}

void test(const std::string& a, const std::string& b, std::ostream& out)
{
    out << a << " and " << b << ": " <<
        are_anagrams1(a, b) << ", " <<
        are_anagrams2(a, b) << '\n';
}

int main(int argc, char** argv)
{
    std::cout.setf(std::ios_base::boolalpha);
    test("deposit ", " dopiest", std::cout);
    test("STOP", "pots", std::cout);
    test("rite", "write", std::cout);
    test("right", "write", std::cout);
    test("same", "same", std::cout);
}

Scott said

May 1, 2015 at 6:45 PM


def anagram?(a, b)
    dict = ('a'..'z').each_with_object({}) { |v,h| h[v] = 0 }
    a.split('').each{|let| dict[let] += 1}
    b.split('').each{|let| dict[let] -= 1}
    dict.values.select{|v| v != 0}.empty?
end

a, b = 'stop', 'pots'

puts anagram?(a, b) # Approach 1
puts a.chars.sort.join == b.chars.sort.join # Approach 2

truongkhanhit said
May 9, 2015 at 3:11 PM
My discussion and solution in Java here http://www.capacode.com/?p=7

mcmillhj said

May 12, 2015 at 6:01 PM

fun merge lt (xs, ys) = let
    fun loop(out, [], []) = List.rev out
      | loop(out, x::xs, []) = loop (x::out, xs, [])
      | loop(out, [], y::ys) = loop (y::out, [], ys)
      | loop(out, left as x::xs, right as y::ys) =
                if lt (x, y) then loop (x::out, xs, right)
                else loop (y::out, left, ys)
in
    loop([], xs, ys)
end

fun mergesort lt xs = let
    val merge' = merge lt
    (* splits a list into two semi-equal halves in Linear time *)
    fun split ns = let 
        fun loop([], xs, ys) = (xs, ys)
          | loop(x::y::zs, xs, ys) = loop(zs, x::xs, y::ys)
          | loop(x::[], xs, ys) = loop([], x::xs, ys)
    in
        loop(List.rev(ns), [], [])
    end      
    fun ms []  = []
      | ms [x] = [x]
      | ms xs = let
          val (l, r) = split xs
      in
          merge'(ms l, ms r)
      end
in
    ms xs
end

fun anagram1 a b = if a = b then true else let
    val a_list = explode a
    val b_list = explode b
    val ms     = mergesort (op <) 
in
    (ms a_list) = (ms b_list)
end

fun anagram2 a b = let
    val array = Array.tabulate(128, fn _ => 0)
in
    CharVector.app (fn char => Array.update (array, ord char, Array.sub (array, ord char) + 1)) a;
    CharVector.app (fn char => Array.update (array, ord char, Array.sub (array, ord char) - 1)) b;
    Array.all (fn e => e = 0) array
end

wert310 said

October 25, 2015 at 6:59 PM

import Data.List

isAnagram s1 s2 = and $ map (`elem` s1) s2
isAnagram' s1 s2 = (sort s1) == (sort s2)

Programming Praxis