Find The Difference

January 15, 2019

We consider first a solution that sorts both strings then indexes through them until it finds a difference:

(define (diff1 xstr ystr)
  (let loop ((xs (sort char<? (string->list xstr)))
             (ys (sort char<? (string->list ystr))))
    (cond ((null? xs) (car ys))
          ((char=? (car xs) (car ys))
            (loop (cdr xs) (cdr ys)))
          (else (car ys)))))

> (diff1 "abcdef" "bfxdeac")
#\x

That takes time O(n log n), where n is the length of the strings. A better solution operates in time O(n):

(define (diff2 xstr ystr)
  (let ((x (sum (map char->integer (string->list xstr))))
        (y (sum (map char->integer (string->list ystr)))))
    (integer->char (- y x))))

> (diff2 "abcdef" "bfxdeac")
#\x

That indexes through the two strings, without sorting, summing the ascii values of the characters in the strings and reporting the difference at the end. You could use XOR instead of addition if you prefer.

You can run the program at https://ideone.com/Sz9Mq6.

Posted by programmingpraxis

Filed in Exercises

21 Comments »

21 Responses to “Find The Difference”

András said
January 15, 2019 at 10:12 AM
Scala: (t.toSet–(s.toSet)).iterator.next //> res0: Char = x
James Curtis-Smith said
January 15, 2019 at 10:25 AM
Nice one for perl… in the case we are only looking for one known difference it is just finding the letter in the two strings which appears an odd number of times….
```
use feature 'say';
my( $x, $y, %f ) = ( 'abcdef', 'bfxdeac' );
$f{$_}=!$f{$_} foreach split //, $x.$y;
say grep {$f{$_}} keys %f;'
```
If you were looking for all added/removed letters you could change this to:
```
use feature 'say';
my( $x, $y, %f ) = ( 'abcdef', 'bfxdeac' );
$f{$_}++ foreach split //, $x;
$f{$_}-- foreach split //, $y;
say grep {$f{$_}} keys %f;'
```
The %f contains +ve values for letters in $x but not $y and then -ve values for letters in $y but not in $x if that makes sense (and the absolute value is the count of each).
Zack said
January 15, 2019 at 10:37 AM
Cool one for basic text processing. Here is my take with Julia 1.0.2:

function StringDifference(s::AbstractString, t::AbstractString)
S = sort(split(s, “”))
T = sort(split(t, “”))
```
for i = 1:length(s)
    if S[i] != T[i]; return T[i]; end
end

return T[end]
```
end

milroy said

January 15, 2019 at 3:47 PM

#include
#include
#include
using namespace std;

int main()
{
string s1;
string s2;
string s3;

cout<<"enter a string s1:";
cin>>s1;
cout<<endl<<"enter a string s1:";
cin>>s2;

sort(s1.begin(),s1.end());
sort(s2.begin(),s2.end());
int i=0;
int found=0;

for(i=0;i<s1.length();i++)
{
    if(s2.at(i)!=s1.at(i))
    {
    cout<<s2.at(i);
    break;
    found=1;
    }
    else if(i==s1.length()-1)
    {
     cout<<endl<<"New char found in s2:";
     cout<< s2.at(i+1);

    }

}


    cin>>s2;


return 0;

}

Graham said
January 15, 2019 at 3:47 PM
Fun exercise! I wouldn’t have thought of the xor on my own.
I also didn’t trust myself to always put the shorter string first, so my solutions allow for supplying the strings in either order.

module Main where
import Data.Bits (xor) import Data.Char (chr, ord) import Data.Foldable (foldl') import qualified Data.Set as S data Solution a b = S { _in :: String -> a                       , _compute :: a -> a -> b                       , _out :: b -> Char                       } run :: Solution a b -> String -> String -> Char run (S i c o) x y = o $ c (i x) (i y)
s1 :: Solution (S.Set Char) (S.Set Char) s1 =   S S.fromList (\x y -> (xS.union y) S.\\ (x S.intersection y)) S.findMin

s2 :: Solution Int Int
s2 = S (sum . fmap ord) (-) (chr . abs)

s3 :: Solution Int Int
s3 = S (foldl' xor 0 . fmap ord) xor chr

main :: IO ()
main = do
  let x = "abcdef"
      y = "bfxdeac"
  print $ all (== 'x') [run s1 x y, run s2 x y, run s3 x y]

Graham said

January 15, 2019 at 3:51 PM

Sorry, I tried to HTML-escape & post in code tags, but that didn’t work right. Retrying with the WordPress sourcecode attribute:

module Main where

import           Data.Bits     (xor)
import           Data.Char     (chr, ord)
import           Data.Foldable (foldl')
import qualified Data.Set      as S

data Solution a b = S { _in      :: String -> a
                      , _compute :: a -> a -> b
                      , _out     :: b -> Char
                      }

run :: Solution a b -> String -> String -> Char
run (S i c o) x y = o $ c (i x) (i y)

s1 :: Solution (S.Set Char) (S.Set Char)
s1 =
  S S.fromList (\x y -> (x `S.union` y) S.\\ (x `S.intersection` y)) S.findMin

s2 :: Solution Int Int
s2 = S (sum . fmap ord) (-) (chr . abs)

s3 :: Solution Int Int
s3 = S (foldl' xor 0 . fmap ord) xor chr

main :: IO ()
main = do
  let x = "abcdef"
      y = "bfxdeac"
  print $ all (== 'x') [run s1 x y, run s2 x y, run s3 x y]

Clearly, it’s been too long since I visited!

Graham said
January 15, 2019 at 3:52 PM
More syntax woes, apologies.
asanand21 said
January 15, 2019 at 4:35 PM
My answer in Python

s = ‘abcdef’
t = ‘cbdefxa’
for c in t:
if c not in s:
print(c)

matthew said

January 15, 2019 at 4:45 PM

My C++ is in danger of getting rusty, so let’s use it for a solution. See https://godbolt.org/z/qgGfAe for how the main function compiles.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <numeric>
#include <functional>

template <typename Iter>
auto xorseq(const Iter &from, const Iter &to) {
  return std::accumulate(from,to,0,std::bit_xor<>());
}

template <typename Iter>
auto xordiff(const Iter &from0, const Iter &to0,
             const Iter &from1, const Iter &to1) {
  return xorseq(from0,to0)^xorseq(from1,to1);
}

auto xordiff(const char *s0, const char *s1) {
  return xordiff(s0,s0+strlen(s0),s1,s1+strlen(s1));
}

int main(int argc, char *argv[]) {
  if (argc != 3) abort();
  printf("%c\n",xordiff(argv[1],argv[2]));
}

matthew said
January 15, 2019 at 5:01 PM
That needs C++14 features, so build with eg:
```
g++ -Wall -std=c++14 findone.cpp -o findone
```
Jan Van lent said
January 15, 2019 at 6:19 PM
Python solution using sets (similar to Haskell s1 solution above).
```
s = "abcdef"
t = "bfxdeac"
print((set(t) - set(s)).pop())
```

Jan Van lent said

January 15, 2019 at 6:36 PM

Three Python solutions.

from functools import reduce
from operator import xor

s = "abcdef"
t = "bfxdeac"

print((set(t)-set(s)).pop())

print(chr(sum(map(ord, t)) - sum(map(ord, s))))

print(chr(reduce(xor, map(ord, s+t), 0)))

Mike said
January 15, 2019 at 7:57 PM
Python solution.

Works when s or t have duplicate letters (set solutions don’t).
Stops when the new letter is found, so it may not process the entire string.
```
def diff(s, t):
    return next(b for a,b in zip(s+' ',t) if a!=b)
```

matthew said

January 15, 2019 at 8:12 PM

@Mike: doesn’t that assume that the characters haven’t been shuffled? Good point on use of sets though.

Here’s a more streamlined version of my C++ solution:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <numeric>
#include <functional>

template <typename Iter>
auto xorseq(const Iter &from, const Iter &to,
            typename std::iterator_traits<Iter>::value_type init = 0) {
  return std::accumulate(from,to,init,std::bit_xor<>());
}

auto xordiff(const char *s0, const char *s1) {
  return xorseq(s1,s1+strlen(s0),xorseq(s0,s0+strlen(s0),0));
}

int main(int argc, char *argv[]) {
  if (argc != 3) abort();
  printf("%c\n",xordiff(argv[1],argv[2]));
}

Globules said

January 16, 2019 at 5:11 AM

Another Haskell version.

import Data.List ((\\), find, sort)
import Data.Maybe (fromMaybe)

-- Return the extra elements that are in one list, but not in the other.
diff :: Ord a => [a] -> [a] -> [a]
diff xs ys = let (xs', ys') = (sort xs, sort ys)
             in fromMaybe [] $ find (not . null) [xs' \\ ys', ys' \\ xs']

main :: IO ()
main = do
  putStrLn $ diff "abcdef" "bfxdeac"
  -- the random letter is a duplicate (here 'e')
  putStrLn $ diff "abcdef" "bfedeac"

$ ./findthediff
x
e

Globules said

January 16, 2019 at 5:18 AM

@Graham, I use the following shell script to create comments here.

#!/bin/sh -eu

case $# in
    0) echo "Usage: ${0##*/} path-to-Haskell-source [command]" 1>&2;
       exit 1;;
    1) src=$1; shift; cmd=;;
    *) src=$1; shift; cmd=$@;;
esac

cat <<EOF
[code]
$(cat "$src")

EOF

test -z "$cmd" || {
echo "

"
    echo \$ "$cmd"
    sh -c "$cmd"
    echo "

"
}
[/code]
I run it like

praxpub findthediff.hs ./findthediff

then just paste the output in the comment widget.

Globules said
January 16, 2019 at 5:19 AM
LOL! I feel your pain. :-)

Daniel said

January 16, 2019 at 5:34 AM

Here are a few approaches in C. The table-driven approach was my initial implementation. It counts each letter and then compares the counts. The other approaches were inspired by the solution: an XOR approach, a sum approach, and a sorting approach.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char find_letter_table(char* s, char* t) {
  int* s_counts = calloc(26, sizeof(int));
  int* t_counts = calloc(26, sizeof(int));
  for (char c = *s; c; c = *(++s))
    ++(s_counts[c - 'a']);
  for (char c = *t; c; c = *(++t))
    ++(t_counts[c - 'a']);
  for (int i = 0; i < 26; ++i)
    if (t_counts[i] > s_counts[i])
      return i + 'a';
  return '\0';
}

char find_letter_xor(char* s, char* t) {
  char result = '\0';
  for (char c = *s; c; c = *(++s))
    result ^= c;
  for (char c = *t; c; c = *(++t))
    result ^= c;
  return result;
}

char find_letter_sum_diff(char* s, char* t) {
  int result = 0;
  for (char c = *t; c; c = *(++t))
    result += c;
  for (char c = *s; c; c = *(++s))
    result -= c;
  return (char)result;
}

int compare(const void* a, const void* b) {
  char a_char = *(char*)a;
  char b_char = *(char*)b;
  return (int)a_char - (int)b_char;
}

// Destructively modifies inputs.
char find_letter_sort(char* s, char* t) {
  int n = strlen(s);
  qsort(s, n, sizeof(char), compare);
  qsort(t, n + 1, sizeof(char), compare);
  while (*s && *s == *t) {
    ++s;
    ++t;
  }
  return *t;
}

int main(int argc, char* argv[]) {
  if (argc != 3) {
    fprintf(stderr, "Usage: %s ARG1 ARG2\n", argv[0]);
    return EXIT_FAILURE;
  }
  char* s = argv[1];
  char* t = argv[2];
  char c_table = find_letter_table(s, t);
  char c_xor = find_letter_xor(s, t);
  char c_sum_diff = find_letter_sum_diff(s, t);
  char c_sort = find_letter_sort(s, t);
  assert(c_table == c_xor);
  assert(c_table == c_sum_diff);
  assert(c_table == c_sort);
  printf("%c\n", c_table);
  return EXIT_SUCCESS;
}

Example:

$ ./a.out abcdef bfxdeac

Daniel said

January 16, 2019 at 5:44 AM

I forgot to free my memory allocations :-(.

Here’s an update.

char find_letter_table(char* s, char* t) {
  int* s_counts = calloc(26, sizeof(int));
  int* t_counts = calloc(26, sizeof(int));
  for (char c = *s; c; c = *(++s))
    ++(s_counts[c - 'a']);
  for (char c = *t; c; c = *(++t))
    ++(t_counts[c - 'a']);
  char result = '\0';
  for (int i = 0; i < 26; ++i) {
    if (t_counts[i] > s_counts[i]) {
      result = i + 'a';
      break;
    }
  }
  free(s_counts);
  free(t_counts);
  return result;
}

Paul said

January 16, 2019 at 8:23 AM

The fastest solution I could find in Python.

from collections import Counter
def diff1(s, t):
    d = Counter(t) - Counter(s)
    return next(iter(d.keys()))

aarroyoc said

January 22, 2019 at 10:06 PM

Prolog solution here (not optimized):

difference(S1,S2,Diff) :-
	string_codes(S1,L1),
	string_codes(S2,L2),
	length(L1,Len1),
	length(L2,Len2),
	Len2 > Len1,
	list_to_set(L1,Set1),
	list_to_set(L2,Set2),
	intersection(Set1,Set2,CommonLetters),
	subtract(Set2,CommonLetters,ExtraLetters),
	swritef(Diff,"%n",ExtraLetters).

difference(S1,S2,Diff) :-
	string_codes(S1,L1),
	string_codes(S2,L2),
	length(L1,Len1),
	length(L2,Len2),
	Len1 > Len2,
	difference(S2,S1,Diff).

Then just

?- difference("abcdef","bfxdeac",Diff).

Programming Praxis