Find The Difference

January 15, 2019

We consider first a solution that sorts both strings then indexes through them until it finds a difference:

(define (diff1 xstr ystr)
  (let loop ((xs (sort char<? (string->list xstr)))
             (ys (sort char<? (string->list ystr))))
    (cond ((null? xs) (car ys))
          ((char=? (car xs) (car ys))
            (loop (cdr xs) (cdr ys)))
          (else (car ys)))))
> (diff1 "abcdef" "bfxdeac")
#\x

That takes time O(n log n), where n is the length of the strings. A better solution operates in time O(n):

(define (diff2 xstr ystr)
  (let ((x (sum (map char->integer (string->list xstr))))
        (y (sum (map char->integer (string->list ystr)))))
    (integer->char (- y x))))
> (diff2 "abcdef" "bfxdeac")
#\x

That indexes through the two strings, without sorting, summing the ascii values of the characters in the strings and reporting the difference at the end. You could use XOR instead of addition if you prefer.

You can run the program at https://ideone.com/Sz9Mq6.

Pages: 1 2

21 Responses to “Find The Difference”

  1. András said

    Scala: (t.toSet–(s.toSet)).iterator.next //> res0: Char = x

  2. James Curtis-Smith said

    Nice one for perl… in the case we are only looking for one known difference it is just finding the letter in the two strings which appears an odd number of times….

    use feature 'say';
    my( $x, $y, %f ) = ( 'abcdef', 'bfxdeac' );
    $f{$_}=!$f{$_} foreach split //, $x.$y;
    say grep {$f{$_}} keys %f;'
    

    If you were looking for all added/removed letters you could change this to:

    use feature 'say';
    my( $x, $y, %f ) = ( 'abcdef', 'bfxdeac' );
    $f{$_}++ foreach split //, $x;
    $f{$_}-- foreach split //, $y;
    say grep {$f{$_}} keys %f;'
    

    The %f contains +ve values for letters in $x but not $y and then -ve values for letters in $y but not in $x if that makes sense (and the absolute value is the count of each).

  3. Zack said

    Cool one for basic text processing. Here is my take with Julia 1.0.2:

    function StringDifference(s::AbstractString, t::AbstractString)
    S = sort(split(s, “”))
    T = sort(split(t, “”))

    for i = 1:length(s)
        if S[i] != T[i]; return T[i]; end
    end
    
    return T[end]
    

    end

  4. milroy said

    #include
    #include
    #include
    using namespace std;

    int main()
    {
    string s1;
    string s2;
    string s3;

    cout<<"enter a string s1:";
    cin>>s1;
    cout<<endl<<"enter a string s1:";
    cin>>s2;
    
    sort(s1.begin(),s1.end());
    sort(s2.begin(),s2.end());
    int i=0;
    int found=0;
    
    for(i=0;i<s1.length();i++)
    {
        if(s2.at(i)!=s1.at(i))
        {
        cout<<s2.at(i);
        break;
        found=1;
        }
        else if(i==s1.length()-1)
        {
         cout<<endl<<"New char found in s2:";
         cout<< s2.at(i+1);
    
        }
    
    }
    
    
        cin>>s2;
    
    
    return 0;
    

    }

  5. Graham said

    Fun exercise! I wouldn’t have thought of the xor on my own.
    I also didn’t trust myself to always put the shorter string first, so my solutions allow for supplying the strings in either order.


    module Main where

    import Data.Bits (xor)
    import Data.Char (chr, ord)
    import Data.Foldable (foldl')
    import qualified Data.Set as S

    data Solution a b = S { _in :: String -> a
                          , _compute :: a -> a -> b
                          , _out :: b -> Char
                          }

    run :: Solution a b -> String -> String -> Char
    run (S i c o) x y = o $ c (i x) (i y)

    s1 :: Solution (S.Set Char) (S.Set Char)
    s1 =
      S S.fromList (\x y -> (x
    S.union y) S.\\ (x S.intersection y)) S.findMin

    s2 :: Solution Int Int
    s2 = S (sum . fmap ord) (-) (chr . abs)

    s3 :: Solution Int Int
    s3 = S (foldl' xor 0 . fmap ord) xor chr

    main :: IO ()
    main = do
      let x = "abcdef"
          y = "bfxdeac"
      print $ all (== 'x') [run s1 x y, run s2 x y, run s3 x y]

  6. Graham said

    Sorry, I tried to HTML-escape & post in code tags, but that didn’t work right. Retrying with the WordPress sourcecode attribute:

    module Main where
    
    import           Data.Bits     (xor)
    import           Data.Char     (chr, ord)
    import           Data.Foldable (foldl')
    import qualified Data.Set      as S
    
    data Solution a b = S { _in      :: String -> a
                          , _compute :: a -> a -> b
                          , _out     :: b -> Char
                          }
    
    run :: Solution a b -> String -> String -> Char
    run (S i c o) x y = o $ c (i x) (i y)
    
    s1 :: Solution (S.Set Char) (S.Set Char)
    s1 =
      S S.fromList (\x y -> (x `S.union` y) S.\\ (x `S.intersection` y)) S.findMin
    
    s2 :: Solution Int Int
    s2 = S (sum . fmap ord) (-) (chr . abs)
    
    s3 :: Solution Int Int
    s3 = S (foldl' xor 0 . fmap ord) xor chr
    
    main :: IO ()
    main = do
      let x = "abcdef"
          y = "bfxdeac"
      print $ all (== 'x') [run s1 x y, run s2 x y, run s3 x y]
    

    Clearly, it’s been too long since I visited!

  7. Graham said

    More syntax woes, apologies.

  8. asanand21 said

    My answer in Python

    s = ‘abcdef’
    t = ‘cbdefxa’
    for c in t:
    if c not in s:
    print(c)

  9. matthew said

    My C++ is in danger of getting rusty, so let’s use it for a solution. See https://godbolt.org/z/qgGfAe for how the main function compiles.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <numeric>
    #include <functional>
    
    template <typename Iter>
    auto xorseq(const Iter &from, const Iter &to) {
      return std::accumulate(from,to,0,std::bit_xor<>());
    }
    
    template <typename Iter>
    auto xordiff(const Iter &from0, const Iter &to0,
                 const Iter &from1, const Iter &to1) {
      return xorseq(from0,to0)^xorseq(from1,to1);
    }
    
    auto xordiff(const char *s0, const char *s1) {
      return xordiff(s0,s0+strlen(s0),s1,s1+strlen(s1));
    }
    
    int main(int argc, char *argv[]) {
      if (argc != 3) abort();
      printf("%c\n",xordiff(argv[1],argv[2]));
    }
    
  10. matthew said

    That needs C++14 features, so build with eg:

    g++ -Wall -std=c++14 findone.cpp -o findone
    
  11. Jan Van lent said

    Python solution using sets (similar to Haskell s1 solution above).

    s = "abcdef"
    t = "bfxdeac"
    print((set(t) - set(s)).pop())
    
  12. Jan Van lent said

    Three Python solutions.

    from functools import reduce
    from operator import xor
    
    s = "abcdef"
    t = "bfxdeac"
    
    print((set(t)-set(s)).pop())
    
    print(chr(sum(map(ord, t)) - sum(map(ord, s))))
    
    print(chr(reduce(xor, map(ord, s+t), 0)))
    
  13. Mike said

    Python solution.

    Works when s or t have duplicate letters (set solutions don’t).
    Stops when the new letter is found, so it may not process the entire string.

    def diff(s, t):
        return next(b for a,b in zip(s+' ',t) if a!=b)
    
  14. matthew said

    @Mike: doesn’t that assume that the characters haven’t been shuffled? Good point on use of sets though.

    Here’s a more streamlined version of my C++ solution:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <numeric>
    #include <functional>
    
    template <typename Iter>
    auto xorseq(const Iter &from, const Iter &to,
                typename std::iterator_traits<Iter>::value_type init = 0) {
      return std::accumulate(from,to,init,std::bit_xor<>());
    }
    
    auto xordiff(const char *s0, const char *s1) {
      return xorseq(s1,s1+strlen(s0),xorseq(s0,s0+strlen(s0),0));
    }
    
    int main(int argc, char *argv[]) {
      if (argc != 3) abort();
      printf("%c\n",xordiff(argv[1],argv[2]));
    }
    
  15. Globules said

    Another Haskell version.

    import Data.List ((\\), find, sort)
    import Data.Maybe (fromMaybe)
    
    -- Return the extra elements that are in one list, but not in the other.
    diff :: Ord a => [a] -> [a] -> [a]
    diff xs ys = let (xs', ys') = (sort xs, sort ys)
                 in fromMaybe [] $ find (not . null) [xs' \\ ys', ys' \\ xs']
    
    main :: IO ()
    main = do
      putStrLn $ diff "abcdef" "bfxdeac"
      -- the random letter is a duplicate (here 'e')
      putStrLn $ diff "abcdef" "bfedeac"
    
    $ ./findthediff
    x
    e
    
  16. Globules said

    @Graham, I use the following shell script to create comments here.

    #!/bin/sh -eu
    
    case $# in
        0) echo "Usage: ${0##*/} path-to-Haskell-source [command]" 1>&2;
           exit 1;;
        1) src=$1; shift; cmd=;;
        *) src=$1; shift; cmd=$@;;
    esac
    
    cat <<EOF
    [code]
    $(cat "$src")
    

    EOF

    test -z "$cmd" || {
    echo "

    "
        echo \$ "$cmd"
        sh -c "$cmd"
        echo "

    "
    }
    [/code]
    I run it like

    praxpub findthediff.hs ./findthediff
    

    then just paste the output in the comment widget.

  17. Globules said

    LOL! I feel your pain. :-)

  18. Daniel said

    Here are a few approaches in C. The table-driven approach was my initial implementation. It counts each letter and then compares the counts. The other approaches were inspired by the solution: an XOR approach, a sum approach, and a sorting approach.

    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    char find_letter_table(char* s, char* t) {
      int* s_counts = calloc(26, sizeof(int));
      int* t_counts = calloc(26, sizeof(int));
      for (char c = *s; c; c = *(++s))
        ++(s_counts[c - 'a']);
      for (char c = *t; c; c = *(++t))
        ++(t_counts[c - 'a']);
      for (int i = 0; i < 26; ++i)
        if (t_counts[i] > s_counts[i])
          return i + 'a';
      return '\0';
    }
    
    char find_letter_xor(char* s, char* t) {
      char result = '\0';
      for (char c = *s; c; c = *(++s))
        result ^= c;
      for (char c = *t; c; c = *(++t))
        result ^= c;
      return result;
    }
    
    char find_letter_sum_diff(char* s, char* t) {
      int result = 0;
      for (char c = *t; c; c = *(++t))
        result += c;
      for (char c = *s; c; c = *(++s))
        result -= c;
      return (char)result;
    }
    
    int compare(const void* a, const void* b) {
      char a_char = *(char*)a;
      char b_char = *(char*)b;
      return (int)a_char - (int)b_char;
    }
    
    // Destructively modifies inputs.
    char find_letter_sort(char* s, char* t) {
      int n = strlen(s);
      qsort(s, n, sizeof(char), compare);
      qsort(t, n + 1, sizeof(char), compare);
      while (*s && *s == *t) {
        ++s;
        ++t;
      }
      return *t;
    }
    
    int main(int argc, char* argv[]) {
      if (argc != 3) {
        fprintf(stderr, "Usage: %s ARG1 ARG2\n", argv[0]);
        return EXIT_FAILURE;
      }
      char* s = argv[1];
      char* t = argv[2];
      char c_table = find_letter_table(s, t);
      char c_xor = find_letter_xor(s, t);
      char c_sum_diff = find_letter_sum_diff(s, t);
      char c_sort = find_letter_sort(s, t);
      assert(c_table == c_xor);
      assert(c_table == c_sum_diff);
      assert(c_table == c_sort);
      printf("%c\n", c_table);
      return EXIT_SUCCESS;
    }
    

    Example:

    $ ./a.out abcdef bfxdeac
    
    x
    
  19. Daniel said

    I forgot to free my memory allocations :-(.

    Here’s an update.

    char find_letter_table(char* s, char* t) {
      int* s_counts = calloc(26, sizeof(int));
      int* t_counts = calloc(26, sizeof(int));
      for (char c = *s; c; c = *(++s))
        ++(s_counts[c - 'a']);
      for (char c = *t; c; c = *(++t))
        ++(t_counts[c - 'a']);
      char result = '\0';
      for (int i = 0; i < 26; ++i) {
        if (t_counts[i] > s_counts[i]) {
          result = i + 'a';
          break;
        }
      }
      free(s_counts);
      free(t_counts);
      return result;
    }
    
  20. Paul said

    The fastest solution I could find in Python.

    from collections import Counter
    def diff1(s, t):
        d = Counter(t) - Counter(s)
        return next(iter(d.keys()))
    
  21. aarroyoc said

    Prolog solution here (not optimized):

    difference(S1,S2,Diff) :-
    	string_codes(S1,L1),
    	string_codes(S2,L2),
    	length(L1,Len1),
    	length(L2,Len2),
    	Len2 > Len1,
    	list_to_set(L1,Set1),
    	list_to_set(L2,Set2),
    	intersection(Set1,Set2,CommonLetters),
    	subtract(Set2,CommonLetters,ExtraLetters),
    	swritef(Diff,"%n",ExtraLetters).
    
    difference(S1,S2,Diff) :-
    	string_codes(S1,L1),
    	string_codes(S2,L2),
    	length(L1,Len1),
    	length(L2,Len2),
    	Len1 > Len2,
    	difference(S2,S1,Diff).
    

    Then just

    ?- difference("abcdef","bfxdeac",Diff).
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: