Compare Strings With One Error

January 29, 2016

At first I thought this was easy, but then I had some trouble writing it. After a couple of false starts, I settled on the following algorithm: First, if the lengths differ by more than one, I immediately report failure. Then, I extract the common prefixes and suffixes of the two strings. If the sum of their lengths is one less than the maximum length of the two strings, then they match with one error, and the length of the matching prefix is reported. Otherwise, either the two strings match or there is more than one mis-match, and failure is reported:

(define (match1 str1 str2)
  (let* ((cs1 (string->list str1)) (cs1-len (length cs1))
         (cs2 (string->list str2)) (cs2-len (length cs2))
         (min-len (min cs1-len cs2-len)) (max-len (max cs1-len cs2-len)))
    (if (< 1 (- max-len min-len)) #f
      (let* ((prefix (common char=? cs1 cs2))
             (suffix (reverse (common char=? (reverse cs1) (reverse cs2))))
             (pref-len (length prefix)) (suff-len (length suffix)))
        (if (= (- max-len pref-len suff-len) 1) pref-len #f)))))

We first convert the strings to lists of characters, because it is easier to handle lists than strings in Scheme, then use the common function shown below to compute the common prefix and suffix of the two lists:

(define (common eql? xs ys)
  (let loop ((xs xs) (ys ys) (zs (list)))
    (cond ((or (null? xs) (null? ys)) (reverse zs))
          ((not (eql? (car xs) (car ys))) (reverse zs))
          (else (loop (cdr xs) (cdr ys) (cons (car xs) zs))))))

If the strings are very long, it would be better to work directly with the strings and avoid all those list-reversals, but reverse is very fast in most Scheme implementations, so we won’t. Here are some examples:

> (match1 "ABCDE" "BCDE")
0
> (match1 "BCDE" "ABCDE")
0
> (match1 "ABCD" "ABCDE")
4
> (match1 "ABCDE" "ABDE")
2
> (match1 "ABDE" "ACD")
#f
> (match1 "ABCDE" "ADCBE")
#f
> (match1 "ABCDE" "ABCDE")
#f
> (match1 "ABCDE" "ABC")
#f

The algorithm is O(n). You can run the program at http://ideone.com/rh78RN.

Pages: 1 2

7 Responses to “Compare Strings With One Error”

  1. Sergei said
    (defun strings-one-diff-p (str1 str2)
        "Returns index at difference iff strings differ in exactly one position."
        (labels ((common-prefix-len (s1 s2)
                   (do ((len 0 (1+ len))
                        (c1 (car s1) (car s1))
                        (c2 (car s2) (car s2))
                        (s1 (cdr s1) (cdr s1))
                        (s2 (cdr s2) (cdr s2)))
                       ((or (null c1) (null c2) (not (eq c1 c2))) len))))
          (let* ((s1 (coerce str1 'list))
                 (s2 (coerce str2 'list))
                 (prefix-len (common-prefix-len s1 s2))
                 (suffix-len (common-prefix-len (reverse s1) (reverse s2))))
            (when (= (- (max (length s1) (length s2)) prefix-len suffix-len) 1) prefix-len))))
    
  2. Informatimago said

    In Common Lisp, we can use MISMATCH to find where two sequences differ.
    However, we have several cases to consider:

    same length:

    “abcde”
    “abxde”

    different lengths:

    “abc”
    “ab”

    “abcde”
    “abde”

    “abc”
    “bc”

    (defun string-equal-with-one-error (a b)
      ;; time complexity: O(length(a))
      ;; space complexity: O(1)
      (cond
        ((= (length a) (length b))
         (let ((difference (mismatch a b)))
           (and difference
                (string= a b :start1 (1+ difference)
                             :start2 (1+ difference))
                difference)))
        ((= (1+ (length a)) (length b))
         (string-equal-with-one-error b a))
        ((= (length a) (1+ (length b)))
         ;; a is longuest
         (if (string= a b :start1 1)
             0
             (let ((difference (mismatch a b)))
               (and difference
                    (string= a b :start1 (1+ difference)
                                 :start2 difference)
                    difference))))
        (t
         nil)))
    
    (defun test/string-equal-with-one-error ()
      (assert (null (string-equal-with-one-error "" "")))
      (assert (null (string-equal-with-one-error "a" "a")))
      (assert (null (string-equal-with-one-error "abc" "abc")))
      (assert (null (string-equal-with-one-error "" "abc")))
      (assert (null (string-equal-with-one-error "abc" "")))
      (assert (null (string-equal-with-one-error "abc" "ade")))
      (assert (null (string-equal-with-one-error "abc" "dbe")))
      (assert (= 0 (string-equal-with-one-error "abc" "dbc")))
      (assert (= 1 (string-equal-with-one-error "abc" "adc")))
      (assert (= 2 (string-equal-with-one-error "abc" "abd")))
      (assert (= 0 (string-equal-with-one-error "" "a")))
      (assert (= 0 (string-equal-with-one-error "a" "")))
      (assert (= 0 (string-equal-with-one-error "bcd" "abcd")))
      (assert (= 0 (string-equal-with-one-error "abcd" "bcd")))
      (assert (= 3 (string-equal-with-one-error "abc" "abcd")))
      (assert (= 3 (string-equal-with-one-error "abcd" "abc")))
      (assert (= 2 (string-equal-with-one-error "abcd" "abed")))
      (assert (= 2 (string-equal-with-one-error "abed" "abcd")))
      :success)
    
    (test/string-equal-with-one-error)
    ;; --> :success
    
  3. Rutger said

    Python version

    def supersequence(x, y):
        # True if and only if all elements in y occur in x in order (x is a supersequence of y)
        idx = 0
        try:
            for i in y:
                idx = x.index(i, idx)+1
        except ValueError, e:
            return False
        else:
            return True
    
    def has_hamming_distance_n(s1, s2, n):
        return sum(bool(ord(ch1) - ord(ch2)) for ch1, ch2 in zip(s1, s2)) == n
    
    def is_substring_diff_1(s1, s2):
    	if len(s1) == len(s2):
    		return has_hamming_distance_n(s1, s2, 1)
    	if len(s1) > len(s2):
    		s1, s2 = s2, s1
    	if len(s1) == len(s2) -1:
    		return supersequence(s2, s1)
    	return False
    
    
    testcases = [("abcde", "abxde"), ("abc", "ab"), ("abcde", "abde"), ("abc", "bc"), ("abc", "a"), ("a", "abc")]
    
    for s1,s2 in testcases:
    	print is_substring_diff_1(s1,s2), '<==', s1, s2
    
    
    # output:
    # True <== abcde abxde
    # True <== abc ab
    # True <== abcde abde
    # True <== abc bc
    # False <== abc a
    # False <== a abc
    
    
    
    
  4. Atif Farooq said

    #include
    #include
    #include
    #include
    using namespace std;
    //Function incorporating Solution.
    void Solution(void);
    void main(void)
    {
    Solution();
    }
    void Solution(void)
    {
    int DiffCount = 0;
    int LenA, LenB;
    string A,B;
    cin>>A>>B;
    LenA = A.length(), LenB = B.length();
    if ((LenA == LenB))
    {
    for (int X = 0; X = 0 && DiffCount!= 1) cout << "Error";
    else cout << "No Error";
    }
    else if (abs(LenA – LenB) == 1)
    {
    for (int X = 0; X 0) cout << "Error";
    else cout << "No Error";
    }
    else cout << "Error";
    }

  5. fisherro said
    #include <boost/optional.hpp>
    #include <boost/utility/string_ref.hpp>
    #include <boost/range/algorithm/mismatch.hpp>
    #include <iostream>
    #include <iomanip>
    #include <iterator>
    #include <algorithm>
    
    boost::optional<size_t> find_one_mismatch(boost::string_ref a, boost::string_ref b)
    {
        //Figure out which one is longer.
        auto longer = (b.size() > a.size())? b: a;
        auto shorter = (b.size() > a.size())? a: b;
        //If lengths differ by more than 1, return empty result.
        if ((longer.size() - shorter.size()) > 1) return boost::optional<size_t>();
        //Until C++14, mismatch wants the shorter one as the first parameter.
        auto mismatch1 = boost::mismatch(shorter, longer);
        if (mismatch1.first == shorter.end()) {
            //We didn't find a mismatch.
            if (mismatch1.second == longer.end()) {
                //The strings are exact matches. Return empty result.
                return boost::optional<size_t>();
            }
            //In this case, we know longer has one additional character.
            //We return the index of that character.
            return std::distance(longer.begin(), mismatch1.second);
        }
        //We found a mismatch.
        //Skip the mismatch location in the longer string.
        auto longer_start = mismatch1.second + 1;
        //If the strings aren't the same size, don't skip the mismatch location in
        //the shorter string.
        auto shorter_start = mismatch1.first + ((longer.size() > shorter.size())? 0: 1);
        //Look for a second mismatch.
        auto mismatch2 = std::mismatch(
                shorter_start, shorter.end(),
                longer_start, longer.end());
        if ((mismatch2.first == shorter.end()) &&
            (mismatch2.second == longer.end()))
        {
            //We didn't find a second mismatch; return the index of the first.
            return std::distance(longer.begin(), mismatch1.second);
        }
        //A second mismatch was found; return empty result.
        return boost::optional<size_t>();
    }
    
    void test(const char* a, const char* b)
    {
        auto index = find_one_mismatch(a, b);
        std::cout << std::quoted(a) << ", " << std::quoted(b) << ": ";
        if (index) {
            std::cout << *index;
        } else {
            std::cout << "no single mismatch found";
        }
        std::cout << '\n';
    }
    
    int main()
    {
        //Should pass:
        test("abc", "xbc");
        test("abc", "axc");
        test("abc", "abx");
    
        test("abc", "abcx");
        test("abc", "xabc");
        test("abc", "axbc");
        test("abc", "abxc");
    
        //Should fail:
        test("abc", "abc");
    
        test("abc", "abcde");
    
        test("abc", "xyz");
        test("abc", "xbz");
        test("abc", "axz");
        test("abc", "xzc");
    }
    
  6. Mike said

    Python solution. Basic idea is to find where the strings don’t match. Then there are two cases:

    1) s and t are the same length, so the mismatch must be a character substitution and the rest of the strings must match after skipping the mismatch (s[i+1:] == t[i+1:]); and

    2) s is shorter than t, so the rest of s must match the rest of t after skipping a character (s[i:] == t[i+1:])

    def onediff2(s, t):
        if len(s) > len(t):
            s,t = t,s
            
        if len(t) - len(s) > 1:
            return False
    
        for i in range(len(s)):
            if s[i] != t[i]:
                return s[i+1:] == t[i+1:] if len(s) == len(t) else s[i:]==t[i+1:]
    
        return len(t) != len(s)
    
  7. DanN said
    def oneDiff(str1, str2):
    
        if len(str1) != len(str2):
            if len(str1) > len(str2):
                if str1[:-1] == str2 or str1[1:] == str2:
                    return True 
            elif str2[:-1] == str1 or str2[1:] == str1:
                return True 
            else:
                return False 
        else:
            newStr = list(zip(str1, str2))
            newStr = list(filter(lambda x: x[0] == x[1], newStr))
    
            if len(newStr) == len(str1) - 1:
                return True 
            else:
                return False 
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: