Duplicate Items In An Array

June 14, 2016

The two programs are identical; a program that solves the second problem will also solve the first. There are basically two solutions. If you have space, insert items in a hash table, complaining when you find a duplicate; that’s O(n). If you don’t have space, sort the input in-place, then run through the array, noting adjacent duplicates; that’s O(n log n). Here are the two programs:

(define (dup-hash xs)
  (let ((h (make-hash identity = 97))
        (dups (list)))
    (do ((xs xs (cdr xs)))
        ((null? xs) dups)
      (when (pair? (h 'get (car xs)))
        (set! dups (cons (car xs) dups)))
      (h 'put (car xs) (car xs)))))
(define (dup-sort xs)
  (let ((xs (sort < xs)))
    (let loop ((prev (car xs)) (xs (cdr xs)) (dups (list)))
      (if (null? xs) dups
        (if (= prev (car xs))
            (loop prev (cdr xs) (cons (car xs) dups))
            (loop (car xs) (cdr xs) dups))))))

Here are some examples:

> (define xs '(1 2 3 1 4))
> (define ys '(1 2 3 1 2 4 1))
> (dup-hash xs)
(1)
> (dup-sort xs)
(1)
> (dup-hash ys)
(1 2 1)
> (dup-hash ys)
(1 2 1)

You can run the program at http://ideone.com/d1Ca0F, where you will also see the identity function from the Standard Prelude and the hash table from a recent exercise. We’ve done this exercise before, and you may be wondering why we do it again. One reason is because it is fundamental, and we want to keep up that kind of skill; it’s the same reason baseball players take batting practice before every game. Another reason is that this question appears again and again on beginner programming forums, and I feel some responsibility to provide an answer. And not a week goes by that I don’t have several people subscribing to the blog, so I want the newcomers to see this exercise.

Pages: 1 2

7 Responses to “Duplicate Items In An Array”

  1. Pretty much 1-liners for both of these in Perl – using core perl (+List::Util)

    Part 1 – find (only) first entry…

    use feature qw(say); 
    use List::Util qw(first);
    say first { exists $X{$_} || !($X{$_}=1) } @ARGV;
    

    Part 1 – find all entries

    [sourceode lang=”perl”]
    use feature qw(say);
    say join q( ), grep { exists $X{$_} || !($X{$_}=1) } @ARGV;
    [/sourcecode]

  2. John Cowan said

    If the numbers are bounded and small, you can use a bit vector, which takes less actual space than a hash table.

  3. Daniel said

    Here’s a solution in C. It uses GLib for its hash table.

    #include <stdio.h>
    #include <stdlib.h>
    #include <glib.h>
    
    // return code indicates if duplicate was found
    int FirstDuplicate(int arr[], size_t n, int* duplicate) {
      GHashTable* hash = g_hash_table_new(g_int_hash, g_int_equal);
      for (size_t i = 0; i < n; i++) {
        if (g_hash_table_contains(hash, arr + i)) {
          *duplicate = arr[i];
          return 1;
        } else {
          g_hash_table_insert(hash, arr + i, NULL);
        }
      }
      return 0;
    }
    
    void Duplicates(int arr[],
                    size_t n,
                    int* duplicates,
                    size_t* duplicates_n) {
      GHashTable* hash = g_hash_table_new(g_int_hash, g_int_equal);
      for (size_t i = 0; i < n; i++) {
        if (g_hash_table_contains(hash, arr + i)) {
          duplicates[(*duplicates_n)++] = arr[i];
        } else {
          g_hash_table_insert(hash, arr + i, NULL);
        }
      }
    }
    
    int main(int argc, char* argv[]) {
      int a[] = {1,2,3,1,4};
      size_t a_n = sizeof(a)/sizeof(a[0]);
    
      int duplicate = 0;
      if (FirstDuplicate(a, a_n, &duplicate)) {
        printf("duplicate: %d", duplicate);
      } else {
        printf("no duplicate");
      }
    
      printf("\n\n");
    
      int b[] = {1,2,3,1,2,4,1};
      size_t b_n = sizeof(b)/sizeof(b[0]);
    
      int* duplicates = malloc(sizeof(int) * (b_n-1));
      size_t duplicates_n = 0;
    
      Duplicates(b, b_n, duplicates, &duplicates_n);
    
      if (duplicates_n > 0) {
        printf("duplicates: [");
        for (size_t i = 0; i < duplicates_n; i++) {
          if (i > 0) {
            printf(",");
          }
          printf("%d", duplicates[i]);
        }
        printf("]");
      } else {
        puts("no duplicates");
      }
    
      printf("\n");
    
      free(duplicates);
      return 0;
    }
    

    Here’s the output:

    duplicate: 1
    
    duplicates: [1,2,1]
    
    

    The problem says “the correct output is 4”. I should seemingly say 1 (there’s only one 4, but there are two 1s).

  4. gdejohn said
    import static java.util.Arrays.stream;
    
    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.OptionalInt;
    import java.util.function.IntPredicate;
    
    public class Duplicates {
        public static OptionalInt firstDuplicate(int... ints) {
            return stream(ints)
                  .filter(((IntPredicate) new HashSet<>()::add).negate())
                  .findFirst();
        }
    
        public static int[] duplicates(int... ints) {
            return stream(ints)
                  .filter(((IntPredicate) new HashSet<>()::add).negate())
                  .toArray();
        }
    
        public static void main(String[] args) {
            System.out.println(firstDuplicate(1, 2, 3, 1, 4).getAsInt());         // 1
            System.out.println(Arrays.toString(duplicates(1, 2, 3, 1, 2, 4, 1))); // [1, 2, 1]
        }
    }
    
  5. Globules said

    A Haskell version, favouring conciseness over efficiency.

    import Data.List
    
    -- Return Just x if and only if x is the only duplicate element.
    singleDup :: Ord a => [a] -> Maybe a
    singleDup xs = case filter ((>1) . length) . group . sort $ xs of
                     [x:_] -> Just x
                     _     -> Nothing
    
    -- Return the duplicate elements of the list in the order they appear.  The
    -- first occurrence is omitted.
    dups :: Eq a => [a] -> [a]
    dups = concat . snd . mapAccumL step []
      where step ds x = if x `elem` ds then (ds, [x]) else (x:ds, [])
    
    main :: IO ()
    main = do
      print $ singleDup [1, 2, 3, 1, 4 :: Int]
      print $ dups [1, 2, 3, 1, 2, 4, 1 :: Int]
    
    $ ./dups 
    Just 1
    [1,2,1]
    
  6. ijp said

    Incidentally, the first part has been an exercise on here before (albeit with an upper bound on the array elements) way back in 2011. https://programmingpraxis.com/2011/09/23/array-duplicates/

  7. Mayank said

    A good problem

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: