Stable Sort

March 23, 2018

A sorting algorithm is stable if items with equal keys appear in the output in the same order they appear in the input. In many cases the stability of a sorting algorithm doesn’t matter; in other cases it is critical. Some sorting algorithms, such as merge sort, are inherently stable (unless you muck up the implementation), which other sorting algorithms, such as quick sort, are inherently unstable. It is always possible to turn an unstable sort into a stable sort by adding an index to the data and using the index to break ties.

Your task is to write a program that converts an unstable sorting algorithm into a stable sorting algorithm. When you are finished, you are welcome to read or run a suggested solution, or to post your own solution or discuss the exercise in the comments below.

Advertisements

Pages: 1 2

2 Responses to “Stable Sort”

  1. Globules said

    Here’s a Haskell version. We write a sorting function that is guaranteed to be unstable, then apply a “stabilizing” function to it, resulting in a stable sort.

    import Data.List (sort)
    import Data.Ord (Down(..))
    
    -- A key and value.  We'll define the ordering of KVs to only consider the key,
    -- so that we can use the value to record the order in which instances having
    -- equal keys originally appeared.
    data KV a b = KV a b deriving (Show)
    
    -- Compare only on the key, not on the value.
    instance Eq a => Eq (KV a b) where
      (KV k1 _) == (KV k2 _) = k1 == k2
    
    -- Compare only on the key, not on the value.
    instance Ord a => Ord (KV a b) where
      (KV k1 _) `compare` (KV k2 _) = k1 `compare` k2
    
    -- A sort that is guaranteed to be unstable by ensuring that the positions of
    -- equal elements are the reverse of their original order.
    unstableSort :: Ord a => [a] -> [a]
    unstableSort xs = map fst $ sort $ zip xs $ map Down [0 :: Int ..]
    
    -- Given an unstable sort and a list, stably sort the list.  We do this by
    -- forcing the unstable sort to treat the original position of each element as a
    -- secondary key.
    stabilize :: Ord a => ([(a, Int)] -> [(a, Int)]) -> [a] -> [a]
    stabilize f xs = map fst $ f $ zip xs [0..]
    
    -- Stably sort the argument list.
    stableSort :: Ord a => [a] -> [a]
    stableSort = stabilize unstableSort
    
    main :: IO ()
    main = do
      let kvs = zipWith ($) [KV 'c', KV 'b', KV 'a', KV 'b'] [0 :: Int ..]
      putStrLn $ "Original order: " ++ show kvs
    
      -- Result will be [KV 'a' 2, KV 'b' 3, KV 'b' 1,KV 'c' 0], where
      -- KV 'b' 3 appears before KV 'b' 1, which is the reverse of the original
      -- order.
      putStrLn $ " Unstable sort: " ++ show (unstableSort kvs)
    
      -- Result will be [KV 'a' 2, KV 'b' 1, KV 'b' 3,KV 'c' 0], where
      -- KV 'b' 3 appears after KV 'b' 1, which matches the original order
      -- (i.e. is stable).
      putStrLn $ "   Stable sort: " ++ show (stableSort kvs)
    
    $ ./stable
    Original order: [KV 'c' 0,KV 'b' 1,KV 'a' 2,KV 'b' 3]
     Unstable sort: [KV 'a' 2,KV 'b' 3,KV 'b' 1,KV 'c' 0]
       Stable sort: [KV 'a' 2,KV 'b' 1,KV 'b' 3,KV 'c' 0]
    
  2. Daniel said

    Here’s a solution in C (with the nested function GCC extension).

    The data in the array to be sorted is not copied. Rather, arrays of indices and pointers are used. This approach seemingly complicated my solution, but would use less memory for large data types.

    /* stable_sort.c */
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    typedef struct {
      size_t index;
      void* ptr;
    } indexed_ptr_t;
    
    typedef struct {
      int first;
      int second;
    } pair_t;
    
    int compare_pair_firsts(const void* a, const void* b) {
      return ((pair_t*)a)->first - ((pair_t*)b)->first;
    }
    
    void stable_sort(void* base,
                     size_t nel,
                     size_t width,
                     int (*compar)(const void*, const void*),
                     int (*sort)(void*,
                                 size_t,
                                 size_t,
                                 int (*)(const void*, const void*))) {
      indexed_ptr_t* sorted = malloc(nel*sizeof(indexed_ptr_t));
      for (size_t i = 0; i < nel; ++i) {
        sorted[i] = (indexed_ptr_t){i, (char*)base + (i*width)};
      }
      int compar_indexed_ptr(const void* a, const void* b) {
        const indexed_ptr_t* a_ = (indexed_ptr_t*)a;
        const indexed_ptr_t* b_ = (indexed_ptr_t*)b;
        int result = compar(a_->ptr, b_->ptr);
        if (result == 0) {
          if (a_->index > b_->index) {
            result = 1;
          } else if (a_->index < b_->index) {
            result = -1;
          }
        }
        return result;
      }
      sort(sorted, nel, sizeof(indexed_ptr_t), compar_indexed_ptr);
      // copy results from 'sorted' into 'base'.
      // this is done inplace, by using an array with indices.
      // this could be simplified with a a copy of 'base'.
      size_t* indexes = (size_t*)malloc(nel*sizeof(size_t));
      size_t index_tmp;
      void* data_tmp_p = malloc(width);
      for (size_t i = 0; i < nel; ++i) {
        indexes[sorted[i].index] = i;
      }
      for (size_t i = 0; i < nel; ++i) {
        while (indexes[i] != i) {
          size_t index_i = indexes[i];
          index_tmp = indexes[index_i];
          memcpy(data_tmp_p, (char*)base + (width*index_i), width);
          indexes[index_i] = index_i;
          memcpy((char*)base + (width*index_i), (char*)base + (width*i), width);
          indexes[i] = index_tmp;
          memcpy((char*)base + (width*i), data_tmp_p, width);
        }
      }
      free(sorted);
      free(indexes);
      free(data_tmp_p);
    }
    
    void print_pair_array(const pair_t* array,
                          const size_t n) {
      printf("[");
      for (size_t i = 0; i < n; ++i) {
        if (i > 0) printf(", ");
        printf("(%d,%d)", array[i].first, array[i].second);
      }
      printf("]\n");
    }
    
    int main(void) {
      pair_t array[] = {
        {3,1},{2,1},{2,2},{1,1},{3,2},{2,3},{1,2},{1,3},{3,3}
      };
      size_t n = sizeof(array) / sizeof(pair_t);
      pair_t sorted[n];
      printf("array:\n  ");
      print_pair_array(array, n);
    
      // heapsort
      memcpy(sorted, array, n*sizeof(pair_t));
      heapsort(sorted, n, sizeof(pair_t), compare_pair_firsts);
      printf("heapsort(array):\n  ");
      print_pair_array(sorted, n);
    
      // stable heapsort
      memcpy(sorted, array, n*sizeof(pair_t));
      stable_sort(sorted, n, sizeof(pair_t), compare_pair_firsts, heapsort);
      printf("stable_sort(array,heapsort):\n  ");
      print_pair_array(sorted, n);
    
      return EXIT_SUCCESS;
    }
    

    Output:

    array:
      [(3,1), (2,1), (2,2), (1,1), (3,2), (2,3), (1,2), (1,3), (3,3)]
    heapsort(array):
      [(1,2), (1,1), (1,3), (2,2), (2,3), (2,1), (3,2), (3,3), (3,1)]
    stable_sort(array,heapsort):
      [(1,1), (1,2), (1,3), (2,1), (2,2), (2,3), (3,1), (3,2), (3,3)]
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google+ photo

You are commenting using your Google+ account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: