Stable Sort

March 23, 2018

Before we get started, we need some data that will demonstrate whether a sort is stable or unstable. Here is a list of pairs; the second elements of the pairs are in decreasing order. We also provide an ordering function that ignores the second elements of the pairs:

(define xs '(
  (alfa zulu) (bravo yankee) (charlie xray) (delta whiskey) (echo uniform)
  (alfa tango) (bravo sierra) (charlie romeo) (delta papa) (echo oscar)))

 

(define (lt? a b)
  (stringstring (car a))
    (symbol->string (car b))))

Chez Scheme uses merge sort for its native sorting algorithm, and is stable. We need an unstable sorting algorithm for testing; here’s a naive quicksort, O(n²) on already-sorted data:

(define (qsort lt? xs)
  (if (or (null? xs) (null? (cdr xs))) xs
    (let ((pivot (car xs)))
      (let loop ((xs (cdr xs)) (lesser (list)) (equal-or-greater (list)))
        (if (null? xs) (append (qsort lt? lesser) (list pivot) (qsort lt? equal-or-greater))
          (if (lt? (car xs) pivot) (loop (cdr xs) (cons (car xs) lesser) equal-or-greater)
            (loop (cdr xs) lesser (cons (car xs) equal-or-greater))))))))

You can see that qsort is unstable; both delta and echo are in reverse order of their input:

> (qsort lt? xs)
((alfa zulu) (alfa tango) (bravo yankee) (bravo sierra) (charlie xray)
  (charlie romeo) (delta papa) (delta whiskey) (echo oscar)
  (echo uniform))

We are ready to show our stable-sorting wrapper. It’s arguments are a non-stable sorting algorithm, a less-than predicate, and a list to be sorted:

(define (stable-sort sort lt? xs)
  (define (less? a b)
    (or (lt? (cadr a) (cadr b))
        (and (not (lt? (cadr b) (cadr a)))
             (< (car a) (car b)))))
  (map cadr (sort less? (zip (range (length xs)) xs))))

Zip augments each data item with the index at which it appears in the input, sort calls the unstable sorting algorithm with a special ordering predicate, and map removes the index. The less? predicate uses lt? unless the two items compare equal, in which case it compares the initial indexes of the two items.

You can see that the sort is stable; at each pair of identical first elements, the second elements are in decreasing order, including delta and echo:

> (stable-sort qsort lt? xs)
((alfa zulu) (alfa tango) (bravo yankee) (bravo sierra) (charlie xray)
  (charlie romeo) (delta whiskey) (delta papa) (echo uniform)
  (echo oscar))

You can run the program at https://ideone.com/x5cN53.

Advertisement

Pages: 1 2

2 Responses to “Stable Sort”

  1. Globules said

    Here’s a Haskell version. We write a sorting function that is guaranteed to be unstable, then apply a “stabilizing” function to it, resulting in a stable sort.

    import Data.List (sort)
    import Data.Ord (Down(..))
    
    -- A key and value.  We'll define the ordering of KVs to only consider the key,
    -- so that we can use the value to record the order in which instances having
    -- equal keys originally appeared.
    data KV a b = KV a b deriving (Show)
    
    -- Compare only on the key, not on the value.
    instance Eq a => Eq (KV a b) where
      (KV k1 _) == (KV k2 _) = k1 == k2
    
    -- Compare only on the key, not on the value.
    instance Ord a => Ord (KV a b) where
      (KV k1 _) `compare` (KV k2 _) = k1 `compare` k2
    
    -- A sort that is guaranteed to be unstable by ensuring that the positions of
    -- equal elements are the reverse of their original order.
    unstableSort :: Ord a => [a] -> [a]
    unstableSort xs = map fst $ sort $ zip xs $ map Down [0 :: Int ..]
    
    -- Given an unstable sort and a list, stably sort the list.  We do this by
    -- forcing the unstable sort to treat the original position of each element as a
    -- secondary key.
    stabilize :: Ord a => ([(a, Int)] -> [(a, Int)]) -> [a] -> [a]
    stabilize f xs = map fst $ f $ zip xs [0..]
    
    -- Stably sort the argument list.
    stableSort :: Ord a => [a] -> [a]
    stableSort = stabilize unstableSort
    
    main :: IO ()
    main = do
      let kvs = zipWith ($) [KV 'c', KV 'b', KV 'a', KV 'b'] [0 :: Int ..]
      putStrLn $ "Original order: " ++ show kvs
    
      -- Result will be [KV 'a' 2, KV 'b' 3, KV 'b' 1,KV 'c' 0], where
      -- KV 'b' 3 appears before KV 'b' 1, which is the reverse of the original
      -- order.
      putStrLn $ " Unstable sort: " ++ show (unstableSort kvs)
    
      -- Result will be [KV 'a' 2, KV 'b' 1, KV 'b' 3,KV 'c' 0], where
      -- KV 'b' 3 appears after KV 'b' 1, which matches the original order
      -- (i.e. is stable).
      putStrLn $ "   Stable sort: " ++ show (stableSort kvs)
    
    $ ./stable
    Original order: [KV 'c' 0,KV 'b' 1,KV 'a' 2,KV 'b' 3]
     Unstable sort: [KV 'a' 2,KV 'b' 3,KV 'b' 1,KV 'c' 0]
       Stable sort: [KV 'a' 2,KV 'b' 1,KV 'b' 3,KV 'c' 0]
    
  2. Daniel said

    Here’s a solution in C (with the nested function GCC extension).

    The data in the array to be sorted is not copied. Rather, arrays of indices and pointers are used. This approach seemingly complicated my solution, but would use less memory for large data types.

    /* stable_sort.c */
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    typedef struct {
      size_t index;
      void* ptr;
    } indexed_ptr_t;
    
    typedef struct {
      int first;
      int second;
    } pair_t;
    
    int compare_pair_firsts(const void* a, const void* b) {
      return ((pair_t*)a)->first - ((pair_t*)b)->first;
    }
    
    void stable_sort(void* base,
                     size_t nel,
                     size_t width,
                     int (*compar)(const void*, const void*),
                     int (*sort)(void*,
                                 size_t,
                                 size_t,
                                 int (*)(const void*, const void*))) {
      indexed_ptr_t* sorted = malloc(nel*sizeof(indexed_ptr_t));
      for (size_t i = 0; i < nel; ++i) {
        sorted[i] = (indexed_ptr_t){i, (char*)base + (i*width)};
      }
      int compar_indexed_ptr(const void* a, const void* b) {
        const indexed_ptr_t* a_ = (indexed_ptr_t*)a;
        const indexed_ptr_t* b_ = (indexed_ptr_t*)b;
        int result = compar(a_->ptr, b_->ptr);
        if (result == 0) {
          if (a_->index > b_->index) {
            result = 1;
          } else if (a_->index < b_->index) {
            result = -1;
          }
        }
        return result;
      }
      sort(sorted, nel, sizeof(indexed_ptr_t), compar_indexed_ptr);
      // copy results from 'sorted' into 'base'.
      // this is done inplace, by using an array with indices.
      // this could be simplified with a a copy of 'base'.
      size_t* indexes = (size_t*)malloc(nel*sizeof(size_t));
      size_t index_tmp;
      void* data_tmp_p = malloc(width);
      for (size_t i = 0; i < nel; ++i) {
        indexes[sorted[i].index] = i;
      }
      for (size_t i = 0; i < nel; ++i) {
        while (indexes[i] != i) {
          size_t index_i = indexes[i];
          index_tmp = indexes[index_i];
          memcpy(data_tmp_p, (char*)base + (width*index_i), width);
          indexes[index_i] = index_i;
          memcpy((char*)base + (width*index_i), (char*)base + (width*i), width);
          indexes[i] = index_tmp;
          memcpy((char*)base + (width*i), data_tmp_p, width);
        }
      }
      free(sorted);
      free(indexes);
      free(data_tmp_p);
    }
    
    void print_pair_array(const pair_t* array,
                          const size_t n) {
      printf("[");
      for (size_t i = 0; i < n; ++i) {
        if (i > 0) printf(", ");
        printf("(%d,%d)", array[i].first, array[i].second);
      }
      printf("]\n");
    }
    
    int main(void) {
      pair_t array[] = {
        {3,1},{2,1},{2,2},{1,1},{3,2},{2,3},{1,2},{1,3},{3,3}
      };
      size_t n = sizeof(array) / sizeof(pair_t);
      pair_t sorted[n];
      printf("array:\n  ");
      print_pair_array(array, n);
    
      // heapsort
      memcpy(sorted, array, n*sizeof(pair_t));
      heapsort(sorted, n, sizeof(pair_t), compare_pair_firsts);
      printf("heapsort(array):\n  ");
      print_pair_array(sorted, n);
    
      // stable heapsort
      memcpy(sorted, array, n*sizeof(pair_t));
      stable_sort(sorted, n, sizeof(pair_t), compare_pair_firsts, heapsort);
      printf("stable_sort(array,heapsort):\n  ");
      print_pair_array(sorted, n);
    
      return EXIT_SUCCESS;
    }
    

    Output:

    array:
      [(3,1), (2,1), (2,2), (1,1), (3,2), (2,3), (1,2), (1,3), (3,3)]
    heapsort(array):
      [(1,2), (1,1), (1,3), (2,2), (2,3), (2,1), (3,2), (3,3), (3,1)]
    stable_sort(array,heapsort):
      [(1,1), (1,2), (1,3), (2,1), (2,2), (2,3), (3,1), (3,2), (3,3)]
    

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: