Stable Sort
March 23, 2018
Before we get started, we need some data that will demonstrate whether a sort is stable or unstable. Here is a list of pairs; the second elements of the pairs are in decreasing order. We also provide an ordering function that ignores the second elements of the pairs:
(define xs '( (alfa zulu) (bravo yankee) (charlie xray) (delta whiskey) (echo uniform) (alfa tango) (bravo sierra) (charlie romeo) (delta papa) (echo oscar)))
(define (lt? a b)
(stringstring (car a))
(symbol->string (car b))))
Chez Scheme uses merge sort for its native sorting algorithm, and is stable. We need an unstable sorting algorithm for testing; here’s a naive quicksort, O(n²) on already-sorted data:
(define (qsort lt? xs)
(if (or (null? xs) (null? (cdr xs))) xs
(let ((pivot (car xs)))
(let loop ((xs (cdr xs)) (lesser (list)) (equal-or-greater (list)))
(if (null? xs) (append (qsort lt? lesser) (list pivot) (qsort lt? equal-or-greater))
(if (lt? (car xs) pivot) (loop (cdr xs) (cons (car xs) lesser) equal-or-greater)
(loop (cdr xs) lesser (cons (car xs) equal-or-greater))))))))
You can see that qsort is unstable; both delta and echo are in reverse order of their input:
> (qsort lt? xs) ((alfa zulu) (alfa tango) (bravo yankee) (bravo sierra) (charlie xray) (charlie romeo) (delta papa) (delta whiskey) (echo oscar) (echo uniform))
We are ready to show our stable-sorting wrapper. It’s arguments are a non-stable sorting algorithm, a less-than predicate, and a list to be sorted:
(define (stable-sort sort lt? xs)
(define (less? a b)
(or (lt? (cadr a) (cadr b))
(and (not (lt? (cadr b) (cadr a)))
(< (car a) (car b)))))
(map cadr (sort less? (zip (range (length xs)) xs))))
Zip augments each data item with the index at which it appears in the input, sort calls the unstable sorting algorithm with a special ordering predicate, and map removes the index. The less? predicate uses lt? unless the two items compare equal, in which case it compares the initial indexes of the two items.
You can see that the sort is stable; at each pair of identical first elements, the second elements are in decreasing order, including delta and echo:
> (stable-sort qsort lt? xs) ((alfa zulu) (alfa tango) (bravo yankee) (bravo sierra) (charlie xray) (charlie romeo) (delta whiskey) (delta papa) (echo uniform) (echo oscar))
You can run the program at https://ideone.com/x5cN53.
Here’s a Haskell version. We write a sorting function that is guaranteed to be unstable, then apply a “stabilizing” function to it, resulting in a stable sort.
Here’s a solution in C (with the nested function GCC extension).
The data in the array to be sorted is not copied. Rather, arrays of indices and pointers are used. This approach seemingly complicated my solution, but would use less memory for large data types.
/* stable_sort.c */ #include <stdio.h> #include <stdlib.h> #include <string.h> typedef struct { size_t index; void* ptr; } indexed_ptr_t; typedef struct { int first; int second; } pair_t; int compare_pair_firsts(const void* a, const void* b) { return ((pair_t*)a)->first - ((pair_t*)b)->first; } void stable_sort(void* base, size_t nel, size_t width, int (*compar)(const void*, const void*), int (*sort)(void*, size_t, size_t, int (*)(const void*, const void*))) { indexed_ptr_t* sorted = malloc(nel*sizeof(indexed_ptr_t)); for (size_t i = 0; i < nel; ++i) { sorted[i] = (indexed_ptr_t){i, (char*)base + (i*width)}; } int compar_indexed_ptr(const void* a, const void* b) { const indexed_ptr_t* a_ = (indexed_ptr_t*)a; const indexed_ptr_t* b_ = (indexed_ptr_t*)b; int result = compar(a_->ptr, b_->ptr); if (result == 0) { if (a_->index > b_->index) { result = 1; } else if (a_->index < b_->index) { result = -1; } } return result; } sort(sorted, nel, sizeof(indexed_ptr_t), compar_indexed_ptr); // copy results from 'sorted' into 'base'. // this is done inplace, by using an array with indices. // this could be simplified with a a copy of 'base'. size_t* indexes = (size_t*)malloc(nel*sizeof(size_t)); size_t index_tmp; void* data_tmp_p = malloc(width); for (size_t i = 0; i < nel; ++i) { indexes[sorted[i].index] = i; } for (size_t i = 0; i < nel; ++i) { while (indexes[i] != i) { size_t index_i = indexes[i]; index_tmp = indexes[index_i]; memcpy(data_tmp_p, (char*)base + (width*index_i), width); indexes[index_i] = index_i; memcpy((char*)base + (width*index_i), (char*)base + (width*i), width); indexes[i] = index_tmp; memcpy((char*)base + (width*i), data_tmp_p, width); } } free(sorted); free(indexes); free(data_tmp_p); } void print_pair_array(const pair_t* array, const size_t n) { printf("["); for (size_t i = 0; i < n; ++i) { if (i > 0) printf(", "); printf("(%d,%d)", array[i].first, array[i].second); } printf("]\n"); } int main(void) { pair_t array[] = { {3,1},{2,1},{2,2},{1,1},{3,2},{2,3},{1,2},{1,3},{3,3} }; size_t n = sizeof(array) / sizeof(pair_t); pair_t sorted[n]; printf("array:\n "); print_pair_array(array, n); // heapsort memcpy(sorted, array, n*sizeof(pair_t)); heapsort(sorted, n, sizeof(pair_t), compare_pair_firsts); printf("heapsort(array):\n "); print_pair_array(sorted, n); // stable heapsort memcpy(sorted, array, n*sizeof(pair_t)); stable_sort(sorted, n, sizeof(pair_t), compare_pair_firsts, heapsort); printf("stable_sort(array,heapsort):\n "); print_pair_array(sorted, n); return EXIT_SUCCESS; }Output: