Array Rotation, Timing Tests

March 9, 2018

We begin by rewriting the three algorithms so they all have a common form, taking a vector and a rotation distance as arguments, with all the supporting code tucked neatly inside the main function:

(define (juggling vec dist)
  (let ((len (vector-length vec)))
    (do ((idx 0 (+ idx 1)))
        ((= idx (gcd dist len)) vec)
      (let ((temp (vector-ref vec idx)))
        (do ((lo idx hi)
             (hi (modulo (+ idx dist) len)
                 (modulo (+ hi dist) len)))
            ((= hi idx) (vector-set! vec lo temp))
          (vector-set! vec lo (vector-ref vec hi)))))))

(define (block-swap vec dist)
  (define (swap a b m)
    (do ((i 0 (+ i 1))) ((= i m) vec)
      (let ((t (vector-ref vec (+ a i))))
        (vector-set! vec (+ a i)
          (vector-ref vec (+ b i)))
        (vector-set! vec (+ b i) t))))
  (let ((len (vector-length vec)))
    (let loop ((i dist) (j (- len dist)))
      (cond ((< i j) (swap (- dist i) (+ dist j (- i)) i)
                     (loop i (- j i)))
            ((< j i) (swap (- dist i) dist j)
                     (loop (- i j) j))
            (else (swap (- dist i) dist i) vec)))))

(define (reversal vec dist)
  (define (swap i j)
    (let ((t (vector-ref vec i)))
      (vector-set! vec i
        (vector-ref vec j))
      (vector-set! vec j t)))
  (define (reverse lo hi)
    (when (< lo hi)
      (swap lo hi)
      (reverse (+ lo 1) (- hi 1))))
  (let ((len (vector-length vec)))
    (reverse 0 (- dist 1))
    (reverse dist (- len 1))
    (reverse 0 (- len 1)))
    vec)

Next we write a timing function, using the Chez Scheme (cpu-time) function to calculate the times:

(define (timing rotate vec dist)
  (let ((start (cpu-time)))
    (rotate vec dist)
    (- (cpu-time) start)))

And now we are ready for some timings:

> (let ((vec (list->vector (range 1000000))))
    (map (lambda (x) (timing juggling vec 100)) (range 50)))
(109 94 109 94 109 93 94 109 94 109 109 94 109 94 109 93 110
 93 94 109 94 93 109 94 109 94 109 94 93 125 94 93 109 94 94
 109 93 94 109 94 93 110 93 125 109 94 109 94 93 109)
> (let ((vec (list->vector (range 1000000))))
    (map (lambda (x) (timing block-swap vec 100)) (range 50)))
(187 171 172 187 187 172 203 203 171 187 188 187 171 203 172
 187 172 187 171 188 187 171 188 171 203 187 172 187 172 187
 171 188 187 187 187 172 187 187 172 187 172 187 171 188 187
 171 203 187 188 187)
> (let ((vec (list->vector (range 1000000))))
    (map (lambda (x) (timing reversal vec 100)) (range 50)))
(140 156 141 156 140 141 140 156 140 156 141 140 156 125 156
 156 140 156 141 140 156 141 140 140 172 140 141 140 156 141
 140 156 140 141 156 140 141 156 156 140 140 141 156 156 156
 140 156 156 187 141)

So the juggling algorithm is fastest, the block-swap algorithm is slowest, and the reversal algorithm is somewhere in the middle. Our result differs from Bentley, who found that block-swap was just a little bit faster than reversal, and both were much faster than juggling. Bentley explained the differences based on CPU memory-cache effects; apparently Scheme is a higher-level language than C and is less influenced by caching.

Despite the timings, I will continue to use the reversal algorithm when I need to rotate an array. It’s time is competitive with juggling, and the code is very much simpler; in fact, it’s hard to see how you could get the reversal code wrong, whereas the juggling code still baffles me, just a little bit, even after reading Bentley’s explanation and copying his implementation.

You can run the program at https://ideone.com/DQLleZ.

Posted by programmingpraxis

Filed in Exercises

3 Comments »

3 Responses to “Array Rotation, Timing Tests”

gambiteer said
March 10, 2018 at 3:52 AM
I compiled the code with Gambit, added declarations

(declare (standard-bindings)
(extended-bindings)
(block)
(fixnum)
(not safe))

except for

(define (timing rotate vec dist)
(declare (generic))
(let ((start (cpu-time)))
(rotate vec dist)
(- (cpu-time) start)))

and got times

(let ((vec (list->vector (range 1000000))))
(map (lambda (x) (timing juggling vec 100)) (range 50)))
(.029591999999993845
.02927400000000091
.027829000000011206
.03642400000001089
.024536999999995146
.02830999999999051
.029009999999999536
.02474899999999991
.02875699999999881
.0312900000000127
.02902900000000841
.028691000000009126
.028640999999993255
.026204000000007
.024539000000018518
.02653800000000217
.028767000000001985
.029217000000002713
.027430000000009613
.025491999999999848
.02925899999999615
.028920999999982655
.024906000000001427
.03006200000000092
.027745999999993387
.03249999999999886
.025558000000003744
.030752000000006774
.025658000000007064
.025202000000007274
.02870499999998799
.02643400000000895
.02801700000000551
.025395000000003165
.027732999999997787
.02871199999999874
.024641000000002578
.02881400000001122
.0244689999999963
.024765000000002146
.02464700000000164
.02467299999999284
.02451000000000647
.02820400000000234
.02889599999998893
.02443200000000445
.02453400000000272
.029072999999982585
.024445999999997525
.02456900000001383)

(let ((vec (list->vector (range 1000000))))
(map (lambda (x) (timing block-swap vec 100)) (range 50)))
(.005112999999994372
.006988000000006878
.005129000000010819
.00770599999999888
.006287999999997851
.005140999999994733
.005306000000004474
.006874000000010483
.0067049999999824195
.005473999999992429
.005122999999997546
.006666999999993095
.006698999999997568
.005892000000002895
.005227000000004978
.00513300000000072
.0065400000000011005
.005146000000010531
.005233999999987304
.005146999999993795
.00519599999999798
.0053230000000041855
.006732999999996991
.00784100000001331
.0065659999999923
.005431999999998993
.005251000000001227
.005134999999995671
.005264999999994302
.005147000000008006
.0053039999999953125
.005131999999989034
.005214999999992642
.005148999999988746
.005223000000000866
.0052220000000176015
.005243000000007214
.005140999999994733
.005228999999999928
.00518300000000238
.00517000000000678
.0060610000000167474
.005145000000013056
.0052450000000163755
.009970000000009804
.005146000000010531
.005279999999999063
.005134999999995671
.005239000000003102
.005659999999991783)
(let ((vec (list->vector (range 1000000))))
(map (lambda (x) (timing reversal vec 100)) (range 50)))
(.0030000000000001137
.0031049999999908096
.003112000000001558
.003090000000000259
.003170999999980495
.003987000000009289
.0032210000000105765
.0033960000000092805
.0032839999999936254
.0030319999999903757
.003071000000005597
.0032810000000011996
.003694000000010078
.0031510000000167793
.003117000000003145
.0030380000000178597
.0031689999999997553
.003107999999997446
.0033580000000057453
.003789999999995075
.00334400000001267
.0033090000000015607
.0031930000000102154
.0033829999999994698
.003629000000003657
.003005999999999176
.0031090000000091322
.0031000000000034333
.0039209999999911815
.0031829999999928305
.003528000000002862
.0032950000000084856
.0031599999999940565
.0030909999999977344
.003120999999993046
.0034139999999922566
.003697000000002504
.00402800000000525
.003558999999995649
.0033030000000167092
.0032910000000043738
.00314399999999182
.0032059999999916045
.002983000000000402
.003740000000007626
.003374999999991246
.0037559999999956517
.0033249999999895863
.003296999999989225
.003203999999996654)

which is closer to Bentley’s results

Daniel said

April 1, 2018 at 10:58 PM

Here’s a solution in C.

The output times follow the code.

The swapping approach worked fastest of my implementations.

I was able to improve the speed of the juggle approach by using subtraction to calculate the remainder instead of using the modulus operator.

I also added code to validate that my rotation implementations are working correctly.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

void rotate_juggling(int* array, size_t n, size_t i) {
  if (n == 0) return;
  size_t n_moved = 0;
  size_t start_idx = 0;
  while (1) {
    int tmp = array[start_idx];
    size_t dest_idx = start_idx;
    size_t source_idx = start_idx + i;
    if (source_idx >= n) source_idx -= n;
    while (1) {
      ++n_moved;
      if (source_idx == start_idx) {
        array[dest_idx] = tmp;
        break;
      }
      array[dest_idx] = array[source_idx];
      dest_idx = source_idx;
      source_idx = source_idx + i;
      if (source_idx >= n) source_idx -= n;
    }
    if (n_moved == n) break;
    ++start_idx;
  }
}

static void swap(int* a1, int* a2, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    int tmp = a1[i];
    a1[i] = a2[i];
    a2[i] = tmp;
  }
}

void rotate_swap(int* array, size_t n, size_t i) {
  i %= n;
  while (1) {
    if (n <= 1 || i == 0) break;
    if (i == n - i) {
      swap(array, array + i, i);
      break;
    } else if (i < n - i) {
      swap(array, array + n - i, i);
      n -= i;
    } else {
      swap(array, array + i, n - i);
      array += n - i;
      size_t n_ = n;
      n = i;
      i -= n_ - i;
    }
  }
}

static void reverse(int* array, size_t n) {
  for (size_t i = 0; i < n / 2; ++i) {
    int tmp = array[i];
    array[i] = array[n - i - 1];
    array[n - i - 1] = tmp;
  }
}

void rotate_reverse(int* array, size_t n, size_t i) {
  reverse(array, i);
  reverse(array + i, n - i);
  reverse(array, n);
}

clock_t calc_rotate_time(void (*rotate) (int*, size_t, size_t),
                        int* array,
                        size_t n,
                        size_t i,
                        size_t iterations) {
  int* array_ = malloc(sizeof(int) * n);
  clock_t total = 0;
  for (size_t iter = 0; iter < iterations; ++iter) {
    memcpy(array_, array, n * sizeof(int));
    clock_t start = clock();
    rotate(array, n, i);
    clock_t end = clock();
    total += end - start;
  }
  free(array_);
  return (double)total;
}

int main(void) {
  size_t iterations = 1000;
  size_t n = 10000;
  int* array = malloc(n * sizeof(int));
  for (size_t idx = 0; idx < n; ++idx) {
    array[idx] = idx;
  }
  size_t increment = 1000;
  void (*rotate_functions[])(int*,size_t,size_t) = {
    rotate_juggling,
    rotate_swap,
    rotate_reverse
  };
  char* rotate_function_names[] = {"juggle", "swap", "reverse"};
  size_t n_rotate_functions = sizeof(rotate_function_names) / sizeof(char*);
  printf("dist");
  for (size_t i = 0; i < n_rotate_functions; ++i) {
    printf("\t%s", rotate_function_names[i]);
  }
  printf("\n");
  for (size_t distance = increment; distance < n; distance += increment) {
    printf("%zu", distance);
    for (size_t i = 0; i < n_rotate_functions; ++i) {
      void (*rotate_function)(int*,size_t,size_t) = rotate_functions[i];
      // ***************************************
      // * Validate Rotation
      // ***************************************
      int* rotated = malloc(n * sizeof(int));
      memcpy(rotated, array, n * sizeof(int));
      rotate_function(rotated, n, distance);
      for (size_t k = 0; k < n; ++k) {
        assert(rotated[k] == array[(k+distance)%n]);
      }
      free(rotated);
      // ***************************************
      // * Time Rotation
      // ***************************************
      clock_t time = calc_rotate_time(
        rotate_function, array, n, distance, iterations);
      printf("\t%lu", time);
    }
    printf("\n");
  }
  free(array);
  return EXIT_SUCCESS;
}

Output (-O0):

dist    juggle  swap    reverse
1000    39265   25692   31132
2000    32060   19986   29618
3000    29878   27256   33861
4000    30236   20669   30122
5000    32395   12986   29730
6000    33031   20343   33086
7000    38557   28355   37712
8000    35738   21311   33593
9000    36285   22194   29386

Output (-O2):

dist    juggle  swap    reverse
1000    17076   3093    9534
2000    10190   2641    7591
3000    9402    2594    7187
4000    8445    2308    7191
5000    10205   1757    7581
6000    9354    2572    7175
7000    8411    2166    7001
8000    8927    2396    7330
9000    8956    2326    7605

Daniel said
April 1, 2018 at 11:03 PM
My calc_rotate_time function casts the result to a double in the return statement. That should be removed. An earlier implementation was calculating seconds, and the cast to double was there to bypass integer division. Removing the cast to double won’t have a substantive effect on the numbers reported above.

Programming Praxis