Array Rotation, Timing Tests

March 9, 2018

We begin by rewriting the three algorithms so they all have a common form, taking a vector and a rotation distance as arguments, with all the supporting code tucked neatly inside the main function:

(define (juggling vec dist)
  (let ((len (vector-length vec)))
    (do ((idx 0 (+ idx 1)))
        ((= idx (gcd dist len)) vec)
      (let ((temp (vector-ref vec idx)))
        (do ((lo idx hi)
             (hi (modulo (+ idx dist) len)
                 (modulo (+ hi dist) len)))
            ((= hi idx) (vector-set! vec lo temp))
          (vector-set! vec lo (vector-ref vec hi)))))))
(define (block-swap vec dist)
  (define (swap a b m)
    (do ((i 0 (+ i 1))) ((= i m) vec)
      (let ((t (vector-ref vec (+ a i))))
        (vector-set! vec (+ a i)
          (vector-ref vec (+ b i)))
        (vector-set! vec (+ b i) t))))
  (let ((len (vector-length vec)))
    (let loop ((i dist) (j (- len dist)))
      (cond ((< i j) (swap (- dist i) (+ dist j (- i)) i)
                     (loop i (- j i)))
            ((< j i) (swap (- dist i) dist j)
                     (loop (- i j) j))
            (else (swap (- dist i) dist i) vec)))))
(define (reversal vec dist)
  (define (swap i j)
    (let ((t (vector-ref vec i)))
      (vector-set! vec i
        (vector-ref vec j))
      (vector-set! vec j t)))
  (define (reverse lo hi)
    (when (< lo hi)
      (swap lo hi)
      (reverse (+ lo 1) (- hi 1))))
  (let ((len (vector-length vec)))
    (reverse 0 (- dist 1))
    (reverse dist (- len 1))
    (reverse 0 (- len 1)))
    vec)

Next we write a timing function, using the Chez Scheme (cpu-time) function to calculate the times:

(define (timing rotate vec dist)
  (let ((start (cpu-time)))
    (rotate vec dist)
    (- (cpu-time) start)))

And now we are ready for some timings:

> (let ((vec (list->vector (range 1000000))))
    (map (lambda (x) (timing juggling vec 100)) (range 50)))
(109 94 109 94 109 93 94 109 94 109 109 94 109 94 109 93 110
 93 94 109 94 93 109 94 109 94 109 94 93 125 94 93 109 94 94
 109 93 94 109 94 93 110 93 125 109 94 109 94 93 109)
> (let ((vec (list->vector (range 1000000))))
    (map (lambda (x) (timing block-swap vec 100)) (range 50)))
(187 171 172 187 187 172 203 203 171 187 188 187 171 203 172
 187 172 187 171 188 187 171 188 171 203 187 172 187 172 187
 171 188 187 187 187 172 187 187 172 187 172 187 171 188 187
 171 203 187 188 187)
> (let ((vec (list->vector (range 1000000))))
    (map (lambda (x) (timing reversal vec 100)) (range 50)))
(140 156 141 156 140 141 140 156 140 156 141 140 156 125 156
 156 140 156 141 140 156 141 140 140 172 140 141 140 156 141
 140 156 140 141 156 140 141 156 156 140 140 141 156 156 156
 140 156 156 187 141)

So the juggling algorithm is fastest, the block-swap algorithm is slowest, and the reversal algorithm is somewhere in the middle. Our result differs from Bentley, who found that block-swap was just a little bit faster than reversal, and both were much faster than juggling. Bentley explained the differences based on CPU memory-cache effects; apparently Scheme is a higher-level language than C and is less influenced by caching.

Despite the timings, I will continue to use the reversal algorithm when I need to rotate an array. It’s time is competitive with juggling, and the code is very much simpler; in fact, it’s hard to see how you could get the reversal code wrong, whereas the juggling code still baffles me, just a little bit, even after reading Bentley’s explanation and copying his implementation.

You can run the program at https://ideone.com/DQLleZ.

Advertisement

Pages: 1 2

3 Responses to “Array Rotation, Timing Tests”

  1. gambiteer said

    I compiled the code with Gambit, added declarations

    (declare (standard-bindings)
    (extended-bindings)
    (block)
    (fixnum)
    (not safe))

    except for

    (define (timing rotate vec dist)
    (declare (generic))
    (let ((start (cpu-time)))
    (rotate vec dist)
    (- (cpu-time) start)))

    and got times

    (let ((vec (list->vector (range 1000000))))
    (map (lambda (x) (timing juggling vec 100)) (range 50)))
    (.029591999999993845
    .02927400000000091
    .027829000000011206
    .03642400000001089
    .024536999999995146
    .02830999999999051
    .029009999999999536
    .02474899999999991
    .02875699999999881
    .0312900000000127
    .02902900000000841
    .028691000000009126
    .028640999999993255
    .026204000000007
    .024539000000018518
    .02653800000000217
    .028767000000001985
    .029217000000002713
    .027430000000009613
    .025491999999999848
    .02925899999999615
    .028920999999982655
    .024906000000001427
    .03006200000000092
    .027745999999993387
    .03249999999999886
    .025558000000003744
    .030752000000006774
    .025658000000007064
    .025202000000007274
    .02870499999998799
    .02643400000000895
    .02801700000000551
    .025395000000003165
    .027732999999997787
    .02871199999999874
    .024641000000002578
    .02881400000001122
    .0244689999999963
    .024765000000002146
    .02464700000000164
    .02467299999999284
    .02451000000000647
    .02820400000000234
    .02889599999998893
    .02443200000000445
    .02453400000000272
    .029072999999982585
    .024445999999997525
    .02456900000001383)

    (let ((vec (list->vector (range 1000000))))
    (map (lambda (x) (timing block-swap vec 100)) (range 50)))
    (.005112999999994372
    .006988000000006878
    .005129000000010819
    .00770599999999888
    .006287999999997851
    .005140999999994733
    .005306000000004474
    .006874000000010483
    .0067049999999824195
    .005473999999992429
    .005122999999997546
    .006666999999993095
    .006698999999997568
    .005892000000002895
    .005227000000004978
    .00513300000000072
    .0065400000000011005
    .005146000000010531
    .005233999999987304
    .005146999999993795
    .00519599999999798
    .0053230000000041855
    .006732999999996991
    .00784100000001331
    .0065659999999923
    .005431999999998993
    .005251000000001227
    .005134999999995671
    .005264999999994302
    .005147000000008006
    .0053039999999953125
    .005131999999989034
    .005214999999992642
    .005148999999988746
    .005223000000000866
    .0052220000000176015
    .005243000000007214
    .005140999999994733
    .005228999999999928
    .00518300000000238
    .00517000000000678
    .0060610000000167474
    .005145000000013056
    .0052450000000163755
    .009970000000009804
    .005146000000010531
    .005279999999999063
    .005134999999995671
    .005239000000003102
    .005659999999991783)
    (let ((vec (list->vector (range 1000000))))
    (map (lambda (x) (timing reversal vec 100)) (range 50)))
    (.0030000000000001137
    .0031049999999908096
    .003112000000001558
    .003090000000000259
    .003170999999980495
    .003987000000009289
    .0032210000000105765
    .0033960000000092805
    .0032839999999936254
    .0030319999999903757
    .003071000000005597
    .0032810000000011996
    .003694000000010078
    .0031510000000167793
    .003117000000003145
    .0030380000000178597
    .0031689999999997553
    .003107999999997446
    .0033580000000057453
    .003789999999995075
    .00334400000001267
    .0033090000000015607
    .0031930000000102154
    .0033829999999994698
    .003629000000003657
    .003005999999999176
    .0031090000000091322
    .0031000000000034333
    .0039209999999911815
    .0031829999999928305
    .003528000000002862
    .0032950000000084856
    .0031599999999940565
    .0030909999999977344
    .003120999999993046
    .0034139999999922566
    .003697000000002504
    .00402800000000525
    .003558999999995649
    .0033030000000167092
    .0032910000000043738
    .00314399999999182
    .0032059999999916045
    .002983000000000402
    .003740000000007626
    .003374999999991246
    .0037559999999956517
    .0033249999999895863
    .003296999999989225
    .003203999999996654)

    which is closer to Bentley’s results

  2. Daniel said

    Here’s a solution in C.

    The output times follow the code.

    The swapping approach worked fastest of my implementations.

    I was able to improve the speed of the juggle approach by using subtraction to calculate the remainder instead of using the modulus operator.

    I also added code to validate that my rotation implementations are working correctly.

    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>
    
    void rotate_juggling(int* array, size_t n, size_t i) {
      if (n == 0) return;
      size_t n_moved = 0;
      size_t start_idx = 0;
      while (1) {
        int tmp = array[start_idx];
        size_t dest_idx = start_idx;
        size_t source_idx = start_idx + i;
        if (source_idx >= n) source_idx -= n;
        while (1) {
          ++n_moved;
          if (source_idx == start_idx) {
            array[dest_idx] = tmp;
            break;
          }
          array[dest_idx] = array[source_idx];
          dest_idx = source_idx;
          source_idx = source_idx + i;
          if (source_idx >= n) source_idx -= n;
        }
        if (n_moved == n) break;
        ++start_idx;
      }
    }
    
    static void swap(int* a1, int* a2, size_t n) {
      for (size_t i = 0; i < n; ++i) {
        int tmp = a1[i];
        a1[i] = a2[i];
        a2[i] = tmp;
      }
    }
    
    void rotate_swap(int* array, size_t n, size_t i) {
      i %= n;
      while (1) {
        if (n <= 1 || i == 0) break;
        if (i == n - i) {
          swap(array, array + i, i);
          break;
        } else if (i < n - i) {
          swap(array, array + n - i, i);
          n -= i;
        } else {
          swap(array, array + i, n - i);
          array += n - i;
          size_t n_ = n;
          n = i;
          i -= n_ - i;
        }
      }
    }
    
    static void reverse(int* array, size_t n) {
      for (size_t i = 0; i < n / 2; ++i) {
        int tmp = array[i];
        array[i] = array[n - i - 1];
        array[n - i - 1] = tmp;
      }
    }
    
    void rotate_reverse(int* array, size_t n, size_t i) {
      reverse(array, i);
      reverse(array + i, n - i);
      reverse(array, n);
    }
    
    clock_t calc_rotate_time(void (*rotate) (int*, size_t, size_t),
                            int* array,
                            size_t n,
                            size_t i,
                            size_t iterations) {
      int* array_ = malloc(sizeof(int) * n);
      clock_t total = 0;
      for (size_t iter = 0; iter < iterations; ++iter) {
        memcpy(array_, array, n * sizeof(int));
        clock_t start = clock();
        rotate(array, n, i);
        clock_t end = clock();
        total += end - start;
      }
      free(array_);
      return (double)total;
    }
    
    int main(void) {
      size_t iterations = 1000;
      size_t n = 10000;
      int* array = malloc(n * sizeof(int));
      for (size_t idx = 0; idx < n; ++idx) {
        array[idx] = idx;
      }
      size_t increment = 1000;
      void (*rotate_functions[])(int*,size_t,size_t) = {
        rotate_juggling,
        rotate_swap,
        rotate_reverse
      };
      char* rotate_function_names[] = {"juggle", "swap", "reverse"};
      size_t n_rotate_functions = sizeof(rotate_function_names) / sizeof(char*);
      printf("dist");
      for (size_t i = 0; i < n_rotate_functions; ++i) {
        printf("\t%s", rotate_function_names[i]);
      }
      printf("\n");
      for (size_t distance = increment; distance < n; distance += increment) {
        printf("%zu", distance);
        for (size_t i = 0; i < n_rotate_functions; ++i) {
          void (*rotate_function)(int*,size_t,size_t) = rotate_functions[i];
          // ***************************************
          // * Validate Rotation
          // ***************************************
          int* rotated = malloc(n * sizeof(int));
          memcpy(rotated, array, n * sizeof(int));
          rotate_function(rotated, n, distance);
          for (size_t k = 0; k < n; ++k) {
            assert(rotated[k] == array[(k+distance)%n]);
          }
          free(rotated);
          // ***************************************
          // * Time Rotation
          // ***************************************
          clock_t time = calc_rotate_time(
            rotate_function, array, n, distance, iterations);
          printf("\t%lu", time);
        }
        printf("\n");
      }
      free(array);
      return EXIT_SUCCESS;
    }
    

    Output (-O0):

    dist    juggle  swap    reverse
    1000    39265   25692   31132
    2000    32060   19986   29618
    3000    29878   27256   33861
    4000    30236   20669   30122
    5000    32395   12986   29730
    6000    33031   20343   33086
    7000    38557   28355   37712
    8000    35738   21311   33593
    9000    36285   22194   29386
    

    Output (-O2):

    dist    juggle  swap    reverse
    1000    17076   3093    9534
    2000    10190   2641    7591
    3000    9402    2594    7187
    4000    8445    2308    7191
    5000    10205   1757    7581
    6000    9354    2572    7175
    7000    8411    2166    7001
    8000    8927    2396    7330
    9000    8956    2326    7605
    
  3. Daniel said

    My calc_rotate_time function casts the result to a double in the return statement. That should be removed. An earlier implementation was calculating seconds, and the cast to double was there to bypass integer division. Removing the cast to double won’t have a substantive effect on the numbers reported above.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: