Trailing Zero-Bits

July 7, 2020

The easiest solution is to loop through the trailing bits:

(define (zerobits n)
  (let loop ((n n) (b 0))
    (if (odd? n) b (loop (/ n 2) (+ b 1)))))

That takes time O(log n) to iterate through the bits of the number. A little trick using some bit-hackery and a lookup table gives us an O(1) solution:

(define (zerobits n)
  (vector-ref (vector 32 0 1 26 2 23 27 0 3 16 24 30 28 11 0 13
      4 7 17 0 25 22 31 15 29 10 12 6 0 21 14 9 5 20 8 19 18)
    (modulo (bitwise-and n (- n)) 37)))

Who thinks up things like this? You can run the program at https://ideone.com/8hx3c7.

Pages: 1 2

16 Responses to “Trailing Zero-Bits”

  1. Zack said

    Cool little drill. Here is my take on it using Julia 1.4.1: https://pastebin.com/GbYGLYKV

    First approach: create the bin string and work with that. Easier for figuring out what’s going on.

    Second approach: just solve the task and forget about binary representation. Faster but opaque.

  2. matthew said

    If you are just going to do fixed size integers, then the looping solution is constant time as well. For integers of any length, represented as bignums, I don’t think you can do better than some sort of loop over the the bits of the number.

    The array solution is quite cute though (37 presumably is the lowest modulus m such that 2^i mod m is different for i in 0..31).

  3. Milbrae said

    Python. Simple bit-testing.

    def trail(n):
    “”” “””
    z = 0
    while (n & 1) == 0:
    z += 1
    n >>= 1
    return z

    n = [18, 48]
    for k in n:
    print (“%d = %d” % (k, trail(k)))
    [/code]

    Sample output:
    18 = 1
    48 = 4[/code]

  4. Milbrae said

    Again with proper indentation

    def trail(n):
        """ """
        z = 0
        while (n & 1) == 0:
            z += 1
            n >>= 1
        return z
    
    n = [18, 48]
    for k in n:
        print ("%d = %d" % (k, trail(k)))
    
    18 = 1
    48 = 4
  5. kernelbob said

    I’ve been playing with Compiler Explorer lately. So I wrote three implementations in C and explored them there.

    Link.

    Then I wrapped a test harness around them to see how they performed. On my machine, tz2 and tz3 are about twice as fast as tz, wth a slight edge to tz3. You can see from Compiler Explorer that the compiler unrolled the loops in tz1 and tz3.

    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/time.h>
    
    extern unsigned tz(unsigned);
    extern unsigned tz2(unsigned);
    extern unsigned tz3(unsigned);
    
    unsigned stupid_tz(unsigned num)
    {
        for (unsigned i = 0; i < 32; i++)
            if (num & (1LL << i))
                return i;
        return 32;
    }
    
    void test(unsigned (*tzf)(unsigned), const char *label)
    {
        printf("testing %s\n", label);
        // printf("%s(0) => %u\n", label, (*tzf)(0));
        assert((*tzf)(0) == 32);
        assert((*tzf)(1) == 0);
        assert((*tzf)(18) == 1);
        assert((*tzf)(48) == 4);
        unsigned num = 0;
        for (int i = 0; i < 10000000; i++) {
            unsigned expected = stupid_tz(num);
            unsigned actual = (*tzf)(num);
            if (actual != expected) {
                fprintf(stderr,
                        "%#x: expected %u, got %u\n",
                        num, expected, actual);
                abort();
            }
            num += 1537;
        }
    }
    
    double dt(struct timeval *before, struct timeval *after)
    {
        double dsec = after->tv_sec - before->tv_sec;
        double dusec = after->tv_usec - before->tv_usec;
        return dsec + dusec / 1000000.0;
    }
    
    void perf(unsigned (*tzf)(unsigned), const char *label)
    {
        (void)tzf;
        (void)label;
        struct timeval before, after;
        unsigned nums[] = {0, 1, 0x10, 0x100, 0x1000, 0x10000,
                           0x100000, 0x1000000, 0x10000000, 0x80000000};
        size_t n_nums = (&nums)[1] - nums;
    
        printf("perf %s\n", label);
        for (size_t i = 0; i < n_nums; i++) {
            unsigned num = nums[i];
            gettimeofday(&before, NULL);
            for (int i = 0; i < 1000000; i++)
                (void)(*tzf)(num);
            gettimeofday(&after, NULL);
            printf("%s(%#x): %g nsec\n", label, num, 1000 * dt(&before, &after));
        }
        printf("\n");
    }
    
    int main()
    {
        setbuf(stdout, NULL);
        test(stupid_tz, "stupid_tz");
        test(tz, "tz");
        test(tz2, "tz2");
        test(tz3, "tz3");
        perf(tz, "tz");
        perf(tz2, "tz2");
        perf(tz3, "tz3");
        return 0;
    }
    
    $ cc -std=c99 -Wall -Wextra -Werror -O3 -DNDEBUG -I../.. -MMD   -c -o t.o t.c
    $ g++ -std=c++11 -Wall -Wextra -Werror -O3 -DNDEBUG -I../.. -MMD    t.o tz.o   -o a.out
    $ a.out
    testing stupid_tz
    testing tz
    testing tz2
    testing tz3
    perf tz
    tz(0): 8.252 nsec
    tz(0x1): 1.909 nsec
    tz(0x10): 1.988 nsec
    tz(0x100): 2.705 nsec
    tz(0x1000): 3.776 nsec
    tz(0x10000): 4.712 nsec
    tz(0x100000): 5.628 nsec
    tz(0x1000000): 6.368 nsec
    tz(0x10000000): 7.711 nsec
    tz(0x80000000): 7.943 nsec
    
    perf tz2
    tz2(0): 1.827 nsec
    tz2(0x1): 2.957 nsec
    tz2(0x10): 3.016 nsec
    tz2(0x100): 3.062 nsec
    tz2(0x1000): 2.986 nsec
    tz2(0x10000): 2.94 nsec
    tz2(0x100000): 3.009 nsec
    tz2(0x1000000): 3.091 nsec
    tz2(0x10000000): 3.07 nsec
    tz2(0x80000000): 2.939 nsec
    
    perf tz3
    tz3(0): 1.296 nsec
    tz3(0x1): 2.771 nsec
    tz3(0x10): 2.896 nsec
    tz3(0x100): 2.909 nsec
    tz3(0x1000): 2.703 nsec
    tz3(0x10000): 2.711 nsec
    tz3(0x100000): 2.709 nsec
    tz3(0x1000000): 2.703 nsec
    tz3(0x10000000): 2.81 nsec
    tz3(0x80000000): 2.724 nsec
    
    $ 
    
  6. matthew said

    @kernelbob: how does this compare?

    unsigned tz4(unsigned num) {
       return __builtin_ctz(num);
    }
    
  7. matthew said

    Or another nice one:

    uint32_t tz5(uint32_t n) {
      n = (n&-n)-1;
      n = ((n >> 1)&0x55555555) + (n&0x55555555);
      n = ((n >> 2)&0x33333333) + (n&0x33333333);
      n = ((n >> 4)&0x0f0f0f0f) + (n&0x0f0f0f0f);
      n = ((n >> 8)&0x00ff00ff) + (n&0x00ff00ff);
      n = ((n >> 16) + n) & 0xffff;
      return n;
    }
    
  8. kernelbob said

    The rabbit hole always goes deeper…

    After I posted, I found and read Our Gracious Host’s table lookup method, and then matthew asked about the __builtin_ctz function. __builtin_ctz is undefined for input zero, so I wrote two versions.

    tz4 calls __builtin_ctz, and can’t be used for zero. (It generates a run-time error with clang’s “-fsanitize=undefined” flag.)
    tz4z checks for zero explicitly, then calls __builtin_ctz on nonzero inputs.
    tz5 uses table lookup mod 37.

    Compiler Explorer.

    Test and microbenchmark run. __builtin_ctz, with or without zero handling is fastest; table lookup is faster than any of my solutions.

    $ cc -std=c99 -Wall -Wextra -Werror -O3 t.c tz.c
    $ a.out
    testing tz4
    testing tz4z
    testing tz5
    perf tz4
    tz4(0x1): 1.271 nsec
    tz4(0x10): 1.269 nsec
    tz4(0x100): 1.27 nsec
    tz4(0x1000): 1.268 nsec
    tz4(0x10000): 1.284 nsec
    tz4(0x100000): 1.288 nsec
    tz4(0x1000000): 1.253 nsec
    tz4(0x10000000): 1.289 nsec
    tz4(0x80000000): 1.297 nsec
    
    perf tz4z
    tz4z(0): 1.269 nsec
    tz4z(0x1): 1.014 nsec
    tz4z(0x10): 1.003 nsec
    tz4z(0x100): 1.003 nsec
    tz4z(0x1000): 1.002 nsec
    tz4z(0x10000): 1.002 nsec
    tz4z(0x100000): 1.002 nsec
    tz4z(0x1000000): 1.003 nsec
    tz4z(0x10000000): 1.004 nsec
    tz4z(0x80000000): 1.002 nsec
    
    perf tz5
    tz5(0): 1.808 nsec
    tz5(0x1): 1.8 nsec
    tz5(0x10): 1.858 nsec
    tz5(0x100): 1.746 nsec
    tz5(0x1000): 1.747 nsec
    tz5(0x10000): 1.833 nsec
    tz5(0x100000): 1.733 nsec
    tz5(0x1000000): 1.811 nsec
    tz5(0x10000000): 1.812 nsec
    tz5(0x80000000): 1.789 nsec
    
    $ 
    
  9. Xero said

    A python solution:

    def trailing_zero_bits(n):
        return bin(n)[::-1].find("1")
    
  10. Gambiteer said
    (define (first-bit-set x) (- (log (+ (bitwise-xor x (- x 1)) 1) 2) 1))
    

    Doesn’t work with zero.

  11. Globules said

    Here’s another Haskell version having a few variants of the function. The first three work on arbitrary sized numbers (because popCount does).

    import Data.Bits
    
    ntz0, ntz1, ntz2 :: (Bits a, Integral a) => a -> Int
    
    -- From Warren's "Hacker's Delight".  We take advantage of popCount not
    -- requiring a finite size type.
    ntz0 x = popCount (complement x .&. (x - 1))
    ntz1 x = popCount (complement (x .|. negate x))
    ntz2 x = popCount ((x .&. negate x) - 1)
    
    -- We can use a library function for finite size types.
    ntz3 :: FiniteBits a => a -> Int
    ntz3 = countTrailingZeros
    
    main :: IO ()
    main = do
      mapM_ (print . ($ (2^123 :: Integer))) [ntz0, ntz1, ntz2]
      
      -- How many trailing zeros does 0 have in an arbitrarily large integer type?
      -- These functions return -1 in that case.
      mapM_ (print . ($ (0 :: Integer))) [ntz0, ntz1, ntz2]
    
      print $ ntz3 (123 * 2^5 :: Int)
      print $ ntz3 (0 :: Int)
    
    $ ./ntz
    123
    123
    123
    -1
    -1
    -1
    5
    64
    
  12. Daniel said

    Here’s a binary search solution in C.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    
    int ntz(uint64_t x) {
      assert(x > 0);
      int n = 0;
      if (!(x & 0x00000000ffffffff)) {n = 32; x >>= 32;}
      if (!(x & 0x000000000000ffff)) {n += 16; x >>= 16;}
      if (!(x & 0x00000000000000ff)) {n += 8; x >>= 8;}
      if (!(x & 0x000000000000000f)) {n += 4; x >>= 4;}
      if (!(x & 0x0000000000000003)) {n += 2; x >>= 2;}
      if (!(x & 0x0000000000000001)) {n += 1;}
      return n;
    }
    
    int main(int argc, char* argv[]) {
      assert(argc == 2);
      uint64_t x = strtoull(argv[1], NULL, 10);
      printf("%d\n", ntz(x));
      return EXIT_SUCCESS;
    }
    

    Example:

    $ ./a.out 18
    1
    
    $ ./a.out 48
    4
    
  13. Steve said
    
    function binary(n)
       { str = "";
         pwr = int(log(n)/log(2));
         for (i = pwr; i > -1; i--) {
                 flg = ((i == pwr)?1:0);
                 str = flg str;
                 if (flg == 1) {
                         n = n - (2^pwr);
                         pwr = int(log(n)/log(2))
                 }
         };
         return str
    }
    
    {
       print $0 " --> " index(binary($0),1)-1
    }
    
    

    $ echo 18 | awk -f /tmp/binary.awk
    18 –> 1

    $ echo 48 | awk -f /tmp/binary.awk
    48 –> 4

  14. Antonio Leonti said

    Simple solution in C. In this the number of trailing zeros for 0 is the number of bits in an int.

    int solve(int n){
        int ret = 0;
        while(!(n % 2) && ret < 8 * sizeof(int)){ // If the last bit is 0
            n >>= 1; // shift bits to the right
            ret++; // and increase the return value by 1.
        }
        return ret;
    }
    
  15. Hakan said

    function checkTrailingZeros($n){
    // Take the number and convert to binary
    $binary = decbin($n);

    // Typecast the resulting number into a string
    $string = (string) $binary;
    
    // Determine length of the string
    $length = strlen($string);
    
    // Put the digits into an array
    for ($i=0; $i < $length; $i++) { 
        $array[] = $string[$i];
    }
    
    // Reverse the array
    $array = array_reverse($array);
    
    // Prepare a counter
    $counter = 0;
    
    // Loop through the array
    foreach ($array as $key => $digit) {
        // check whether the digit is zero
        if ($digit == 0) {
            // if yes, increment the counter by one
            $counter += 1;
        } else {
            break;
        }
    }
    
    // Present the number and the counter
    echo $n.". has got ".$counter." trailing zeros in binary (".$string.")<br>";
    

    }

  16. Hakan said

    Oops, sorry for the bad paste. Will do it the right way next time round

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: