Trailing Zero-Bits

July 7, 2020

The easiest solution is to loop through the trailing bits:

(define (zerobits n)
  (let loop ((n n) (b 0))
    (if (odd? n) b (loop (/ n 2) (+ b 1)))))

That takes time O(log n) to iterate through the bits of the number. A little trick using some bit-hackery and a lookup table gives us an O(1) solution:

(define (zerobits n)
  (vector-ref (vector 32 0 1 26 2 23 27 0 3 16 24 30 28 11 0 13
      4 7 17 0 25 22 31 15 29 10 12 6 0 21 14 9 5 20 8 19 18)
    (modulo (bitwise-and n (- n)) 37)))

Who thinks up things like this? You can run the program at https://ideone.com/8hx3c7.

Posted by programmingpraxis

Filed in Exercises

16 Comments »

16 Responses to “Trailing Zero-Bits”

Zack said
July 7, 2020 at 9:34 AM
Cool little drill. Here is my take on it using Julia 1.4.1: https://pastebin.com/GbYGLYKV

First approach: create the bin string and work with that. Easier for figuring out what’s going on.

Second approach: just solve the task and forget about binary representation. Faster but opaque.
matthew said
July 7, 2020 at 2:29 PM
If you are just going to do fixed size integers, then the looping solution is constant time as well. For integers of any length, represented as bignums, I don’t think you can do better than some sort of loop over the the bits of the number.

The array solution is quite cute though (37 presumably is the lowest modulus m such that 2^i mod m is different for i in 0..31).
Milbrae said
July 7, 2020 at 2:40 PM
Python. Simple bit-testing.

def trail(n):
“”” “””
z = 0
while (n & 1) == 0:
z += 1
n >>= 1
return z

n = [18, 48]
for k in n:
print (“%d = %d” % (k, trail(k)))
[/code]

Sample output:
18 = 1
48 = 4[/code]

Milbrae said

July 7, 2020 at 2:42 PM

Again with proper indentation

def trail(n):
    """ """
    z = 0
    while (n & 1) == 0:
        z += 1
        n >>= 1
    return z

n = [18, 48]
for k in n:
    print ("%d = %d" % (k, trail(k)))

18 = 1
48 = 4

kernelbob said

July 7, 2020 at 4:08 PM

I’ve been playing with Compiler Explorer lately. So I wrote three implementations in C and explored them there.

Link.

Then I wrapped a test harness around them to see how they performed. On my machine, tz2 and tz3 are about twice as fast as tz, wth a slight edge to tz3. You can see from Compiler Explorer that the compiler unrolled the loops in tz1 and tz3.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

extern unsigned tz(unsigned);
extern unsigned tz2(unsigned);
extern unsigned tz3(unsigned);

unsigned stupid_tz(unsigned num)
{
    for (unsigned i = 0; i < 32; i++)
        if (num & (1LL << i))
            return i;
    return 32;
}

void test(unsigned (*tzf)(unsigned), const char *label)
{
    printf("testing %s\n", label);
    // printf("%s(0) => %u\n", label, (*tzf)(0));
    assert((*tzf)(0) == 32);
    assert((*tzf)(1) == 0);
    assert((*tzf)(18) == 1);
    assert((*tzf)(48) == 4);
    unsigned num = 0;
    for (int i = 0; i < 10000000; i++) {
        unsigned expected = stupid_tz(num);
        unsigned actual = (*tzf)(num);
        if (actual != expected) {
            fprintf(stderr,
                    "%#x: expected %u, got %u\n",
                    num, expected, actual);
            abort();
        }
        num += 1537;
    }
}

double dt(struct timeval *before, struct timeval *after)
{
    double dsec = after->tv_sec - before->tv_sec;
    double dusec = after->tv_usec - before->tv_usec;
    return dsec + dusec / 1000000.0;
}

void perf(unsigned (*tzf)(unsigned), const char *label)
{
    (void)tzf;
    (void)label;
    struct timeval before, after;
    unsigned nums[] = {0, 1, 0x10, 0x100, 0x1000, 0x10000,
                       0x100000, 0x1000000, 0x10000000, 0x80000000};
    size_t n_nums = (&nums)[1] - nums;

    printf("perf %s\n", label);
    for (size_t i = 0; i < n_nums; i++) {
        unsigned num = nums[i];
        gettimeofday(&before, NULL);
        for (int i = 0; i < 1000000; i++)
            (void)(*tzf)(num);
        gettimeofday(&after, NULL);
        printf("%s(%#x): %g nsec\n", label, num, 1000 * dt(&before, &after));
    }
    printf("\n");
}

int main()
{
    setbuf(stdout, NULL);
    test(stupid_tz, "stupid_tz");
    test(tz, "tz");
    test(tz2, "tz2");
    test(tz3, "tz3");
    perf(tz, "tz");
    perf(tz2, "tz2");
    perf(tz3, "tz3");
    return 0;
}

$ cc -std=c99 -Wall -Wextra -Werror -O3 -DNDEBUG -I../.. -MMD   -c -o t.o t.c
$ g++ -std=c++11 -Wall -Wextra -Werror -O3 -DNDEBUG -I../.. -MMD    t.o tz.o   -o a.out
$ a.out
testing stupid_tz
testing tz
testing tz2
testing tz3
perf tz
tz(0): 8.252 nsec
tz(0x1): 1.909 nsec
tz(0x10): 1.988 nsec
tz(0x100): 2.705 nsec
tz(0x1000): 3.776 nsec
tz(0x10000): 4.712 nsec
tz(0x100000): 5.628 nsec
tz(0x1000000): 6.368 nsec
tz(0x10000000): 7.711 nsec
tz(0x80000000): 7.943 nsec

perf tz2
tz2(0): 1.827 nsec
tz2(0x1): 2.957 nsec
tz2(0x10): 3.016 nsec
tz2(0x100): 3.062 nsec
tz2(0x1000): 2.986 nsec
tz2(0x10000): 2.94 nsec
tz2(0x100000): 3.009 nsec
tz2(0x1000000): 3.091 nsec
tz2(0x10000000): 3.07 nsec
tz2(0x80000000): 2.939 nsec

perf tz3
tz3(0): 1.296 nsec
tz3(0x1): 2.771 nsec
tz3(0x10): 2.896 nsec
tz3(0x100): 2.909 nsec
tz3(0x1000): 2.703 nsec
tz3(0x10000): 2.711 nsec
tz3(0x100000): 2.709 nsec
tz3(0x1000000): 2.703 nsec
tz3(0x10000000): 2.81 nsec
tz3(0x80000000): 2.724 nsec

$

matthew said
July 7, 2020 at 4:19 PM
@kernelbob: how does this compare?
```
unsigned tz4(unsigned num) {
   return __builtin_ctz(num);
}
```

matthew said

July 7, 2020 at 4:36 PM

Or another nice one:

uint32_t tz5(uint32_t n) {
  n = (n&-n)-1;
  n = ((n >> 1)&0x55555555) + (n&0x55555555);
  n = ((n >> 2)&0x33333333) + (n&0x33333333);
  n = ((n >> 4)&0x0f0f0f0f) + (n&0x0f0f0f0f);
  n = ((n >> 8)&0x00ff00ff) + (n&0x00ff00ff);
  n = ((n >> 16) + n) & 0xffff;
  return n;
}

kernelbob said

July 7, 2020 at 5:33 PM

The rabbit hole always goes deeper…

After I posted, I found and read Our Gracious Host’s table lookup method, and then matthew asked about the __builtin_ctz function. __builtin_ctz is undefined for input zero, so I wrote two versions.

tz4 calls __builtin_ctz, and can’t be used for zero. (It generates a run-time error with clang’s “-fsanitize=undefined” flag.)
tz4z checks for zero explicitly, then calls __builtin_ctz on nonzero inputs.
tz5 uses table lookup mod 37.

Compiler Explorer.

Test and microbenchmark run. __builtin_ctz, with or without zero handling is fastest; table lookup is faster than any of my solutions.

$ cc -std=c99 -Wall -Wextra -Werror -O3 t.c tz.c
$ a.out
testing tz4
testing tz4z
testing tz5
perf tz4
tz4(0x1): 1.271 nsec
tz4(0x10): 1.269 nsec
tz4(0x100): 1.27 nsec
tz4(0x1000): 1.268 nsec
tz4(0x10000): 1.284 nsec
tz4(0x100000): 1.288 nsec
tz4(0x1000000): 1.253 nsec
tz4(0x10000000): 1.289 nsec
tz4(0x80000000): 1.297 nsec

perf tz4z
tz4z(0): 1.269 nsec
tz4z(0x1): 1.014 nsec
tz4z(0x10): 1.003 nsec
tz4z(0x100): 1.003 nsec
tz4z(0x1000): 1.002 nsec
tz4z(0x10000): 1.002 nsec
tz4z(0x100000): 1.002 nsec
tz4z(0x1000000): 1.003 nsec
tz4z(0x10000000): 1.004 nsec
tz4z(0x80000000): 1.002 nsec

perf tz5
tz5(0): 1.808 nsec
tz5(0x1): 1.8 nsec
tz5(0x10): 1.858 nsec
tz5(0x100): 1.746 nsec
tz5(0x1000): 1.747 nsec
tz5(0x10000): 1.833 nsec
tz5(0x100000): 1.733 nsec
tz5(0x1000000): 1.811 nsec
tz5(0x10000000): 1.812 nsec
tz5(0x80000000): 1.789 nsec

$

Xero said

July 9, 2020 at 1:20 AM

A python solution:

def trailing_zero_bits(n):
    return bin(n)[::-1].find("1")

Gambiteer said

July 10, 2020 at 11:22 PM

(define (first-bit-set x) (- (log (+ (bitwise-xor x (- x 1)) 1) 2) 1))

Doesn’t work with zero.

Globules said

July 11, 2020 at 2:54 AM

Here’s another Haskell version having a few variants of the function. The first three work on arbitrary sized numbers (because popCount does).

import Data.Bits

ntz0, ntz1, ntz2 :: (Bits a, Integral a) => a -> Int

-- From Warren's "Hacker's Delight".  We take advantage of popCount not
-- requiring a finite size type.
ntz0 x = popCount (complement x .&. (x - 1))
ntz1 x = popCount (complement (x .|. negate x))
ntz2 x = popCount ((x .&. negate x) - 1)

-- We can use a library function for finite size types.
ntz3 :: FiniteBits a => a -> Int
ntz3 = countTrailingZeros

main :: IO ()
main = do
  mapM_ (print . ($ (2^123 :: Integer))) [ntz0, ntz1, ntz2]
  
  -- How many trailing zeros does 0 have in an arbitrarily large integer type?
  -- These functions return -1 in that case.
  mapM_ (print . ($ (0 :: Integer))) [ntz0, ntz1, ntz2]

  print $ ntz3 (123 * 2^5 :: Int)
  print $ ntz3 (0 :: Int)

$ ./ntz
123
123
123
-1
-1
-1
5
64

Daniel said

July 14, 2020 at 10:32 PM

Here’s a binary search solution in C.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int ntz(uint64_t x) {
  assert(x > 0);
  int n = 0;
  if (!(x & 0x00000000ffffffff)) {n = 32; x >>= 32;}
  if (!(x & 0x000000000000ffff)) {n += 16; x >>= 16;}
  if (!(x & 0x00000000000000ff)) {n += 8; x >>= 8;}
  if (!(x & 0x000000000000000f)) {n += 4; x >>= 4;}
  if (!(x & 0x0000000000000003)) {n += 2; x >>= 2;}
  if (!(x & 0x0000000000000001)) {n += 1;}
  return n;
}

int main(int argc, char* argv[]) {
  assert(argc == 2);
  uint64_t x = strtoull(argv[1], NULL, 10);
  printf("%d\n", ntz(x));
  return EXIT_SUCCESS;
}

Example:

$ ./a.out 18
1

$ ./a.out 48
4

Steve said

July 18, 2020 at 12:37 AM


function binary(n)
   { str = "";
     pwr = int(log(n)/log(2));
     for (i = pwr; i > -1; i--) {
             flg = ((i == pwr)?1:0);
             str = flg str;
             if (flg == 1) {
                     n = n - (2^pwr);
                     pwr = int(log(n)/log(2))
             }
     };
     return str
}

{
   print $0 " --> " index(binary($0),1)-1
}

$ echo 18 | awk -f /tmp/binary.awk
18 –> 1

$ echo 48 | awk -f /tmp/binary.awk
48 –> 4

Antonio Leonti said

July 24, 2020 at 5:23 PM

Simple solution in C. In this the number of trailing zeros for 0 is the number of bits in an int.

int solve(int n){
    int ret = 0;
    while(!(n % 2) && ret < 8 * sizeof(int)){ // If the last bit is 0
        n >>= 1; // shift bits to the right
        ret++; // and increase the return value by 1.
    }
    return ret;
}

Hakan said

August 9, 2020 at 3:33 PM

function checkTrailingZeros($n){
// Take the number and convert to binary
$binary = decbin($n);

// Typecast the resulting number into a string
$string = (string) $binary;

// Determine length of the string
$length = strlen($string);

// Put the digits into an array
for ($i=0; $i < $length; $i++) { 
    $array[] = $string[$i];
}

// Reverse the array
$array = array_reverse($array);

// Prepare a counter
$counter = 0;

// Loop through the array
foreach ($array as $key => $digit) {
    // check whether the digit is zero
    if ($digit == 0) {
        // if yes, increment the counter by one
        $counter += 1;
    } else {
        break;
    }
}

// Present the number and the counter
echo $n.". has got ".$counter." trailing zeros in binary (".$string.")<br>";

}

Hakan said
August 9, 2020 at 5:13 PM
Oops, sorry for the bad paste. Will do it the right way next time round

Programming Praxis