# Comparison of algorithms for bitcount

no optimizing Brian Kernighan by means of a table recursive
#Instructions 26 11 22 13
#Bytes 52 22 48 + 16 26
#Loops 8 4 2 7

### no optimizing:

A mask in which just 1 bit is set will be "anded" with the argument. If the result is not equal to zero the counter will be incremented.
 ```byte bitCountSimple(byte x) __attribute__((noinline)); byte bitCountSimple(uint8_t x) { 4ee: 60 e0 ldi r22, 0x00 ; 0 4f0: 91 e0 ldi r25, 0x01 ; 1 4f2: 20 e0 ldi r18, 0x00 ; 0 4f4: 30 e0 ldi r19, 0x00 ; 0 byte c = 0; int8_t mask = 1; for (int i=0; i<8; i++) { if ((x & mask) != 0) c++; 4f6: e8 2f mov r30, r24 4f8: f0 e0 ldi r31, 0x00 ; 0 4fa: 49 2f mov r20, r25 4fc: 55 27 eor r21, r21 4fe: 47 fd sbrc r20, 7 500: 50 95 com r21 502: ca 01 movw r24, r20 504: 8e 23 and r24, r30 506: 9f 23 and r25, r31 508: 89 2b or r24, r25 50a: 09 f0 breq .+2 ; 0x50e <_Z14bitCountSimpleh+0x20> 50c: 6f 5f subi r22, 0xFF ; 255 50e: 2f 5f subi r18, 0xFF ; 255 510: 3f 4f sbci r19, 0xFF ; 255 512: 28 30 cpi r18, 0x08 ; 8 514: 31 05 cpc r19, r1 516: 19 f0 breq .+6 ; 0x51e <_Z14bitCountSimpleh+0x30> mask = mask << 1; 518: 94 2f mov r25, r20 51a: 99 0f add r25, r25 51c: ee cf rjmp .-36 ; 0x4fa <_Z14bitCountSimpleh+0xc> } return c; } 51e: 86 2f mov r24, r22 520: 08 95 ret ```

### Brian Kernighan, published 1988 (developed by Peter Wegner, 1960):

As long as the argument v is not equal to zero it will be "anded" with its predecessor (v-1). The number of loops is the result.
 ```byte bitCountKernighan8(byte x) __attribute__((noinline)); byte bitCountKernighan8(uint8_t v) { 522: 98 2f mov r25, r24 524: 20 e0 ldi r18, 0x00 ; 0 526: 04 c0 rjmp .+8 ; 0x530 <_Z18bitCountKernighan8h+0xe> byte c; for (c = 0; v; c++) { v &= v-1; 528: 89 2f mov r24, r25 52a: 81 50 subi r24, 0x01 ; 1 52c: 98 23 and r25, r24 52e: 2f 5f subi r18, 0xFF ; 255 530: 99 23 and r25, r25 532: d1 f7 brne .-12 ; 0x528 <_Z18bitCountKernighan8h+0x6> } return c; } 534: 82 2f mov r24, r18 536: 08 95 ret ```

### Table:

The results are stored in a table. The table holds only data for values 0 to 15 (one nibble). So the function has to be called twice.
 ```byte bitCount4(byte x) __attribute__((noinline)); byte bitCount4(byte x) { 538: e8 2f mov r30, r24 53a: f0 e0 ldi r31, 0x00 ; 0 53c: ee 0f add r30, r30 53e: ff 1f adc r31, r31 540: e6 5a subi r30, 0xA6 ; 166 542: fe 4f sbci r31, 0xFE ; 254 const int z[16] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4}; return z[x]; } 544: 80 81 ld r24, Z 546: 08 95 ret 00000548 <_Z9bitCount8h>: byte bitCount8(uint8_t x) __attribute__((noinline)); byte bitCount8(uint8_t x) { 548: 0f 93 push r16 54a: 1f 93 push r17 54c: 18 2f mov r17, r24 return bitCount4(x >> 4) + bitCount4(x % 16); 54e: 82 95 swap r24 550: 8f 70 andi r24, 0x0F ; 15 552: 0e 94 9c 02 call 0x538 ; 0x538 <_Z9bitCount4h> 556: 08 2f mov r16, r24 558: 81 2f mov r24, r17 55a: 8f 70 andi r24, 0x0F ; 15 55c: 0e 94 9c 02 call 0x538 ; 0x538 <_Z9bitCount4h> } 560: 80 0f add r24, r16 562: 1f 91 pop r17 564: 0f 91 pop r16 566: 08 95 ret ```

### Recursive:

The leftmost bit is the sign bit. If it is set (x < 0), the result will be incremented. In this case the function will be called again with the double value of the argument. This will be done until all bits are shifted to the left.
 ```byte bitCountRecursive(int8_t x) __attribute__((noinline)); byte bitCountRecursive(int8_t x) { 568: 90 e0 ldi r25, 0x00 ; 0 if (x) { 56a: 88 23 and r24, r24 56c: 49 f0 breq .+18 ; 0x580 <_Z17bitCountRecursivea+0x18> 56e: 28 2f mov r18, r24 570: 22 0f add r18, r18 if (x < 0) return bitCountRecursive(x+x)+1; 572: 87 ff sbrs r24, 7 574: 03 c0 rjmp .+6 ; 0x57c <_Z17bitCountRecursivea+0x14> 576: 82 2f mov r24, r18 578: 9f 5f subi r25, 0xFF ; 255 57a: f7 cf rjmp .-18 ; 0x56a <_Z17bitCountRecursivea+0x2> else return bitCountRecursive(x+x); 57c: 82 2f mov r24, r18 57e: f5 cf rjmp .-22 ; 0x56a <_Z17bitCountRecursivea+0x2> } else return 0; } 580: 89 2f mov r24, r25 582: 08 95 ret ```

contact: nji(at)gmx.de