Comparison of algorithms for bitcount


  no optimizing Brian Kernighan by means of a table recursive
#Instructions 26 11 22 13
#Bytes 52 22 48 + 16 26
#Loops 8 4 2 7

no optimizing:

A mask in which just 1 bit is set will be "anded" with the argument. If the result is not equal to zero the counter will be incremented.
byte bitCountSimple(byte x) __attribute__((noinline));
byte bitCountSimple(uint8_t x) {
     4ee:	60 e0       	ldi	r22, 0x00	; 0
     4f0:	91 e0       	ldi	r25, 0x01	; 1
     4f2:	20 e0       	ldi	r18, 0x00	; 0
     4f4:	30 e0       	ldi	r19, 0x00	; 0
  byte c = 0;
  int8_t mask = 1;
  for (int i=0; i<8; i++) {
    if ((x & mask) != 0) c++;
     4f6:	e8 2f       	mov	r30, r24
     4f8:	f0 e0       	ldi	r31, 0x00	; 0
     4fa:	49 2f       	mov	r20, r25
     4fc:	55 27       	eor	r21, r21
     4fe:	47 fd       	sbrc	r20, 7
     500:	50 95       	com	r21
     502:	ca 01       	movw	r24, r20
     504:	8e 23       	and	r24, r30
     506:	9f 23       	and	r25, r31
     508:	89 2b       	or	r24, r25
     50a:	09 f0       	breq	.+2      	; 0x50e <_Z14bitCountSimpleh+0x20>
     50c:	6f 5f       	subi	r22, 0xFF	; 255
     50e:	2f 5f       	subi	r18, 0xFF	; 255
     510:	3f 4f       	sbci	r19, 0xFF	; 255
     512:	28 30       	cpi	r18, 0x08	; 8
     514:	31 05       	cpc	r19, r1
     516:	19 f0       	breq	.+6      	; 0x51e <_Z14bitCountSimpleh+0x30>
    mask = mask << 1;
     518:	94 2f       	mov	r25, r20
     51a:	99 0f       	add	r25, r25
     51c:	ee cf       	rjmp	.-36     	; 0x4fa <_Z14bitCountSimpleh+0xc>
  }
  return c;
}
     51e:	86 2f       	mov	r24, r22
     520:	08 95       	ret

Brian Kernighan, published 1988 (developed by Peter Wegner, 1960):

As long as the argument v is not equal to zero it will be "anded" with its predecessor (v-1). The number of loops is the result.
byte bitCountKernighan8(byte x) __attribute__((noinline));
byte bitCountKernighan8(uint8_t v) {       
     522:	98 2f       	mov	r25, r24
     524:	20 e0       	ldi	r18, 0x00	; 0
     526:	04 c0       	rjmp	.+8      	; 0x530 <_Z18bitCountKernighan8h+0xe>
  byte c;
  for (c = 0; v; c++) {
    v &= v-1;
     528:	89 2f       	mov	r24, r25
     52a:	81 50       	subi	r24, 0x01	; 1
     52c:	98 23       	and	r25, r24    
     52e:	2f 5f       	subi	r18, 0xFF	; 255
     530:	99 23       	and	r25, r25
     532:	d1 f7       	brne	.-12     	; 0x528 <_Z18bitCountKernighan8h+0x6>
  }
  return c;           
}
     534:	82 2f       	mov	r24, r18
     536:	08 95       	ret

Table:

The results are stored in a table. The table holds only data for values 0 to 15 (one nibble). So the function has to be called twice.
byte bitCount4(byte x) __attribute__((noinline));
byte bitCount4(byte x) {
     538:	e8 2f       	mov	r30, r24
     53a:	f0 e0       	ldi	r31, 0x00	; 0
     53c:	ee 0f       	add	r30, r30
     53e:	ff 1f       	adc	r31, r31
     540:	e6 5a       	subi	r30, 0xA6	; 166
     542:	fe 4f       	sbci	r31, 0xFE	; 254
  const int z[16] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4};
  return z[x];
}
     544:	80 81       	ld	r24, Z
     546:	08 95       	ret

00000548 <_Z9bitCount8h>:

byte bitCount8(uint8_t x) __attribute__((noinline));
byte bitCount8(uint8_t x) {
     548:	0f 93       	push	r16
     54a:	1f 93       	push	r17
     54c:	18 2f       	mov	r17, r24
  return bitCount4(x >> 4) + bitCount4(x % 16);
     54e:	82 95       	swap	r24
     550:	8f 70       	andi	r24, 0x0F	; 15
     552:	0e 94 9c 02 	call	0x538	; 0x538 <_Z9bitCount4h>
     556:	08 2f       	mov	r16, r24
     558:	81 2f       	mov	r24, r17
     55a:	8f 70       	andi	r24, 0x0F	; 15
     55c:	0e 94 9c 02 	call	0x538	; 0x538 <_Z9bitCount4h>
}
     560:	80 0f       	add	r24, r16
     562:	1f 91       	pop	r17
     564:	0f 91       	pop	r16
     566:	08 95       	ret

Recursive:

The leftmost bit is the sign bit. If it is set (x < 0), the result will be incremented. In this case the function will be called again with the double value of the argument. This will be done until all bits are shifted to the left.
byte bitCountRecursive(int8_t x) __attribute__((noinline)); 
byte bitCountRecursive(int8_t x) {
     568:	90 e0       	ldi	r25, 0x00	; 0
  if (x) {
     56a:	88 23       	and	r24, r24
     56c:	49 f0       	breq	.+18     	; 0x580 <_Z17bitCountRecursivea+0x18>
     56e:	28 2f       	mov	r18, r24
     570:	22 0f       	add	r18, r18
    if (x < 0) return bitCountRecursive(x+x)+1;
     572:	87 ff       	sbrs	r24, 7
     574:	03 c0       	rjmp	.+6      	; 0x57c <_Z17bitCountRecursivea+0x14>
     576:	82 2f       	mov	r24, r18
     578:	9f 5f       	subi	r25, 0xFF	; 255
     57a:	f7 cf       	rjmp	.-18     	; 0x56a <_Z17bitCountRecursivea+0x2>
    else return bitCountRecursive(x+x);
     57c:	82 2f       	mov	r24, r18
     57e:	f5 cf       	rjmp	.-22     	; 0x56a <_Z17bitCountRecursivea+0x2>
  }
  else return 0;
}
     580:	89 2f       	mov	r24, r25
     582:	08 95       	ret







contact: nji(at)gmx.de