Pulse Generator


The Timer Overflow Interrupt is very useful but, as has been pointed out many times, it is not very precise at low prescaler values (that is, at high frequencies).
So, what can be done to make it more precise?
Well, you can get rid of the prologue and epilogue produced by the compiler simply by defining the ISR as "naked".

  "dressed" "undressed"
the source,
what you type
// "Dressed" variant: an ordinary ISR. The compiler wraps the body in its own
// register save/restore prologue and epilogue.
ISR(TIMER2_OVF_vect) {   
  TCNT2 = tcnt2;             // reload the counter from the global tcnt2
  digitalWrite(pin,HIGH);    // start of the pulse (a subroutine call)
  digitalWrite(pin,LOW);     // end of the pulse (a second subroutine call)
};
// "Undressed" variant: ISR_NAKED suppresses the compiler-generated prologue
// and epilogue, so the handler saves what it uses and returns by itself.
//
// The whole sequence is ONE asm volatile statement. GCC does not guarantee
// that separate asm statements stay consecutive or in order, and an asm that
// has an output operand but no "volatile" is open to optimization. Only
// immediate (constant) input operands are used, so the compiler never has to
// load a register on our behalf.
//
// None of the instructions below modifies SREG, so the flags are not saved.
ISR(TIMER2_OVF_vect, ISR_NAKED) {
  asm volatile(
    "push r24               \n\t"   // the only register modified in here
    "lds  r24, tcnt2        \n\t"   // r24 = C variable tcnt2 (by symbol name)
    "sts  %[tcnt], r24      \n\t"   // TCNT2 = tcnt2 (restart the timer)
    "sbi  %[port], %[bit]   \n\t"   // pin high
    "cbi  %[port], %[bit]   \n\t"   // pin low
    "pop  r24               \n\t"
    "reti                   \n\t"   // because we are naked
    : /* no outputs */
    : [tcnt] "n" (_SFR_MEM_ADDR(TCNT2)),   // data-space address, as STS needs
      [port] "I" (_SFR_IO_ADDR(PORTB)),    // I/O address, as SBI/CBI need
      [bit]  "I" (PORTB3)
  );
}
what the compiler generates
00000138 <__vector_9>:
 138:	1f 92       	push	r1
 13a:	0f 92       	push	r0
 13c:	0f b6       	in	r0, 0x3f	; 63
 13e:	0f 92       	push	r0
 140:	11 24       	eor	r1, r1
 142:	2f 93       	push	r18
 144:	3f 93       	push	r19
 146:	4f 93       	push	r20
 148:	5f 93       	push	r21
 14a:	6f 93       	push	r22
 14c:	7f 93       	push	r23
 14e:	8f 93       	push	r24
 150:	9f 93       	push	r25
 152:	af 93       	push	r26
 154:	bf 93       	push	r27
 156:	ef 93       	push	r30
 158:	ff 93       	push	r31
 15a:	80 91 0c 01 	lds	r24, 0x010C
 15e:	80 93 b2 00 	sts	0x00B2, r24
 162:	8b e0       	ldi	r24, 0x0B	; 11
 164:	61 e0       	ldi	r22, 0x01	; 1
 166:	0e 94 2b 02 	call	0x456	; 0x456 
 16a:	8b e0       	ldi	r24, 0x0B	; 11
 16c:	60 e0       	ldi	r22, 0x00	; 0
 16e:	0e 94 2b 02 	call	0x456	; 0x456 
 172:	ff 91       	pop	r31
 174:	ef 91       	pop	r30
 176:	bf 91       	pop	r27
 178:	af 91       	pop	r26
 17a:	9f 91       	pop	r25
 17c:	8f 91       	pop	r24
 17e:	7f 91       	pop	r23
 180:	6f 91       	pop	r22
 182:	5f 91       	pop	r21
 184:	4f 91       	pop	r20
 186:	3f 91       	pop	r19
 188:	2f 91       	pop	r18
 18a:	0f 90       	pop	r0
 18c:	0f be       	out	0x3f, r0	; 63
 18e:	0f 90       	pop	r0
 190:	1f 90       	pop	r1
 192:	18 95       	reti
0000011a <__vector_9>:
 11a:	8f 93       	push	r24
 11c:	80 91 0c 01 	lds	r24, 0x010C
 120:	80 93 b2 00 	sts	0x00B2, r24
 124:	2b 9a       	sbi	0x05, 3	; 5
 126:	2b 98       	cbi	0x05, 3	; 5
 128:	8f 91       	pop	r24
 12a:	18 95       	reti
 
Note: the called subroutine 
"0x456 < digitalWrite>" 
is not listed here! 
 

When you compare both columns you might understand why there are a lot of freaks out there who hate Arduino.
(see this to find out how to get the compiled code)

Actually, it is extremely difficult to access the C variables in inline assembler statements. But as you can see it can be done.
As we managed to use only one register and did not touch any of the flags there is not much to save and restore.

A complete demo program showing the effect of optimization at different prescalers:

#define FILENAME "PULSEGEN"   // sketch name, printed once at startup

const byte pin = 11;      // on the ATmega328P Port B, Bit 3
// Timer2 reload value: the counter overflows after (256 - tcnt2) ticks.
// The ISR reads this variable by its symbol name from inline assembly, so it
// must stay a global with exactly this name.
byte tcnt2 = 131;
// Currently selected entry of prescaler[]; also written to TCCR2B.
byte index = 1;
// Timer2 clock divisor for each TCCR2B value 0..7; entry 0 means "timer stopped".
const int prescaler[] = {0, 1, 8, 32, 64, 128, 256, 1024};
// Time (in millis() units) at which loop() switches to the next prescaler.
// Unsigned to match the return type of millis().
unsigned long t = millis() + 10000;

// One-time initialization: open the serial port, print the sketch name,
// make the pulse pin an output and start Timer2 with the current prescaler.
void setup() {
  Serial.begin(9600);          // serial output at 9600 baud
  Serial.println(FILENAME);    // identify the running sketch
  pinMode(pin, OUTPUT);        // the ISR drives this pin directly via PORTB
  setupTimer2(index);          // configure Timer2 and print the expected frequency
}

void loop() {
  // the loop will go through all possible prescalers
  if (millis() < t) return;
  index++;
  if (index > 7) index = 1;
  setupTimer2(index);
  t = millis() + 10000;
}

// Restarts Timer2 in normal mode with the overflow interrupt enabled and the
// given clock-select value, then prints the setting and the expected
// overflow frequency in Hz.
//
// index: TCCR2B clock-select value, 1..7 (an entry of prescaler[]).
//        0 or any value above 7 leaves the timer stopped and reports 0 Hz,
//        instead of dividing by zero or reading past the end of the table.
void setupTimer2(byte index) {
  const byte clockSelect = (index <= 7) ? index : 0;

  TCCR2B = 0x00;          // Disable Timer2 while we set it up
  TCNT2  = tcnt2;         // Reset Timer Count
  TIFR2  = _BV(TOV2);     // Clear a pending overflow flag: it is cleared by
                          // writing a ONE to it, writing 0x00 has no effect
  TIMSK2 = _BV(TOIE2);    // Timer2 Overflow Interrupt Enable
  TCCR2A = 0x00;          // Timer2 Control Reg A: Wave Gen Mode normal
  TCCR2B = clockSelect;   // Timer2 Control Reg B: prescaler set, timer runs

  long frequency = 0;     // stays 0 when the timer is stopped
  if (clockSelect != 0) {
    // F_CPU / prescaler = timer tick rate; one overflow every (256 - tcnt2) ticks
    frequency = F_CPU / prescaler[clockSelect] / (256 - tcnt2);
  }
  Serial.print(index);
  Serial.print("  ");
  Serial.println(frequency);
}

// Timer2 overflow handler: reloads the counter and emits one short pulse on
// Port B, bit 3.
//
// ISR_NAKED suppresses the compiler-generated prologue and epilogue, so the
// handler saves what it uses and returns by itself.
//
// The whole sequence is ONE asm volatile statement. GCC does not guarantee
// that separate asm statements stay consecutive or in order, and an asm that
// has an output operand but no "volatile" is open to optimization. Only
// immediate (constant) input operands are used, so the compiler never has to
// load a register on our behalf.
ISR(TIMER2_OVF_vect, ISR_NAKED) {
  asm volatile(
    "push r24               \n\t"   // the only register modified in here
    /* --------------------- no need to save any flags --------------
    "in   r24, __SREG__     \n\t"
    "push r24               \n\t"   save SREG
    -------------------------------------------------------------- */
    // restart the timer, TCNT2 = tcnt2:
    "lds  r24, tcnt2        \n\t"   // r24 = C variable tcnt2 (by symbol name)
    "sts  %[tcnt], r24      \n\t"
    // send a short pulse:
    "sbi  %[port], %[bit]   \n\t"
    "nop \n\t nop \n\t nop \n\t nop \n\t nop \n\t nop \n\t"
    "cbi  %[port], %[bit]   \n\t"
    /* pulse duration without NOPs:  0.125 micro seconds = 2/16 micro seconds at F_CPU = 16 MHz
       pulse duration with 6 NOPs:   0.500 micro seconds = 8/16 micro seconds at F_CPU = 16 MHz */
    /* --------- no need to restore the flags ---------------
    "pop  r24               \n\t"
    "out  __SREG__, r24     \n\t"   restore SREG
    ------------------------------------------------------- */
    "pop  r24               \n\t"
    "reti                   \n\t"   // because we are naked
    : /* no outputs */
    : [tcnt] "n" (_SFR_MEM_ADDR(TCNT2)),   // data-space address, as STS needs
      [port] "I" (_SFR_IO_ADDR(PORTB)),    // I/O address, as SBI/CBI need
      [bit]  "I" (PORTB3)
  );
}


Photo of the oscilloscope diagram.

The time base was set to 2 μs per division.
When taking the picture the prescaler value was 1, so the frequency should have been 128,000 Hz but in reality it was 111,262 Hz with the 6 NOPs.
Mind you: if you do not use the "naked" option you only get 108,333 Hz.
If you remove the NOPs you get 114,190 Hz, but the pulses are so short that you would not see much on the scope.
You might be surprised to see the second pulse twice (center of the picture). The reason behind that is we used a standard oscilloscope and a long exposure time with the camera.
As other interrupts occurred occasionally, the pause between two consecutive pulses was extended, and the second (and all the following) pulses appear at different distances.

With prescaler 2 the CPU clock is divided by 8, so you expect a frequency of 16,000 Hz but you actually get only 15,843 Hz.
The lower frequencies (prescaler 3 to 7) give good results.

Final remark 1

If you increase the tcnt2 value the frequency gets higher but the results get worse.
If you add any commands to the ISR check which registers are affected and have to be saved and restored.
Also check if your commands modify any flags. If so, you have to uncomment the SREG instructions.
This will add a few instructions to the ISR, and again you will lose some precision.

Final remark 2

If you want to modify the ISR to give a duty cycle of 0.5 (a square-wave) you might use this instruction:
  asm("sbi 0x03,3");
or
  PINB = 8;
(whatever you like best) instead of the sbi/cbi instructions above
(0x03 is the I/O address of PINB, which is what the sbi instruction expects; its memory-mapped address is 0x23. If you select another pin, check for the appropriate port and bit number. If you use another port you have to choose the correct address of its PIN register.)
From the Atmel manual (doc8161.pdf, chapter 13.1):
writing a logic one to a bit in the PINx Register, will result in a toggle in the corresponding bit in the Data Register.


contact: nji(at)gmx.de