Atmel AVR Assembly Language


Speed

Not affected by compiler optimization





r0
r18-r25
r26-r27 (X)
r30-r31 (Z)
r1 (must be cleared before returning)

Arguments allocated left to right (r25 to r18)

Even register aligned
Argument size    Registers (first argument)
8-bit            r24
16-bit           r25:r24
32-bit           r25:r24:r23:r22
64-bit           r25:r24:r23:r22:r21:r20:r19:r18
Return size (data length)    Registers
8-bit                        r24
16-bit                       r25:r24
32-bit                       r25:r24:r23:r22
64-bit                       r25:r24:r23:r22:r21:r20:r19:r18 (r25-r18)
/*
 * Subtract the 8-bit value b from the 32-bit value ul.
 *
 * The arithmetic is unsigned, so a result below zero wraps modulo 2^32.
 */
uint32_t subit(uint32_t ul, uint8_t b)
{
    const uint32_t subtrahend = b;   /* widen b before subtracting */

    return ul - subtrahend;
}
#include <avr/io.h>
;-----------------------------------------------------------------------
; uint32_t subit(uint32_t ul, uint8_t b)
;
; Returns ul - b (32-bit unsigned, wraps on borrow).
;
; ABI:      avr-gcc
; In:       r25:r24:r23:r22 = ul
;           r20             = b
;           r1              = 0 (kept at zero by gcc)
; Out:      r25:r24:r23:r22 = ul - b
; Clobbers: SREG flags
;-----------------------------------------------------------------------
        .text
        .global subit
subit:
        sub     r22, r20        ; subtract b (r20) from ul (r25-r22)
        sbc     r23, r1         ; .. NOTE: gcc makes sure r1 is always 0,
        sbc     r24, r1         ; .. so these three only propagate the
        sbc     r25, r1         ; .. borrow into the upper bytes
        ret
        .end
..
#include <avr/io.h>
; defines the # of cpu cycles of overhead
; (includes the ldi r16,byte0; ldi r17,byte1; ldi r18, byte2,
; ldi r19, byte3, and the call delay_cycles)
; NOTE(review): the argument is read from r25:r22 (see the aliases
; below), not r19:r16 - confirm the cycle count against the real caller.
OVERHEAD = 24

; some register aliases (plain register numbers)
cycles0 = 22                            ; cpucycles, least significant byte
cycles1 = 23
cycles2 = 24
cycles3 = 25                            ; cpucycles, most significant byte
temp    = 19                            ; not referenced by the code below

        .text
        .global delay_cycles

;-----------------------------------------------------------------------
; void delay_cycles(uint32_t cpucycles);
;
; Busy-waits for roughly cpucycles CPU cycles.  Returns immediately when
; cpucycles is smaller than OVERHEAD.
;
; ABI:      avr-gcc
; In:       r25:r24:r23:r22 = cpucycles
;           r1              = 0 (kept at zero by gcc)
; Out:      nothing
; Clobbers: r22-r25, r30, r31 (Z), SREG flags
;-----------------------------------------------------------------------
delay_cycles:
        ; subtract the overhead
        subi    cycles0, OVERHEAD       ; subtract the overhead
        sbc     cycles1, r1             ; ..
        sbc     cycles2, r1             ; ..
        sbc     cycles3, r1             ; ..
        brcs    dcx                     ; return if required delay too short

        ; delay the lsb
        ; Z = jtable + (7 - (count & 7)), so exactly (count & 7) of the
        ; seven nops below run before falling into the loop.
        mov     r30, cycles0            ; Z = jtable offset to delay 0-7 cycles
        com     r30                     ; ..
        andi    r30, 7                  ; ..
        clr     r31                     ; ..
        subi    r30, lo8(-(gs(jtable))) ; add the table offset
        sbci    r31, hi8(-(gs(jtable))) ; ..
        ijmp                            ; vector into table for partial delay

jtable: nop
        nop
        nop
        nop
        nop
        nop
        nop

        ; delay the remaining delay
loop:
        subi    cycles0, 8              ; decrement the count (8 cycles per loop)
        sbc     cycles1, r1             ; ..
        sbc     cycles2, r1             ; ..
        sbc     cycles3, r1             ; ..
        brcs    dcx                     ; exit if done
        nop                             ; .. add delay to make 8 cycles per loop
        rjmp    loop                    ; ..

dcx:
        ret
        .end