;=======================================================================
; Calling AVR assembly routines from C (avr-gcc, GNU as, .S source)
;
; Benefits noted for assembly routines (the start of this list is not
; visible in these notes):
;   - Speed
;   - Not affected by compiler optimization
;
; Registers listed as usable by an assembly routine
; (presumably the avr-gcc call-clobbered set -- confirm against the ABI):
;   r0
;   r18-r25
;   r26-r27 (X)     ; the notes read "r25-r27 (X)"; X is the pair r27:r26
;   r30-r31 (Z)
;   r1              ; must be cleared before returning
;
; Arguments are allocated left to right (r25 down to r18) and are
; even-register aligned.
;
;   Argument size   Registers
;   8-bit           r24
;   16-bit          r25:r24
;   32-bit          r25:r24:r23:r22
;   64-bit          r25:r24:r23:r22:r21:r20:r19:r18
;
;   Return size     Registers
;   8-bit           r24
;   16-bit          r25:r24
;   32-bit          r25:r24:r23:r22
;   64-bit          r25:r24:r23:r22:r21:r20:r19:r18  (r25-r18)
;
; The two listings from the notes (subit and delay_cycles) are combined
; into one source here: the preprocessor include appears once and the
; single .end directive sits at the bottom, because an .end placed
; after subit would stop assembly before delay_cycles.
;=======================================================================

#include <avr/io.h>

        .text

;-----------------------------------------------------------------------
; uint32_t subit(uint32_t ul, uint8_t b)
;
; C equivalent:
;     uint32_t subit(uint32_t ul, uint8_t b) { return (ul - b); }
;
; In:     r25:r24:r23:r22 = ul
;         r20             = b   (8-bit argument, even-register aligned)
; Out:    r25:r24:r23:r22 = ul - b
; Needs:  r1 == 0 (gcc makes sure r1 is always 0)
; Writes: r22-r25 and the status flags only
;-----------------------------------------------------------------------
        .global subit
subit:
        sub     r22, r20                ; subtract b (r20) from ul (r25-r22)
        sbc     r23, r1                 ; .. propagate the borrow; r1 is 0
        sbc     r24, r1                 ; ..
        sbc     r25, r1                 ; ..
        ret

;-----------------------------------------------------------------------
; void delay_cycles(uint32_t cpucycles);
;
; Busy-waits for approximately cpucycles CPU cycles.
;
; In:     r25:r24:r23:r22 = cpucycles
; Out:    nothing
; Needs:  r1 == 0
; Writes: r22-r25, r30, r31 (Z) and the status flags
;
; Method: subtract the fixed call overhead, burn (count & 7) cycles by
; jumping part-way into a run of 7 nops, then spin in a loop that costs
; exactly 8 cycles per pass until the count underflows.
;-----------------------------------------------------------------------

; Number of cpu cycles of overhead
; (includes the ldi r22,byte0; ldi r23,byte1; ldi r24,byte2;
; ldi r25,byte3 in the caller, and the call delay_cycles).
; NOTE(review): adding up the instructions visible in this routine with
; classic 16-bit-PC timings (call = 4, ret = 4, ijmp = 2, taken
; branch = 2) gives 31 cycles of fixed cost rather than 24 -- confirm
; against the datasheet of the target device before relying on exact
; timing.
OVERHEAD = 24

; Some register aliases (register numbers).
cycles0 = 22                            ; cpucycles, byte 0 (lsb)
cycles1 = 23                            ; cpucycles, byte 1
cycles2 = 24                            ; cpucycles, byte 2
cycles3 = 25                            ; cpucycles, byte 3 (msb)
temp    = 19                            ; not referenced by the code below

        .global delay_cycles
delay_cycles:
        ; Subtract the overhead.
        subi    cycles0, OVERHEAD       ; subtract the overhead
        sbc     cycles1, r1             ; ..
        sbc     cycles2, r1             ; ..
        sbc     cycles3, r1             ; ..
        brcs    .Ldelay_done            ; return if the requested delay is too short

        ; Delay the lsb: execute (cycles0 & 7) of the 7 nops below.
        ; Z = 7 - (cycles0 & 7) is the number of nops to skip.
        mov     r30, cycles0            ; Z = jtable offset to delay 0-7 cycles
        com     r30                     ; ..
        andi    r30, 7                  ; ..
        clr     r31                     ; ..
        subi    r30, lo8(-(gs(.Ldelay_jtable)))  ; add the table offset
        sbci    r31, hi8(-(gs(.Ldelay_jtable)))  ; ..
        ijmp                            ; vector into table for partial delay

.Ldelay_jtable:
        nop
        nop
        nop
        nop
        nop
        nop
        nop

        ; Delay the remaining count, 8 cycles per pass.
.Ldelay_loop:
        subi    cycles0, 8              ; decrement the count (8 cycles per loop)
        sbc     cycles1, r1             ; ..
        sbc     cycles2, r1             ; ..
        sbc     cycles3, r1             ; ..
        brcs    .Ldelay_done            ; exit if done
        nop                             ; add delay to make 8 cycles per loop
        rjmp    .Ldelay_loop            ; ..

.Ldelay_done:
        ret

        .end
© Copyright 2025 ExpyDoc