;
; Ullrich von Bassewitz, 2003-08-20
; Performance increase (about 20%) by
; Christian Krueger, 2009-09-13
;
; void* __fastcall__ memcpy (void* dest, const void* src, size_t n);
;
; NOTE: This module also provides the entry points used by memmove, which
; resorts to memcpy for an upwards copy. Don't change this module without
; looking at memmove!
;

        .export         _memcpy, memcpy_upwards, memcpy_getparams
        .import         popax
        .importzp       sp, ptr1, ptr2, ptr3

; ----------------------------------------------------------------------
; Copy n bytes from src to dest, strictly from low to high addresses.
;
; On entry to _memcpy: n is in A/X, src and dest are on the C stack.
; On entry to memcpy_upwards: ptr1 = src, ptr2 = dest, ptr3 = n, Y = 0 and
; dest is still on the C stack.
; Result: dest is popped from the C stack and returned in A/X.
; The high bytes of ptr1 and ptr2 are advanced while copying.

_memcpy:
        jsr     memcpy_getparams        ; ptr1 = src, ptr2 = dest, ptr3 = n

memcpy_upwards:                         ; Y must be 0 here
        ldx     ptr3+1                  ; X = number of full 256 byte pages
        beq     @tail                   ; None: only the remainder is left

; Copy one full page, two bytes per iteration. The loop ends when Y wraps
; around to zero, so the number of unrolled copies has to divide 256.

@page:  lda     (ptr1),y                ; First byte of the pair
        sta     (ptr2),y
        iny
        lda     (ptr1),y                ; Second byte of the pair
        sta     (ptr2),y
        iny
        bne     @page                   ; Y != 0: page not finished yet
        inc     ptr1+1                  ; Advance both pointers by one page
        inc     ptr2+1
        dex                             ; One page less to go
        bne     @page                   ; Continue with the next page

; Copy the remaining (n mod 256) bytes. Counting from back to front would
; be about 10% faster, but memmove relies on this routine copying strictly
; from low to high addresses, because the blocks may overlap.

@tail:                                  ; Y is 0 here
        ldx     ptr3                    ; X = number of remaining bytes
        beq     @done                   ; Nothing left to copy

@byte:  lda     (ptr1),y                ; Copy a single byte
        sta     (ptr2),y
        iny
        dex
        bne     @byte

@done:  jmp     popax                   ; Pop dest and return it as result

; ----------------------------------------------------------------------
; Fetch the parameters of memcpy/memmove:
;
;       n (passed in A/X)       --> ptr3
;       src (popped from stack) --> ptr1
;       dest (read from stack)  --> ptr2
;
; The first argument (dest) is only read, not popped: it stays on the C
; stack and is additionally returned in A/X.
; IMPORTANT: This function has to leave with Y = 0!

memcpy_getparams:
        sta     ptr3                    ; Low byte of n
        stx     ptr3+1                  ; High byte of n

        jsr     popax                   ; Pop src
        sta     ptr1
        stx     ptr1+1

; Read dest directly from the stack instead of popping it. The original
; author measured this as three cycles faster (total cycle count including
; the return).

        ldy     #1
        lda     (sp),y                  ; High byte of dest
        sta     ptr2+1
        tax                             ; ... which is also returned in X
        dey                             ; Y = 0, as required by the callers
        lda     (sp),y                  ; Low byte of dest, returned in A
        sta     ptr2
        rts