patch-2.3.23 linux/arch/sh/lib/memcpy.S
Next file: linux/arch/sh/lib/memmove.S
Previous file: linux/arch/sh/lib/csum_partial_copy.c
Back to the patch index
Back to the overall index
- Lines: 347
- Date:
Mon Oct 18 11:16:13 1999
- Orig file:
v2.3.22/linux/arch/sh/lib/memcpy.S
- Orig date:
Tue Aug 31 17:29:13 1999
diff -u --recursive --new-file v2.3.22/linux/arch/sh/lib/memcpy.S linux/arch/sh/lib/memcpy.S
@@ -1,131 +1,227 @@
-! Taken from newlib-1.8.0
+/* $Id: memcpy.S,v 1.3 1999/09/28 11:32:48 gniibe Exp $
+ *
+ * "memcpy" implementation of SuperH
+ *
+ * Copyright (C) 1999 Niibe Yutaka
+ *
+ */
-!
-! Fast SH memcpy
-!
-! by Toshiyasu Morita (tm@netcom.com)
-! hacked by J"orn Rernnecke (amylaar@cygnus.co.uk) ("o for o-umlaut)
-!
-! Entry: r4: destination pointer
-! r5: source pointer
-! r6: byte count
-!
-! Exit: r0: destination pointer
-! r1-r7: trashed
-!
-! Notes: Usually one wants to do small reads and write a longword, but
-! unfortunately it is difficult in some cases to concatanate bytes
-! into a longword on the SH, so this does a longword read and small
-! writes.
-!
-! This implementation makes two assumptions about how it is called:
-!
-! 1.: If the byte count is nonzero, the address of the last byte to be
-! copied is unsigned greater than the address of the first byte to
-! be copied. This could be easily swapped for a signed comparison,
-! but the algorithm used needs some comparison.
-!
-! 2.: When there are two or three bytes in the last word of an 11-or-bore
-! bytes memory chunk to b copied, the rest of the word can be read
-! without size effects.
-! This could be easily changed by increasing the minumum size of
-! a fast memcpy and the amount subtracted from r7 before L_2l_loop be 2,
-! however, this would cost a few extra cyles on average.
-!
+/*
+ * void *memcpy(void *dst, const void *src, size_t n);
+ * The memory regions of DST and SRC are assumed not to overlap.
+ */
#include <linux/linkage.h>
ENTRY(memcpy)
- ! Big endian version copies with decreasing addresses.
- mov r4,r0
- add r6,r0
- sub r4,r5
- mov #11,r1
- cmp/hs r1,r6
- bf/s L_small
+ tst r6,r6 ! T = (n == 0)
+ bt/s 9f ! if n=0, do nothing
+ mov r4,r0 ! delay slot: r0 = dst on both paths
+ sub r4,r5 ! From here, r5 = src - dst, so r0+r5 is the source address matching r0
+ add r6,r0 ! From here, r0 points to the end of the destination (dst + n)
+ mov #12,r1
+ cmp/gt r6,r1 ! T = (12 > n), signed compare
+ bt/s 7f ! if it's too small (n < 12), copy a byte at a time
add #-1,r5
- mov r5,r3
- add r0,r3
- shlr r3
- bt/s L_even
- mov r4,r7
- mov.b @(r0,r5),r2
- add #-1,r3
- mov.b r2,@-r0
-L_even:
- tst #1,r0
- add #-1,r5
- bf/s L_odddst
- add #8,r7
- tst #2,r0
- bt L_al4dst
- add #-1,r3
- mov.w @(r0,r5),r1
- mov.w r1,@-r0
-L_al4dst:
- shlr r3
- bt L_al4both
- mov.w @(r0,r5),r1
- swap.w r1,r1
- add #4,r7
- add #-4,r5
- .align 2
-L_2l_loop:
- mov.l @(r0,r5),r2
- xtrct r2,r1
- mov.l r1,@-r0
- cmp/hs r7,r0
- mov.l @(r0,r5),r1
- xtrct r1,r2
- mov.l r2,@-r0
- bt L_2l_loop
- bra L_cleanup
- add #5,r5
+ add #1,r5 ! undo the delay-slot decrement: r5 = src - dst again
+ ! From here, r6 is free
+ !
+ ! r4 --> [ ... ] DST [ ... ] SRC
+ ! [ ... ] [ ... ]
+ ! : :
+ ! r0 --> [ ... ] r0+r5 --> [ ... ]
+ !
+ ! Dispatch on (src - dst) & 3 through jmptable
+ mov r5,r1
+ mov #3,r2 ! r2 = 3; case0, case1 and case3 reuse it as an alignment mask
+ and r2,r1 ! r1 = (src - dst) & 3
+ shll2 r1 ! scale to a byte offset into the table of .long entries
+ mov r0,r3 ! Save R0 in R3, because mova below overwrites R0
+ mova jmptable,r0 ! r0 = address of jmptable
+ add r1,r0 ! r0 = address of the selected table entry
+ mov.l @r0,r1 ! r1 = address of the selected case
+ jmp @r1
+ mov r3,r0 ! delay slot: restore R0 (end of destination)
+ .balign 4
+jmptable:
+ .long case0
+ .long case1
+ .long case2
+ .long case3
- nop ! avoid nop in executed code.
-L_al4both:
- add #-2,r5
- .align 2
-L_al4both_loop:
- mov.l @(r0,r5),r1
- cmp/hs r7,r0
- bt/s L_al4both_loop
+ ! Copy a byte at a time, from the end towards the start (needs r5 = src - dst - 1)
+7: mov r4,r2
+ add #1,r2 ! r2 = dst + 1: loop limit
+8:
+ cmp/hi r2,r0 ! T = (r0 > r2), unsigned
+ mov.b @(r0,r5),r1 ! load the source byte for destination address r0 - 1
+ bt/s 8b ! while (r0>r2)
+ mov.b r1,@-r0 ! delay slot: pre-decrement r0 and store the byte
+9:
+ rts ! r0 == dst on every path that reaches here
+ nop ! delay slot
+
+case0:
+ ! (src - dst) & 3 == 0: source and destination share the same long-word alignment
+ ! GHIJ KLMN OPQR --> GHIJ KLMN OPQR
+ !
+ ! First, align to long word boundary
+ mov r0,r3
+ and r2,r3 ! r3 = r0 & 3: number of bytes above the last long-word boundary
+ tst r3,r3
+ bt/s 2f ! already aligned: skip the byte loop
+ add #-4,r5 ! delay slot: r5 = src - dst - 4 for the long-word loop
+ add #3,r5 ! r5 = src - dst - 1 for the byte loop
+1: dt r3 ! --r3; T = (r3 == 0)
+ mov.b @(r0,r5),r1
+ bf/s 1b ! repeat until r3 bytes have been copied
+ mov.b r1,@-r0 ! delay slot: pre-decrement r0 and store the byte
+ !
+ add #-3,r5 ! back to r5 = src - dst - 4
+2: ! Second, copy a long word at once
+ mov r4,r2
+ add #7,r2 ! r2 = dst + 7: loop limit
+3: mov.l @(r0,r5),r1 ! load the long word for destination address r0 - 4
+ cmp/hi r2,r0 ! T = (r0 > dst + 7), unsigned
+ bt/s 3b ! loop; the store that follows sits in the delay slot
mov.l r1,@-r0
- bra L_cleanup
+ !
+ ! Third, copy a byte at once, if necessary
+ cmp/eq r4,r0 ! T = (r0 == dst): nothing left to copy
+ bt/s 9b ! done; the delay slot below leaves r5 = src - dst - 1
add #3,r5
+ bra 8b ! copy the remaining 1..3 bytes in the byte loop
+ add #-6,r2 ! delay slot: r2 = dst + 1, the byte loop's limit
- nop ! avoid nop in executed code.
-L_odddst:
- shlr r3
- bt L_al4src
- mov.w @(r0,r5),r1
- mov.b r1,@-r0
- shlr8 r1
- mov.b r1,@-r0
-L_al4src:
- add #-2,r5
- .align 2
-L_odd_loop:
- mov.l @(r0,r5),r2
- cmp/hs r7,r0
- mov.b r2,@-r0
- shlr8 r2
- mov.w r2,@-r0
- shlr16 r2
- mov.b r2,@-r0
- bt L_odd_loop
-
- add #3,r5
-L_cleanup:
-L_small:
+case1:
+ ! (src - dst) & 3 == 1: each stored long word is merged from two source long words
+ ! GHIJ KLMN OPQR --> ...G HIJK LMNO PQR.
+ !
+ ! First, align to long word boundary
+ mov r0,r3
+ and r2,r3 ! r3 = r0 & 3: number of bytes above the last long-word boundary
+ tst r3,r3
+ bt/s 2f ! already aligned: skip the byte loop
+ add #-1,r5 ! delay slot: r5 = src - dst - 1 (a multiple of 4)
+1: dt r3 ! --r3; T = (r3 == 0)
+ mov.b @(r0,r5),r1
+ bf/s 1b ! repeat until r3 bytes have been copied
+ mov.b r1,@-r0 ! delay slot: pre-decrement r0 and store the byte
+ !
+2: ! Second, read a long word and write a long word at once
+ mov.l @(r0,r5),r1 ! prime r1 with the aligned source long word (may cover bytes past src + n)
+ add #-4,r5 ! r5 = src - dst - 5: the loop loads the next lower source long word
+ mov r4,r2
+ add #7,r2 ! r2 = dst + 7: loop limit
+ !
+#ifdef __LITTLE_ENDIAN__
+3: mov r1,r3 ! RQPO
+ shll16 r3
+ shll8 r3 ! Oxxx
+ mov.l @(r0,r5),r1 ! NMLK
+ mov r1,r6
+ shlr8 r6 ! xNML
+ or r6,r3 ! ONML
+ cmp/hi r2,r0 ! T = (r0 > dst + 7), unsigned
+ bt/s 3b
+ mov.l r3,@-r0 ! delay slot: store the merged long word
+#else
+3: mov r1,r3 ! OPQR
+ shlr16 r3
+ shlr8 r3 ! xxxO
+ mov.l @(r0,r5),r1 ! KLMN
+ mov r1,r6
+ shll8 r6 ! LMNx
+ or r6,r3 ! LMNO
+ cmp/hi r2,r0 ! T = (r0 > dst + 7), unsigned
+ bt/s 3b
+ mov.l r3,@-r0 ! delay slot: store the merged long word
+#endif
+ !
+ ! Third, copy a byte at once, if necessary
cmp/eq r4,r0
- bt L_ready
- add #1,r4
- .align 2
-L_cleanup_loop:
- mov.b @(r0,r5),r2
+ bt/s 9b ! r0 == dst: done
+ add #4,r5 ! delay slot: r5 = src - dst - 1 for the byte loop
+ bra 8b ! copy the remaining 1..3 bytes in the byte loop
+ add #-6,r2 ! delay slot: r2 = dst + 1, the byte loop's limit
+
+case2:
+ ! (src - dst) & 3 == 2: source and destination share 16-bit alignment only
+ ! GHIJ KLMN OPQR --> ..GH IJKL MNOP QR..
+ !
+ ! First, align to word boundary
+ tst #1,r0 ! T = (r0 is even)
+ bt/s 2f ! already word aligned: skip the single byte copy
+ add #-1,r5 ! delay slot: r5 = src - dst - 1
+ mov.b @(r0,r5),r1 ! copy one byte so that r0 becomes even
+ mov.b r1,@-r0
+ !
+2: ! Second, read a word and write a word at once
+ add #-1,r5 ! r5 = src - dst - 2
+ mov r4,r2
+ add #3,r2 ! r2 = dst + 3: loop limit
+ !
+3: mov.w @(r0,r5),r1 ! load the 16-bit word for destination address r0 - 2
+ cmp/hi r2,r0 ! T = (r0 > dst + 3), unsigned
+ bt/s 3b
+ mov.w r1,@-r0 ! delay slot: pre-decrement r0 by 2 and store the word
+ !
+ ! Third, copy a byte at once, if necessary
cmp/eq r4,r0
- mov.b r2,@-r0
- bf L_cleanup_loop
-L_ready:
+ bt/s 9b ! r0 == dst: done
+ add #1,r5 ! delay slot: r5 = src - dst - 1
+ mov.b @(r0,r5),r1 ! otherwise r0 == dst + 1: load the one remaining byte
rts
- nop
+ mov.b r1,@-r0 ! delay slot: store it, leaving r0 = dst
+
+case3:
+ ! (src - dst) & 3 == 3: each stored long word is merged from two source long words
+ ! GHIJ KLMN OPQR --> .GHI JKLM NOPQ R...
+ !
+ ! First, align to long word boundary
+ mov r0,r3
+ and r2,r3 ! r3 = r0 & 3: number of bytes above the last long-word boundary
+ tst r3,r3
+ bt/s 2f ! already aligned: skip the byte loop
+ add #-1,r5 ! delay slot: r5 = src - dst - 1
+1: dt r3 ! --r3; T = (r3 == 0)
+ mov.b @(r0,r5),r1
+ bf/s 1b ! repeat until r3 bytes have been copied
+ mov.b r1,@-r0 ! delay slot: pre-decrement r0 and store the byte
+ !
+2: ! Second, read a long word and write a long word at once
+ add #-2,r5 ! r5 = src - dst - 3 (a multiple of 4)
+ mov.l @(r0,r5),r1 ! prime r1 with the aligned source long word (may cover a byte past src + n)
+ add #-4,r5 ! r5 = src - dst - 7: the loop loads the next lower source long word
+ mov r4,r2
+ add #7,r2 ! r2 = dst + 7: loop limit
+ !
+#ifdef __LITTLE_ENDIAN__
+3: mov r1,r3 ! RQPO
+ shll8 r3 ! QPOx
+ mov.l @(r0,r5),r1 ! NMLK
+ mov r1,r6
+ shlr16 r6
+ shlr8 r6 ! xxxN
+ or r6,r3 ! QPON
+ cmp/hi r2,r0 ! T = (r0 > dst + 7), unsigned
+ bt/s 3b
+ mov.l r3,@-r0 ! delay slot: store the merged long word
+#else
+3: mov r1,r3 ! OPQR
+ shlr8 r3 ! xOPQ
+ mov.l @(r0,r5),r1 ! KLMN
+ mov r1,r6
+ shll16 r6
+ shll8 r6 ! Nxxx
+ or r6,r3 ! NOPQ
+ cmp/hi r2,r0 ! T = (r0 > dst + 7), unsigned
+ bt/s 3b
+ mov.l r3,@-r0 ! delay slot: store the merged long word
+#endif
+ !
+ ! Third, copy a byte at once, if necessary
+ cmp/eq r4,r0 ! T = (r0 == dst): nothing left to copy
+ bt/s 9b
+ add #6,r5 ! delay slot: r5 = src - dst - 1 for the byte loop
+ bra 8b ! copy the remaining 1..3 bytes in the byte loop
+ add #-6,r2 ! delay slot: r2 = dst + 1, the byte loop's limit
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)