summaryrefslogtreecommitdiff
path: root/newlib/libc/machine/hppa/strcpy.S
diff options
context:
space:
mode:
Diffstat (limited to 'newlib/libc/machine/hppa/strcpy.S')
-rw-r--r--newlib/libc/machine/hppa/strcpy.S285
1 files changed, 285 insertions, 0 deletions
diff --git a/newlib/libc/machine/hppa/strcpy.S b/newlib/libc/machine/hppa/strcpy.S
new file mode 100644
index 00000000000..3068cd5e1fa
--- /dev/null
+++ b/newlib/libc/machine/hppa/strcpy.S
@@ -0,0 +1,285 @@
+/*
+ * (c) Copyright 1986 HEWLETT-PACKARD COMPANY
+ *
+ * To anyone who acknowledges that this file is provided "AS IS"
+ * without any express or implied warranty:
+ * permission to use, copy, modify, and distribute this file
+ * for any purpose is hereby granted without fee, provided that
+ * the above copyright notice and this notice appears in all
+ * copies, and that the name of Hewlett-Packard Company not be
+ * used in advertising or publicity pertaining to distribution
+ * of the software without specific, written prior permission.
+ * Hewlett-Packard Company makes no representations about the
+ * suitability of this software for any purpose.
+ */
+
+/*
+ A faster strcpy.
+
+ by
+
+ Jerry Huck (aligned case)
+ Daryl Odnert (equal-alignment case)
+ Edgar Circenis (non-aligned case)
+*/
+/*
+ * strcpy(s1, s2)
+ *
+ * Copy string s2, including its terminating null, to s1. s1 must be large enough to hold it.
+ * Returns s1.
+ */
+
+#include "DEFS.h"
+
+#define d_addr r26 /* destination pointer; copied to ret0 on entry */
+#define s_addr r25 /* source pointer; advanced as bytes/words are loaded */
+#define tmp6 r24 /* alignment case index for the dispatch, later the destination byte offset */
+#define tmp1 r19 /* scratch; same register as evenside */
+#define evenside r19 /* word register of the aligned copy loop; same register as tmp1 */
+#define tmp2 r20 /* scratch; same register as oddside */
+#define oddside r20 /* word register of the aligned copy loop; same register as tmp2 */
+#define tmp3 r21 /* shift quantity (TGT-SRC) from the dispatch table, then the shifted data word */
+#define tmp4 r22 /* shift amount in bits; reused as a byte scratch in the tail checks */
+#define tmp5 arg3 /* not referenced in the visible code */
+#define save r1 /* word being tested for a zero byte / partial word to store */
+
+
+ENTRY(strcpy) /* strcpy(s1, s2): copies from s_addr to d_addr; ret0 = original d_addr */
+/* Quick alignment check; the fast path is taken when both pointers are word aligned. */
+ extru,<> s_addr,31,2,tmp6 /* tmp6 = low 2 bits of source; skip the load if not word aligned */
+ ldwm 4(0,s_addr),oddside /* Source is word aligned: load its first word (s_addr advances by 4)
+ and guess that it is double-word aligned. */
+ dep,= d_addr,29,2,tmp6 /* tmp6 = (target low bits << 2) | source low bits; skip the branch if both aligned */
+ b case_analysis /* some misalignment: dispatch on tmp6 */
+ copy d_addr,ret0 /* return value = original destination; runs on both paths (delay slot) */
+/* Both are word aligned. The first source word is already in oddside, loaded
+ on the assumption that the source was odd-word aligned. The fall-through
+ (therefore fastest) code shuffles the registers to join the main loop. */
+bothaligned:
+ bb,>= s_addr,29,twoatatime /* Branch if source was odd-word aligned (s_addr is already 4 past that word) */
+ uxor,nbz oddside,r0,save /* save = first word; skip the next instruction if it has no zero byte (delay slot) */
+
+/* Even-word aligned source. save holds the first word.
+ Do one iteration of the main copy loop by hand, juggling the registers
+ to avoid one register copy. */
+ b,n nullfound /* zero byte in first word: finish from save */
+ ldwm 4(s_addr),oddside /* load second word */
+ stwm save,4(d_addr) /* store first word, advance d_addr */
+ uxor,nbz oddside,r0,save /* save = second word; skip next if no zero byte */
+ b,n nullfound /* zero byte in second word */
+ ldwm 4(s_addr),evenside /* load third word */
+ stwm oddside,4(d_addr) /* store second word */
+ uxor,nbz evenside,r0,save /* save = third word; skip next if no zero byte */
+ b,n nullfound /* zero byte in third word */
+ ldwm 4(s_addr),oddside /* load fourth word, then fall into the loop */
+
+/* Main loop body. On entry evenside has been checked but not yet stored,
+ and oddside has just been loaded. */
+loop:
+ stwm evenside,4(d_addr) /* store the word that was already checked */
+ uxor,nbz oddside,r0,save /* save = oddside; skip next if it has no zero byte */
+
+/* Mid-loop entry: oddside is loaded and copied to save, but not yet stored. */
+twoatatime:
+ b,n nullfound /* zero byte in oddside */
+ ldwm 4(s_addr),evenside /* load next word */
+ stwm oddside,4(d_addr) /* store the checked word */
+ uxor,sbz evenside,r0,save /* save = evenside; skip the loop branch if it has a zero byte */
+ b loop
+ ldwm 4(s_addr),oddside /* delay slot; also executes when falling out of the loop */
+
+/* Fall through when a null is found in evenside; oddside has been loaded anyway. */
+nullfound: /* save = word holding the null; adjust d_addr and store final word */
+
+ extru,<> save,7,8,r0 /* test leftmost byte; skip next if it is non-zero */
+ addib,tr,n 1,d_addr,store_final /* null is byte 0: d_addr += 1, always branch, delay slot nullified */
+ extru,<> save,15,8,r0 /* test second byte; skip next if it is non-zero */
+ addib,tr,n 2,d_addr,store_final /* null is byte 1: d_addr += 2 */
+ extru,<> save,23,8,r0 /* test third byte; skip next if it is non-zero */
+ addib,tr 3,d_addr,store_final2 /* null is byte 2: d_addr += 3. NOTE(review): no ,n here, so the bv below appears to act as the delay slot - confirm */
+ bv 0(rp) /* return (null is in the last byte on the fall-through path) */
+ stw save,0(d_addr) /* delay slot: store the whole final word */
+
+store_final:
+ bv 0(rp) /* return; the store below is its delay slot */
+store_final2:
+ stbys,e save,0(d_addr) /* delay slot: store the leading bytes of save that precede the adjusted d_addr offset */
+
+case_analysis: /* tmp6 = (TGT low bits << 2) | SRC low bits, set up at entry */
+
+ blr tmp6,r0 /* jump into the table below, two instructions per tmp6 value; link discarded in r0 */
+ nop /* delay slot */
+
+ /* NOTE: the delay slots for the non-aligned cases load a */
+ /* shift quantity which is TGT-SRC into tmp3. */
+ /* Note also, the case for both strings being word aligned */
+ /* is already checked before the BLR is executed, so that */
+ /* case can never occur. */
+
+ /* TGT SRC (low two address bits of target and source) */
+ nop /* 00 00 can't happen */
+ nop
+ b neg_aligned_copy /* 00 01 */
+ ldi -1,tmp3 /* load shift quantity. delay slot */
+ b neg_aligned_copy /* 00 10 */
+ ldi -2,tmp3 /* load shift quantity. delay slot */
+ b neg_aligned_copy /* 00 11 */
+ ldi -3,tmp3 /* load shift quantity. delay slot */
+ b pos_aligned_copy0 /* 01 00 */
+ ldi 1,tmp3 /* load shift quantity. delay slot */
+ b equal_alignment_1 /* 01 01 */
+ ldbs,ma 1(s_addr),tmp1 /* load first source byte, advance s_addr. delay slot */
+ b neg_aligned_copy /* 01 10 */
+ ldi -1,tmp3 /* load shift quantity. delay slot */
+ b neg_aligned_copy /* 01 11 */
+ ldi -2,tmp3 /* load shift quantity. delay slot */
+ b pos_aligned_copy0 /* 10 00 */
+ ldi 2,tmp3 /* load shift quantity. delay slot */
+ b pos_aligned_copy /* 10 01 */
+ ldi 1,tmp3 /* load shift quantity. delay slot */
+ b equal_alignment_2 /* 10 10 */
+ ldhs,ma 2(s_addr),tmp1 /* load first source halfword, advance s_addr. delay slot */
+ b neg_aligned_copy /* 10 11 */
+ ldi -1,tmp3 /* load shift quantity. delay slot */
+ b pos_aligned_copy0 /* 11 00 */
+ ldi 3,tmp3 /* load shift quantity. delay slot */
+ b pos_aligned_copy /* 11 01 */
+ ldi 2,tmp3 /* load shift quantity. delay slot */
+ b pos_aligned_copy /* 11 10 */
+ ldi 1,tmp3 /* load shift quantity. delay slot */
+ ldbs,ma 1(s_addr),tmp1 /* 11 11: handled inline; load the one byte before the word boundary */
+ comiclr,<> r0,tmp1,r0 /* nullify next if tmp1 <> 0 */
+ bv 0(rp) /* return if 1st byte was null */
+ stbs,ma tmp1,1(d_addr) /* store a byte to dst string (runs whether or not we return) */
+ b bothaligned /* can now goto word_aligned */
+ ldwm 4(s_addr),oddside /* load next word of source. delay slot */
+
+equal_alignment_1: /* both pointers at offset 1; tmp1 = first byte (loaded in the table delay slot) */
+ comiclr,<> r0,tmp1,r0 /* nullify next if tmp1 <> 0 */
+ bv 0(rp) /* return if null byte found */
+ stbs,ma tmp1,1(d_addr) /* store a byte to dst string (runs whether or not we return) */
+ ldhs,ma 2(s_addr),tmp1 /* load next halfword */
+equal_alignment_2: /* both pointers at offset 2; tmp1 = halfword just loaded */
+ extru,<> tmp1,23,8,tmp6 /* tmp6 = left byte of halfword; nullify next if it is non-zero */
+ bv 0(rp) /* return if 1st byte was null */
+ stbs,ma tmp6,1(d_addr) /* store 1st byte (runs whether or not we return) */
+ extru,<> tmp1,31,8,r0 /* test right byte of halfword; nullify next if it is non-zero */
+ bv 0(rp) /* return if 2nd byte was null */
+ stbs,ma tmp1,1(d_addr) /* store 2nd byte (low byte of tmp1) */
+ b bothaligned /* both pointers are now word aligned */
+ ldwm 4(s_addr),oddside /* load next word. delay slot */
+
+/* Source and destination have different alignments, so we do it the hard way. */
+
+/* Target alignment is greater than source alignment; tmp3 = TGT-SRC (positive). */
+pos_aligned_copy0: /* entered only for SRC == 00, where the entry ldwm already ran */
+ addi -4,s_addr,s_addr /* undo the +4 that load applied to s_addr */
+pos_aligned_copy:
+ extru d_addr,31,2,tmp6 /* Extract low 2 bits of the dest addr */
+ extru s_addr,31,2,tmp1 /* Extract low 2 bits of the src addr */
+ dep r0,31,2,s_addr /* Compute word address of the source. */
+ sh3add tmp3,r0,tmp4 /* compute shift amt: tmp4 = tmp3 * 8 (bits) */
+ ldwm 4(0,s_addr),tmp2 /* get 1st source word */
+ sh3add tmp1,r0,save /* setup mask shift amount: source offset * 8 */
+ mtctl save,r11 /* set-up cr11 for mask */
+ zvdepi -2,32,save /* create mask for the bytes ahead of the string start */
+ or save,tmp2,tmp2 /* mask unused bytes in src so they cannot look like nulls */
+ ldi -1,tmp1 /* load tmp1 with 0xffffffff */
+ mtctl tmp4,r11 /* shift count -> shift count reg */
+ vshd tmp1,tmp2,tmp3 /* position data ! tmp3 = tmp1:tmp2 shifted into destination position */
+ uxor,nbz tmp3,r0,save /* save = tmp3; skip next if it has no zero byte */
+ b,n first_null /* null within the first destination word */
+ uxor,nbz tmp2,r0,save /* save = tmp2; skip next if the source word has no zero byte */
+ b nullfound1 /* null is in the part of tmp2 not yet positioned */
+ mtctl tmp4,r11 /* re-load shift cnt (delay slot; also runs on fall-through) */
+ b loop_entry /* no null so far: join the shifting copy loop */
+ ldwm 4(0,s_addr),tmp1 /* get next word. delay slot */
+
+neg_aligned_copy: /* reached from table entries where TGT < SRC; tmp3 = TGT-SRC (negative) */
+ extru d_addr,31,2,tmp6 /* Extract low 2 bits of the dest addr */
+ extru s_addr,31,2,tmp2 /* Extract low 2 bits of the src addr */
+ dep r0,31,2,s_addr /* Compute word address of the source. */
+ sh3add tmp3,r0,tmp4 /* compute shift amt: tmp4 = tmp3 * 8 */
+ ldwm 4(0,s_addr),tmp1 /* load first word from source. */
+/* Check for a null in the first word before reading the next one. */
+ sh3add tmp2,r0,save /* mask shift amount: source offset * 8 */
+ mtctl save,r11 /* shift count -> shift count reg */
+ zvdepi -2,32,save /* create mask for the bytes ahead of the string start */
+ or save, tmp1, tmp1 /* mask unused bytes so they cannot look like nulls */
+ uxor,nbz tmp1,r0,save /* any nulls in first word? */
+ b first_null0 /* yes: finish without loading another word */
+ mtctl tmp4,r11 /* data shift count -> cr11 (delay slot; also runs on fall-through) */
+ ldwm 4(0,s_addr),tmp2 /* load second word from source */
+ combt,= tmp6,r0,chunk1 /* don't mask if whole word valid */
+ vshd tmp1,tmp2,tmp3 /* position data ! (delay slot; runs on both paths) */
+ sh3add tmp6,r0,save /* setup r1: destination offset * 8 */
+ mtctl save,r11 /* set-up cr11 for mask */
+ zvdepi -2,32,save /* create mask for the bytes ahead of the destination start */
+ or save, tmp3, tmp3 /* mask those leading bytes in the positioned word */
+ uxor,nbz tmp3,r0,save /* save = tmp3; skip next if it has no zero byte */
+ b,n first_null /* null within the first destination word */
+ uxor,nbz tmp2,r0,save /* save = tmp2; skip next if the second word has no zero byte */
+ b nullfound1 /* null is in the part of tmp2 not yet positioned */
+ mtctl tmp4,r11 /* re-load shift cnt (delay slot) */
+ b loop_entry /* no null so far: join the shifting copy loop */
+ ldwm 4(0,s_addr),tmp1 /* get next word. delay slot */
+
+chunk1: /* destination is word aligned; tmp1 and tmp2 hold the first two source words */
+ uxor,nbz tmp2,r0,save /* save = tmp2; skip next if it has no zero byte */
+ b nullfound0 /* null in the second word */
+ vshd tmp1,tmp2,tmp3 /* position data (delay slot; also runs on fall-through) */
+did_mask:
+ ldwm 4(0,s_addr),tmp1 /* get next word ! */
+loop_entry: /* tmp3 = positioned word to store, tmp2 = previous source word, tmp1 = newest */
+ stbys,b,m tmp3,4(0,d_addr) /* store ! NOTE(review): presumably leaves d_addr word aligned for the stwm below - confirm */
+
+ uxor,nbz tmp1, r0, save /* save = tmp1; skip next if it has no zero byte */
+ b nullfound2 /* null in tmp1 */
+ vshd tmp2,tmp1,tmp3 /* position data ! (delay slot; also runs on fall-through) */
+ ldwm 4(s_addr),tmp2 /* get next word */
+ stwm tmp3,4(d_addr) /* store positioned word, advance d_addr */
+ uxor,sbz tmp2,r0,save /* save = tmp2; skip the loop branch if it has a zero byte */
+ b did_mask /* no null: keep looping */
+nullfound0: /* null is somewhere in tmp2 */
+ vshd tmp1,tmp2,tmp3 /* delay slot of the loop branch; first instruction on the null path */
+ uxor,nbz tmp3,r0,save /* save = tmp3; skip next if the positioned word has no zero byte */
+ b,n nullfound /* null is in tmp3: finish from save */
+nullfound1: /* tmp3 has no null; the null is in the rest of tmp2 */
+ stbys,b,m tmp3,4(0,d_addr) /* store the positioned word */
+ b nullfound /* finish with the leftover bytes */
+ vshd tmp2,r0,save /* delay slot: save = remaining bytes of tmp2, shifted into position */
+
+nullfound2: /* null is somewhere in tmp1; tmp3 = tmp2:tmp1 positioned */
+ uxor,nbz tmp3,r0,save /* save = tmp3; skip next if the positioned word has no zero byte */
+ b,n nullfound /* null is in tmp3: finish from save */
+ stwm tmp3,4(d_addr) /* store the positioned word */
+ b nullfound /* finish with the leftover bytes of tmp1 */
+ /* notice that delay slot is in next routine */
+
+first_null0: /* null found in first word of non-aligned (wrt d_addr) */
+ vshd tmp1,r0,save /* delay slot of the preceding b nullfound; save = tmp1 shifted into position */
+ combt,= tmp6,r0,check4 /* destination offset 0: check all four bytes */
+ extru save,7,8,tmp4 /* delay slot: tmp4 = first byte of save */
+first_null: /* tmp6 = destination byte offset (non-zero), save = positioned word holding the null */
+ addibt,= -1,tmp6,check3 /* check last 3 bytes of word */
+ extru save,15,8,tmp4 /* delay slot: tmp4 = second byte of save */
+ addibt,=,n -1,tmp6,check2 /* check last 2 bytes */
+ bv 0(rp) /* null in last byte--store and exit */
+ stbys,b save, 0(d_addr) /* delay slot: store from the d_addr byte offset to the end of the word */
+
+check4: /* tmp4 = first byte of save */
+ combt,= tmp4,r0,done /* done if that byte is the null */
+ stbs,ma tmp4,1(d_addr) /* delay slot: store the byte either way, advance d_addr */
+ extru,<> save,15,8,tmp4 /* tmp4 = second byte; nullify the compare below if it is non-zero */
+check3: /* tmp4 = second byte of save */
+ combt,= tmp4,r0,done /* done if that byte is the null */
+ stbs,ma tmp4,1(d_addr) /* store the byte either way, advance d_addr */
+check2:
+ extru,<> save,23,8,tmp4 /* tmp4 = third byte; nullify next if it is non-zero */
+ bv 0(rp) /* return if the third byte is the null */
+ stbs,ma tmp4,1(d_addr) /* store the third byte (runs whether or not we return) */
+ bv 0(rp) /* third byte was non-zero, so the null is the last byte */
+ stbs r0,0(d_addr) /* delay slot: store the terminating null */
+
+done: /* NOTE(review): no return is visible here; presumably EXIT from DEFS.h supplies it - confirm */
+EXIT(strcpy)