/* libgcc/config/epiphany/udivsi3-float.S */

/* Unsigned 32 bit division optimized for Epiphany.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by Embecosm on behalf of Adapteva, Inc.

This file is part of GCC.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "epiphany-asm.h"

	/* __udivsi3: unsigned 32 bit division.

	   As far as the code below shows: r0 holds the dividend and r1 the
	   divisor on entry, and the quotient is left in r0.  r1 and TMP0..TMP3
	   are overwritten.  There is no test for a zero divisor.
	   FSTAB, HIDDEN_FUNC, SYM, ENDFUNC and the TMPn names come from
	   epiphany-asm.h, which is not visible here.

	   Outline:
	   1. If r0 < r1 (unsigned), return 0.
	   2. Convert both operands with the float instruction and take the
	      difference of the two bit patterns, less 0x00810000, shifted
	      right by 23 and clamped at 0, as a shift count s.  (Presumably
	      this is the exponent difference of the two values, biased
	      downwards so that r1 << s does not overshoot r0 by much --
	      TODO confirm against the ISA manual.)
	   3. Subtract r1 << s from r0 repeatedly, adding 1 << s to TMP3 for
	      every subtraction that does not borrow, then undo the last
	      (borrowing) subtraction.
	   4. Jump into the unrolled block so that exactly s shift-and-subtract
	      steps run; each step leaves one quotient bit in bit 0 of r0.
	   5. Keep the low s bits of r0 and OR them with TMP3.  */
	FSTAB (__udivsi3,T_UINT)
	.global SYM(__udivsi3)
	.balign 4
	HIDDEN_FUNC(__udivsi3)
SYM(__udivsi3):
	; Dividend below divisor (unsigned): the quotient is 0.
	sub TMP0,r0,r1
	bltu .Lret0
	; Convert both operands to float.  The indented mov / movt pair builds
	; the constant 0xb0800000 in TMP1 in between the two conversions
	; (presumably interleaved for instruction scheduling).
	float TMP2,r0
	  mov TMP1,%low(0xb0800000) ; ??? this would be faster with small data
	float TMP3,r1
	  movt TMP1,%high(0xb0800000)
	; Alternative bit pattern for a dividend with bit 31 set:
	; TMP0 = (r0 asr 8) - 0xb0800000.  For r0 >= 0x80000000 the shifted
	; value is at least 0xff800000, so the subtraction does not borrow and
	; movgteu below replaces TMP2; e.g. r0 = 0x80000000 gives 0x4f000000,
	; the IEEE single precision encoding of 2^31.  For smaller r0 the
	; subtraction borrows and TMP2 keeps the float result.
	; NOTE(review): presumably needed because float treats r0 as signed.
	asr TMP0,r0,8
	sub TMP0,TMP0,TMP1
	; Only the upper half of TMP1 is rewritten; its lower half is still
	; %low(0xb0800000) = 0, so TMP1 is now 0x00810000.  This movt sits
	; between the sub and the movgteu that consumes its flags, so it is
	; relied on to leave the flags alone.
	movt TMP1,%high(0x00810000)
	movgteu TMP2,TMP0
	; NOTE(review): bblt presumably tests the flags left by the conversion
	; of r1, i.e. it is taken when r1 converted to a negative value (bit 31
	; set).  As r0 >= r1 at this point the quotient would then be 1.
	; Confirm against the ISA manual.
	bblt .Lret1
	; TMP2 = bits(r0 as float) - 0x00810000 - bits(r1 as float).
	sub TMP2,TMP2,TMP1
	sub TMP2,TMP2,TMP3
	; TMP3 becomes the quotient accumulator, starting at 0.  The movltu
	; uses the borrow of the sub above (the mov in between is relied on
	; not to change the flags) and clamps a wrapped difference to 0.
	mov TMP3,0
	movltu TMP2,TMP3
	; s = TMP2 >> 23 is the shift count used from here on.
	lsr TMP2,TMP2,23
	; r1 = divisor << s, TMP0 = 1 << s (the quotient weight of r1).
	lsl r1,r1,TMP2
	mov TMP0,1
	lsl TMP0,TMP0,TMP2
	; Repeated subtraction of the shifted divisor.  The first two
	; iterations are peeled off; every subtraction that does not borrow
	; adds TMP0 to the accumulator TMP3.
	sub r0,r0,r1
	bltu .Ladd_back
	add TMP3,TMP3,TMP0
	sub r0,r0,r1
	bltu .Ladd_back
.Lsub_loop:; More than two iterations are rare, so it makes sense to leave
           ; this label here to reduce average branch penalties.
	add TMP3,TMP3,TMP0
	sub r0,r0,r1
	bgteu .Lsub_loop
.Ladd_back:
	; Undo the last subtraction, which borrowed: r0 is the partial
	; remainder again.
	add r0,r0,r1
	; TMP1 = (divisor << s) - 1.  Subtracting TMP1 in a step below is the
	; same as subtracting the shifted divisor and adding 1, which is how a
	; quotient bit ends up in bit 0 of r0.
	sub TMP1,r1,1
	; Computed jump: target = .L0step - 8 * s.  This assumes each
	; repetition of the three instruction step below occupies 8 bytes
	; (presumably the reason for the explicit .l suffix on the sub --
	; TODO confirm instruction sizes), so that exactly s steps execute.
	mov r1,%low(.L0step)
	movt r1,%high(.L0step)
	lsl TMP2,TMP2,3
	sub r1,r1,TMP2
	jr r1
	; 30 unrolled steps; execution enters s steps before .L0step.
	.rep 30
	; One step: shift r0 left by one, and if the result is >= TMP1
	; (unsigned) replace it by r0 - TMP1.
	lsl r0,r0,1
	sub.l r1,r0,TMP1
	movgteu r0,r1
	.endr
.L0step:sub r1,TMP0,1 ; mask result bits from steps ...
	and r0,r0,r1
	orr r0,r0,TMP3 ; ... and combine with first bits.
	rts
	; Early exit: dividend < divisor.
.Lret0:	mov r0,0
	rts
	; Early exit taken from the bblt above.
.Lret1:	mov r0,1
	rts
	ENDFUNC(__udivsi3)