summaryrefslogtreecommitdiff
path: root/rtl/arm/divide.inc
diff options
context:
space:
mode:
Diffstat (limited to 'rtl/arm/divide.inc')
-rw-r--r--rtl/arm/divide.inc191
1 files changed, 191 insertions, 0 deletions
diff --git a/rtl/arm/divide.inc b/rtl/arm/divide.inc
new file mode 100644
index 0000000000..14c72eb834
--- /dev/null
+++ b/rtl/arm/divide.inc
@@ -0,0 +1,191 @@
+{
+ This file is part of the Free Pascal run time library.
+ Copyright (c) 2014 by the Free Pascal development team.
+
+ Implementation of division helpers
+
+ See the file COPYING.FPC, included in this distribution,
+ for details about the copyright.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+
+{$if defined(CPUARM_HAS_UMULL) and defined(CPUARM_HAS_CLZ)}
+
+{ ARM division helpers using umull to do a 32-bit division based on
+ this paper: http://research.microsoft.com/pubs/70645/tr-2008-141.pdf
+
+ For future optimization and testing, this file is compilable outside
+ the system unit. }
+
+{$ifndef FPC_SYSTEM_HAS_DIV_DWORD}
+{$define FPC_SYSTEM_HAS_DIV_DWORD}
+
+function fpc_div_dword(n,z:dword):dword;assembler;nostackframe;
+{$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_DIV_DWORD'];{$endif}
+asm
+ // Handle division by zero
+ cmp r0, #0
+ beq .Lhandle_div_by_zero
+
+ stmfd r13!, {r4,lr}
+ // r1 = divisor
+ // r0 = dividend
+ // r2 = k, z
+ // r3 = ty, t, my
+ // ip = temp
+ // r4 = scratch
+
+ // unsigned k = clz(y);
+ clz r2, r0
+
+ // unsigned ty = lsr( lsl(y,k), W-9 ); // prescaling
+ // r3/ty will ALWAYS give a result between 256 and 511
+ mov r3, r0, lsl r2
+ mov r3, r3, lsr #23
+
+ // unsigned t = unrt[ ty - 256 ] + 256; // table lookup
+ adr r4, .LLeading9BitTable - 256
+ ldrb ip, [r4, r3]
+ rsb r3, r2, #31
+ and r3, r3, #255
+ add ip, ip, #256
+
+ // unsigned z = lsr( lsl(t,W-9), W-k-1 );
+ mov ip, ip, lsl #23
+ mov r2, ip, lsr r3
+
+ // unsigned my = 0-y;
+ rsb r3, r0, #0
+
+ // z = z + umulh(z,mul(my,z));
+ mul ip, r3, r2
+ umull r4, ip, r2, ip
+ add r2, r2, ip
+ // z = z + umulh(z,mul(my,z));
+ mul ip, r3, r2
+ umull r4, ip, r2, ip
+ add r2, r2, ip
+
+ // q estimate
+ // q = umulh(x,z);
+ // ip = q
+ umull r4, ip, r1, r2
+
+ // r = x - mul(y,q);
+ // r4 = r
+ mul r4, r0, ip
+ sub r4, r1, r4
+
+ // q refinement
+ // if (r >= y) { r = r - y; q = q + 1; }
+ cmp r0,r4
+ subls r4, r4, r0
+ addls ip, ip, #1
+
+ // if (r >= y) { r = r - y; q = q + 1; }
+ cmp r0,r4
+ subls r4, r4, r0
+ addls ip, ip, #1
+
+ mov r0, ip
+ mov r1, r4
+
+ ldmfd r13!, {r4,pc}
+.LLeading9BitTable:
+ .byte 254, 252, 250, 248, 246, 244, 242, 240, 238, 236, 234, 233, 231, 229, 227, 225
+ .byte 224, 222, 220, 218, 217, 215, 213, 212, 210, 208, 207, 205, 203, 202, 200, 199
+ .byte 197, 195, 194, 192, 191, 189, 188, 186, 185, 183, 182, 180, 179, 178, 176, 175
+ .byte 173, 172, 170, 169, 168, 166, 165, 164, 162, 161, 160, 158, 157, 156, 154, 153
+ .byte 152, 151, 149, 148, 147, 146, 144, 143, 142, 141, 139, 138, 137, 136, 135, 134
+ .byte 132, 131, 130, 129, 128, 127, 126, 125, 123, 122, 121, 120, 119, 118, 117, 116
+ .byte 115, 114, 113, 112, 111, 110, 109, 108, 107, 106, 105, 104, 103, 102, 101, 100
+ .byte 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 88, 87, 86, 85
+ .byte 84, 83, 82, 81, 80, 80, 79, 78, 77, 76, 75, 74, 74, 73, 72, 71
+ .byte 70, 70, 69, 68, 67, 66, 66, 65, 64, 63, 62, 62, 61, 60, 59, 59
+ .byte 58, 57, 56, 56, 55, 54, 53, 53, 52, 51, 50, 50, 49, 48, 48, 47
+ .byte 46, 46, 45, 44, 43, 43, 42, 41, 41, 40, 39, 39, 38, 37, 37, 36
+ .byte 35, 35, 34, 33, 33, 32, 32, 31, 30, 30, 29, 28, 28, 27, 27, 26
+ .byte 25, 25, 24, 24, 23, 22, 22, 21, 21, 20, 19, 19, 18, 18, 17, 17
+ .byte 16, 15, 15, 14, 14, 13, 13, 12, 12, 11, 10, 10, 9, 9, 8, 8
+ .byte 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0
+
+.Lhandle_div_by_zero:
+ mov r0, #200
+ mov r1, r11
+{$ifdef FPC_IS_SYSTEM}
+ b handleerrorframe
+{$endif}
+end;
+
+{It is a compilerproc (systemh.inc), make an alias for internal use.}
+{$ifdef FPC_IS_SYSTEM}
+function fpc_div_dword(n,z:dword):dword;external name 'FPC_DIV_DWORD';
+{$endif}
+{$endif}
+
+{$ifndef FPC_SYSTEM_HAS_DIV_LONGINT}
+{$define FPC_SYSTEM_HAS_DIV_LONGINT}
+function fpc_div_longint(n,z:longint):longint;assembler;nostackframe;
+{$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_DIV_LONGINT'];{$endif}
+
+asm
+ stmfd sp!, {r4,lr}
+ ands r4, r0, #1<<31 (* r12:=r0 and $80000000 *)
+ rsbmi r0, r0, #0 (* if signed(r0) then r0:=0-r0 *)
+ eors r4, r4, r1, ASR#32 (* r12:=r12 xor (r1 asr 32) *)
+ rsbcs r1, r1, #0 (* if signed(r12) then r1:=0-r1 *)
+ bl fpc_div_dword
+ movs r4, r4, LSL#1 (* carry:=sign(r12) *)
+ rsbcs r0, r0, #0
+ rsbmi r1, r1, #0
+ ldmfd sp!, {r4,pc}
+end;
+
+{It is a compilerproc (systemh.inc), make an alias for internal use.}
+{$ifdef FPC_IS_SYSTEM}
+function fpc_div_longint(n,z:longint):longint;external name 'FPC_DIV_LONGINT';
+{$endif}
+{$endif}
+
+{$ifndef FPC_SYSTEM_HAS_MOD_DWORD}
+{$define FPC_SYSTEM_HAS_MOD_DWORD}
+function fpc_mod_dword(n,z:dword):dword;assembler;nostackframe;
+{$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_MOD_DWORD'];{$endif}
+
+asm
+ stmfd sp!, {ip,lr}
+ bl fpc_div_dword
+ mov r0, r1
+ ldmfd sp!, {ip,pc}
+end;
+
+{It is a compilerproc (systemh.inc), make an alias for internal use.}
+{$ifdef FPC_IS_SYSTEM}
+function fpc_mod_dword(n,z:dword):dword;external name 'FPC_MOD_DWORD';
+{$endif}
+{$endif}
+
+{$ifndef FPC_SYSTEM_HAS_MOD_LONGINT}
+{$define FPC_SYSTEM_HAS_MOD_LONGINT}
+function fpc_mod_longint(n,z:longint):longint;assembler;nostackframe;
+{$ifdef FPC_IS_SYSTEM}[public,alias: 'FPC_MOD_LONGINT'];{$endif}
+
+asm
+ stmfd sp!, {ip,lr}
+ bl fpc_div_longint
+ mov r0, r1
+ ldmfd sp!, {ip,pc}
+end;
+
+{It is a compilerproc (systemh.inc), make an alias for internal use.}
+{$ifdef FPC_IS_SYSTEM}
+function fpc_mod_longint(n,z:longint):longint;external name 'FPC_MOD_LONGINT';
+{$endif}
+{$endif}
+
+{$endif}