diff options
author | Dino Li <Dino.Li@ite.com.tw> | 2018-10-29 11:14:00 +0800 |
---|---|---|
committer | chrome-bot <chrome-bot@chromium.org> | 2018-10-31 00:54:24 -0700 |
commit | 8a617f05b98c0d5bac6bb373751df937a249bdb8 (patch) | |
tree | 8b08f367244bb08bc5e7627e9de4da6dfecffa3a | |
parent | 42199efde3735743a03846cc905aa4fb5fb1d470 (diff) | |
download | chrome-ec-8a617f05b98c0d5bac6bb373751df937a249bdb8.tar.gz |
nds32: Add 64-bit divide library routines for N8 CPU
Taken from NDS32 CPU's library routines.
Signed-off-by: Dino Li <dino.li@ite.com.tw>
BRANCH=none
BUG=b:115501243
TEST=Add a debug console command to see if 64-bit division
works as expected.
Change-Id: I3ba47a24a1bb60fd7fb57321b177e603a0e7712b
Reviewed-on: https://chromium-review.googlesource.com/1296430
Commit-Ready: Dino Li <Dino.Li@ite.com.tw>
Tested-by: Dino Li <Dino.Li@ite.com.tw>
Reviewed-by: Jonathan Brandmeyer <jbrandmeyer@chromium.org>
-rw-r--r-- | core/nds32/__divdi3.S | 372 | ||||
-rw-r--r-- | core/nds32/__udivdi3.S | 15 | ||||
-rw-r--r-- | core/nds32/build.mk | 1 |
3 files changed, 388 insertions, 0 deletions
diff --git a/core/nds32/__divdi3.S b/core/nds32/__divdi3.S new file mode 100644 index 0000000000..d86e8f6273 --- /dev/null +++ b/core/nds32/__divdi3.S @@ -0,0 +1,372 @@ +/* Copyright 2018 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + * + * __divdi3.S: signed 64 bit division + */ + +#define NREGS $r6 +#define DREGS $r8 +#define P1H $r1 +#define P1L $r0 +#define P2H $r3 +#define P2L $r2 +#define NUMHI $r7 +#define NUMLO $r6 +#define DENHI $r9 +#define DENLO $r8 +#define OFFSET_L 0 +#define OFFSET_H 4 +#define MHI P1H +#define MLO P1L +#define W2 $r3 +#define W1 $r5 +#define W0 $r4 +#define T2 P1L +#define NHI P1H +#define NLO P1L +#define D $r2 +#define DLO $r3 +#define DHI $r10 +#define Q NHI +#define QHI W0 +#define R NLO +#define RHI NHI +#define M T2 +#define M2 DLO + + .text + .align 2 + .globl umul_ppmm + .type umul_ppmm, @function + ! ===================================================================== + ! uint64_t umul_ppmm(uint32_t a, uint32_t b) + ! + ! This function multiplies `a' by `b' to obtain a 64-bit product. The + ! product is broken into two 32-bit pieces which are stored in the zl + ! (low-part at P1L) and zh (high-part at P1H). + ! ===================================================================== +umul_ppmm: + zeh P2L, $r0 ! al=a&0xffff + srli P2H, $r0, 16 ! ah=a>>16 + zeh P1L, $r1 ! bl=b&0xffff + srli P1H, $r1, 16 ! bh=b>>16 + mul W1, P2L, P1H ! zA=al*bh + mul P2L, P2L, P1L ! zl=al*bl + mul P1L, P2H, P1L ! zB=ah*bl + add W1, W1, P1L ! zA+=zB + slt $ta, W1, P1L ! zA<zB + slli $ta, $ta, 16 ! (zA<zB)<<16 + maddr32 $ta, P2H, P1H ! zh=ah*bh+((zA<zB)<<16) + srli P1H, W1, 16 ! zA>>16 + add P1H, P1H, $ta ! zh+=(zA>>16) + slli P1L, W1, 16 ! zA<<=16 + add P1L, P1L, P2L ! zl+=zA + slt $ta, P1L, P2L ! zl<zA + add P1H, P1H, $ta ! zh+=(zl<zA) + ret + .size umul_ppmm, .-umul_ppmm + + .text + .align 2 + .type fudiv_qrnnd, @function + ! 
===================================================================== + ! uint64_t fudiv_qrnnd(uint64_t n, uint32_t d) + ! + ! This function divides 64-bit numerator n by 32-bit denominator d. The + ! 64-bit return value contains remainder (low-part at P1L) and quotient + ! (high-part at P1H). + ! This function uses a custom calling convention, + ! with register DHI ($r10) call-clobbered instead of callee-saved. + ! ===================================================================== +fudiv_qrnnd: + srli DHI, D, 16 ! d1 = ll_highpart (d) + zeh W1, NLO ! ll_lowpart (n0) + srli T2, NLO, 16 ! ll_highpart (n0) + divr QHI, RHI, NHI, DHI ! q1 = n1 / __d1, r1 = n1 % __d1 + zeh DLO, D ! d0 = ll_lowpart (d) + slli RHI, RHI, 16 ! r1 << 16 + or RHI, RHI, T2 ! __r1 = (__r1 << 16) | ll_highpart(n0) + mul M, QHI, DLO ! m = __q1*__d0 + slt $ta, RHI, M ! __r1 < __m + beqz $ta, .L2 ! if no, skip + addi QHI, QHI, -1 ! __q1-- + add RHI, RHI, D ! __r1 += d + slt $ta, RHI, D ! __r1 < d + bnez $ta, .L2 ! if yes, skip + slt $ta, RHI, M ! __r1 < __m + beqz $ta, .L2 ! if no, skip + addi QHI, QHI, -1 ! __q1-- + add RHI, RHI, D ! __r1 += d +.L2: + sub RHI, RHI, M ! __r1 -= __m + divr Q, T2, RHI, DHI ! __q0 = r1 / __d1, __r0 = r1 % __d1 + slli T2, T2, 16 ! __r0 << 16 + or R, T2, W1 ! __r0 = (__r0 << 16) | ll_lowpart(n0) + mul M2, DLO, Q ! __m = __q0 * __d0 + slt $ta, R, M2 ! __r0 < __m + beqz $ta, .L5 ! if no, skip + add R, R, D ! __r0 += d + addi Q, Q, -1 ! __q0-- + slt $ta, R, D ! __r0 < d + bnez $ta, .L5 ! if yes, skip + slt $ta, R, M2 ! __r0 < __m + beqz $ta, .L5 ! if no, skip + add R, R, D ! __r0 += d + addi Q, Q, -1 ! __q0-- + +.L5: + sub R, R, M2 ! r = r0 = __r0 - __m + slli QHI, QHI, 16 ! __q1 << 16 + or Q, Q, QHI ! q = (__q1 << 16) | __q0 + ret + .size fudiv_qrnnd, .-fudiv_qrnnd + + .align 2 + .globl __udivmoddi4 + .type __udivmoddi4, @function + ! ===================================================================== + ! uint64_t __udivmoddi4(uint64_t n, uint64_t d, uint64_t *r) + ! 
+ ! This function divides 64-bit numerator n by 64-bit denominator d. The + ! quotient is returned as 64-bit return value and the 64-bit remainder + ! is stored at the input address r. + ! stack allocation: + ! sp+40 +------------------+ + ! | q | + ! sp+32 +------------------+ + ! | bm | + ! sp+28 +------------------+ + ! | $lp | + ! sp+24 +------------------+ + ! | $fp | + ! sp+20 +------------------+ + ! | $r10 | + ! sp+16 +------------------+ + ! | $r6 - $r9 | + ! sp +------------------+ + ! ===================================================================== +__udivmoddi4: + addi $sp, $sp, -40 + smw.bi $r6, [$sp], $r10 , 10 + movd44 NREGS, $r0 ! (n1,n0) + movd44 DREGS, $r2 ! (d1,d0) + move $fp, $r4 ! rp + bnez P2H, .L9 ! if d1 != 0, skip + slt $ta, NUMHI, DENLO ! n1 < d0 + beqz $ta, .L10 ! if no, skip + move $r0, DENLO + bal __clzsi2 + swi $r0, [$sp+(28)] ! bm + beqz $r0, .LZskipnorm1 ! if bm == 0, skip + sll DENLO, DENLO, $r0 ! d0 <<= bm + subri W1, $r0, 32 ! 32 - bm + srl W1, NUMLO, W1 ! n0 >> (32 - bm) + sll NUMHI, NUMHI, $r0 ! n1 << bm + or NUMHI, NUMHI, W1 ! n1 = (n1 << bm) | (n0 >> (32 - bm)) + sll NUMLO, NUMLO, $r0 ! n0 <<= bm +.LZskipnorm1: + movd44 $r0, NREGS ! (n1,n0) + move $r2, DENLO ! d0 + bal fudiv_qrnnd ! calculate q0 n0 + swi P1H, [$sp+(32+OFFSET_L)]! q0 + move NUMLO, P1L ! n0 + move W1, 0 + swi W1, [$sp+(32+OFFSET_H)] ! q1 = 0 + b .L19 +.L10: + beqz P2L, .LZdivzero ! if d0 != 0, skip + move $r0, DENLO + bal __clzsi2 + swi $r0, [$sp+(28)] ! bm + bnez $r0, .LZnorm1 ! if bm != 0, skip + sub NUMHI, NUMHI, DENLO ! n1 -= d0 + movi W1, 1 + swi W1, [$sp+(32+OFFSET_H)] ! q1 = 1 + b .L29 + + ! to eliminate unaligned branch target + .align 2 +.LZnorm1: + subri $ta, $r0, 32 ! b = 32 - bm + sll DENLO, DENLO, $r0 ! d0 <<= bm + move $r2, DENLO + srl W0, NUMLO, $ta ! n0 >> b + sll W1, NUMHI, $r0 ! n1 << bm + sll NUMLO, NUMLO, $r0 ! n0 <<= bm + or P1L, W1, W0 ! n1 = (n1 << bm) | (n0 >> b) + srl P1H, NUMHI, $ta ! n2 = n1 >> b + bal fudiv_qrnnd ! 
calculate q1, n1 + swi P1H, [$sp+(32+OFFSET_H)]! q1 + move NUMHI, P1L ! n1 +.L29: + movd44 $r0, NREGS ! (n1,n0) + move $r2, DENLO ! d0 + bal fudiv_qrnnd ! calculate q0, n0 + swi P1H, [$sp+(32+OFFSET_L)] + move NUMLO, P1L + + ! to eliminate unaligned branch target + .align 2 +.L19: + beqz $fp, .LZsetq ! if rp == 0, skip + lwi W2, [$sp+(28)] ! bm + movi NUMHI, 0 + srl NUMLO, NUMLO, W2 ! n0 >> bm + b .LZsetr + + ! to eliminate unaligned branch target + .align 2 +.LZdivzero: + ! divide-by-zero exception or quotient = 0 and remainder = 0 returned + divr NUMHI, NUMLO, DENLO, DENLO +.LZqzero: + movi P1H, 0 + movi P1L, 0 + beqz $fp, .LZret ! if rp == NULL, skip + swi NUMLO, [$fp+OFFSET_L] ! *rp + swi NUMHI, [$fp+OFFSET_H] + b .LZret +.L9: + slt $ta, NUMHI, DENHI ! n1 < d1 + bnez $ta, .LZqzero ! if yes, skip + move $r0, DENHI + bal __clzsi2 + swi $r0, [$sp+(28)] ! bm + beqz $r0, .LZskipnorm2 ! if bm == 0, skip + subri W0, $r0, 32 ! b = 32 - bm + srl W1, DENLO, W0 ! d0 >> b + sll $r2, DENHI, $r0 ! d1 << bm + or $r2, $r2, W1 ! d1 = (d0 >> b) | (d1 << bm) + move DENHI, $r2 + sll DENLO, DENLO, $r0 ! d0 <<= bm + srl W2, NUMLO, W0 ! n0 >> b + sll NUMLO, NUMLO, $r0 ! n0 <<= bm + sll P1L, NUMHI, $r0 ! n1 << bm + srl P1H, NUMHI, W0 ! n2 = n1 >> b + or P1L, P1L, W2 ! n1 = (n0 >> b) | (n1 << bm) + bal fudiv_qrnnd ! calculate q0, n1 + swi P1H, [$sp+(32+OFFSET_L)] + move NUMHI, P1L + move P1L, DENLO ! d0 + bal umul_ppmm + slt $ta, NUMHI, MHI ! n1 < m1 + bnez $ta, .L46 ! if yes, skip + bne MHI, NUMHI, .L45 ! if m1 != n1, skip + slt $ta, NUMLO, MLO ! n0 < m0 + beqz $ta, .L45 ! if no, skip +.L46: + lwi W2, [$sp+(32+OFFSET_L)] + sub MHI, MHI, DENHI ! m1 - d1 + addi W2, W2, -1 ! q0-- + swi W2, [$sp+(32+OFFSET_L)] + sub W2, MLO, DENLO ! __x = m0 - d0 + slt $ta, MLO, W2 ! m0 < __x + sub MHI, MHI, $ta ! m1 = m1 - d1 - (__x > m0) + move MLO, W2 ! m0 = __x +.L45: + movi W2, 0 + swi W2, [$sp+(32+OFFSET_H)] ! q1 = 0 + beqz $fp, .LZsetq ! if rp == NULL, skip + sub P1L, NUMLO, MLO ! 
__x = n0 - m0 + sub P1H, NUMHI, MHI ! n1 - m1 + slt $ta, NUMLO, P1L ! n0 < __x + sub P1H, P1H, $ta ! n1 = n1 - m1 - (__x > n0) + lwi W2, [$sp+(28)] ! bm + subri W0, W2, 32 ! b + sll NUMHI, P1H, W0 ! n1 << b + srl NUMLO, P1L, W2 ! n0 >> bm + or NUMLO, NUMLO, NUMHI ! (n1 << b) | (n0 >> bm) + srl NUMHI, P1H, W2 ! n1 >> bm +.LZsetr: + swi NUMLO, [$fp+OFFSET_L] ! remainder + swi NUMHI, [$fp+OFFSET_H] +.LZsetq: + lwi P1L, [$sp+(32+OFFSET_L)]! quotient + lwi P1H, [$sp+(32+OFFSET_H)] + + ! to eliminate unaligned branch target + .align 2 +.LZret: + lmw.bi $r6, [$sp], $r10 , 10 + addi $sp, $sp, 40 + ret + +.LZskipnorm2: + move W2, 0 + slt $ta, DENHI, NUMHI ! n1 > d1 + bnez $ta, .L52 ! if yes, skip + slt $ta, NUMLO, DENLO ! n0 < d0 + bnez $ta, .L51 ! if yes, skip +.L52: + move W1, 1 + swi W1, [$sp+(32+OFFSET_L)] ! q0 = 1 + sub W0, NUMLO, DENLO ! __x = n0 - d0 + sub NUMHI, NUMHI, DENHI ! n1 - d1 + slt $ta, NUMLO, W0 ! n0 < __x + sub NUMHI, NUMHI, $ta ! n1 = n1 - d1 - (__x > n0) + move NUMLO, W0 ! n0 = __x + b .L54 +.L51: + swi W2, [$sp+(32+OFFSET_L)] ! q0 = 0 +.L54: + swi W2, [$sp+(32+OFFSET_H)] ! q1 = 0 + bnez $fp, .LZsetr + b .LZsetq + .size __udivmoddi4, .-__udivmoddi4 + + .text + .align 2 + .globl __divdi3 + .type __divdi3, @function +__divdi3: + ! ===================================================================== + ! int64_t __divdi3(int64_t n, int64_t d) + ! + ! This function divides signed 64-bit n by d and returns the quotient. + ! + ! stack allocation: + ! sp+8 +-----------------------+ + ! | $lp | + ! sp+4 +-----------------------+ + ! | $r6 | + ! sp +-----------------------+ + ! ===================================================================== + smw.adm $r6, [$sp], $r6, 2 + + xor $r6, P1H, P2H + srai45 $r6, 31 ! signof(numerator xor denominator) + ! abs(denominator) + bgez P2H, .L80 + neg P2H, P2H + beqz P2L, .L80 + neg P2L, P2L + addi P2H, P2H, -1 + +.L80: + ! abs(numerator) + bgez P1H, .L81 + neg P1H, P1H + beqz P1L, .L81 + neg P1L, P1L + addi P1H, P1H, -1 + +.L81: + ! 
abs(numerator) / abs(denominator) + movi $r4, 0 ! ignore remainder + bal __udivmoddi4 + ! numerator / denominator + beqz $r6, .L82 + or $r4, P1H, P1L + beqz $r4, .L82 + neg P1H, P1H + beqz P1L, .L82 + neg P1L, P1L + addi P1H, P1H, -1 + + ! to eliminate unaligned branch target + .align 2 +.L82: + lmw.bim $r6, [$sp], $r6, 2 + ret + .size __divdi3, .-__divdi3 diff --git a/core/nds32/__udivdi3.S b/core/nds32/__udivdi3.S new file mode 100644 index 0000000000..4cb3b058fe --- /dev/null +++ b/core/nds32/__udivdi3.S @@ -0,0 +1,15 @@ +/* Copyright 2018 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + * + * __udivdi3.S: unsigned 64 bit division + */ + + .text + .align 2 + .globl __udivdi3 + .type __udivdi3, @function +__udivdi3: + movi $r4, 0 ! ignore remainder + b __udivmoddi4 + .size __udivdi3, .-__udivdi3 diff --git a/core/nds32/build.mk b/core/nds32/build.mk index b613b87bc7..199e7bcd27 100644 --- a/core/nds32/build.mk +++ b/core/nds32/build.mk @@ -22,4 +22,5 @@ LDFLAGS_EXTRA+=-flto endif core-y=cpu.o init.o panic.o task.o switch.o __muldi3.o math.o __builtin.o +core-y+=__divdi3.o __udivdi3.o core-$(CONFIG_FPU)+=__libsoftfpu.o |