/* Runtime ABI for the ARM Cortex-M * uldivmod.S: unsigned 64 bit division * * Copyright (c) 2012 Jörg Mische * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include "software_panic.h" .syntax unified .text .code 16 @ {unsigned long long quotient, unsigned long long remainder} @ __aeabi_uldivmod(unsigned long long numerator, unsigned long long denominator) @ @ Divide r1:r0 by r3:r2 and return the quotient in r1:r0 and the remainder @ in r3:r2 (all unsigned) @ .thumb_func .section .text.__aeabi_uldivmod .global __aeabi_uldivmod __aeabi_uldivmod: cmp r3, #0 bne L_large_denom cmp r2, #0 beq L_divison_by_0 cmp r1, #0 beq L_fallback_32bits @ case 1: num >= 2^32 and denom < 2^32 @ Result might be > 2^32, therefore we first calculate the upper 32 @ bits of the result. It is done similar to the calculation of the @ lower 32 bits, but with a denominator that is shifted by 32. @ Hence the lower 32 bits of the denominator are always 0 and the @ costly 64 bit shift and sub operations can be replaced by cheap 32 @ bit operations. push {r4, r5, r6, r7, lr} @ shift left the denominator until it is greater than the numerator @ denom(r7:r6) = r3:r2 << 32 @ TODO(crosbug.com/p/36128): Loops like this (which occur in several @ places in this file) are inefficent in ARMv6-m. movs r5, #1 @ bitmask adds r7, r2, #0 @ dont shift if denominator would overflow bmi L_upper_result cmp r1, r7 blo L_upper_result L_denom_shift_loop1: lsl r5, #1 lsls r7, #1 bmi L_upper_result @ dont shift if overflow cmp r1, r7 bhs L_denom_shift_loop1 L_upper_result: mov r3, r1 mov r2, r0 mov r1, #0 @ upper result = 0 mov r6, #0 L_sub_loop1: orr r1, r5 @ result(r7:r6) |= bitmask(r5) subs r2, r6 @ num -= denom sbcs r3, r7 bhs L_done_sub1 eor r1, r5 @ undo add mask adds r2, r6 @ undo subtract adc r3, r3, r7 L_done_sub1: lsrs r7, #1 @ denom(r7:r6) >>= 1 rrx r6, r6 lsrs r5, #1 @ bitmask(r5) >>= 1 bne L_sub_loop1 rrx r5, r5 b L_lower_result @ case 2: division by 0 @ call __aeabi_ldiv0 L_divison_by_0: b __aeabi_ldiv0 @ case 3: num < 2^32 and denom < 2^32 @ fallback to 32 bit division L_fallback_32bits: mov r1, r0 udiv r0, r0, r2 @ r0 = quotient mul r3, r0, r2 @ r3 = quotient * divisor sub r2, r1, r3 @ r2 = remainder mov r1, #0 mov r3, #0 bx lr @ case 4: denom >= 2^32 @ result is smaller than 2^32 L_large_denom: push {r4, r5, r6, r7, lr} mov r7, r3 mov r6, r2 mov r3, r1 mov r2, r0 @ Shift left the denominator until it is greater than the numerator mov r1, #0 @ high word of result is 0 mov r5, #1 @ bitmask adds r7, #0 @ dont shift if denominator would overflow bmi L_lower_result cmp r3, r7 blo L_lower_result L_denom_shift_loop4: lsl r5, #1 lsls r6, #1 @ denom(r7:r6) <<= 1 adcs r7, r7 bmi L_lower_result @ dont shift if overflow cmp r3, r7 bhs L_denom_shift_loop4 L_lower_result: movs r0, #0 L_sub_loop4: orr r0, r5 @ result(r1:r0) |= bitmask(r5) subs r2, r6 @ numerator -= denom sbcs r3, r7 bhs L_done_sub4 eor r0, r5 @ undo add mask adds r2, r6 @ undo subtract adc r3, r3, r7 L_done_sub4: lsrs r7, #1 @ denom(r7:r6) >>= 1 rrx r6, r6 lsrs r5, #1 @ bitmask(r5) >>= 1 bne L_sub_loop4 pop {r4, r5, r6, r7, pc} __aeabi_ldiv0: ldr SOFTWARE_PANIC_REASON_REG, =PANIC_SW_DIV_ZERO mov SOFTWARE_PANIC_INFO_REG, r14 bl exception_panic