From 11b1a9b2c0fecb7334ccb23f29200da0be0cc156 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 26 May 2016 18:01:38 +0200 Subject: arm: lib: Drop underscore from private libgcc filenames Drop the underscore from the filenames of files implementing libgcc routines. There is no functional change. This change is done to make sync with Linux kernel easier. Signed-off-by: Marek Vasut Cc: Albert Aribaud Cc: Masahiro Yamada Cc: Simon Glass Cc: Tom Rini Reviewed-by: Tom Rini --- arch/arm/lib/Makefile | 6 +- arch/arm/lib/_ashldi3.S | 28 ------ arch/arm/lib/_ashrdi3.S | 28 ------ arch/arm/lib/_divsi3.S | 143 --------------------------- arch/arm/lib/_lshrdi3.S | 28 ------ arch/arm/lib/_modsi3.S | 99 ------------------- arch/arm/lib/_udivsi3.S | 95 ------------------ arch/arm/lib/_uldivmod.S | 245 ----------------------------------------------- arch/arm/lib/_umodsi3.S | 90 ----------------- arch/arm/lib/ashldi3.S | 28 ++++++ arch/arm/lib/ashrdi3.S | 28 ++++++ arch/arm/lib/divsi3.S | 143 +++++++++++++++++++++++++++ arch/arm/lib/lshrdi3.S | 28 ++++++ arch/arm/lib/modsi3.S | 99 +++++++++++++++++++ arch/arm/lib/udivsi3.S | 95 ++++++++++++++++++ arch/arm/lib/uldivmod.S | 245 +++++++++++++++++++++++++++++++++++++++++++++++ arch/arm/lib/umodsi3.S | 90 +++++++++++++++++ 17 files changed, 759 insertions(+), 759 deletions(-) delete mode 100644 arch/arm/lib/_ashldi3.S delete mode 100644 arch/arm/lib/_ashrdi3.S delete mode 100644 arch/arm/lib/_divsi3.S delete mode 100644 arch/arm/lib/_lshrdi3.S delete mode 100644 arch/arm/lib/_modsi3.S delete mode 100644 arch/arm/lib/_udivsi3.S delete mode 100644 arch/arm/lib/_uldivmod.S delete mode 100644 arch/arm/lib/_umodsi3.S create mode 100644 arch/arm/lib/ashldi3.S create mode 100644 arch/arm/lib/ashrdi3.S create mode 100644 arch/arm/lib/divsi3.S create mode 100644 arch/arm/lib/lshrdi3.S create mode 100644 arch/arm/lib/modsi3.S create mode 100644 arch/arm/lib/udivsi3.S create mode 100644 arch/arm/lib/uldivmod.S create mode 100644 arch/arm/lib/umodsi3.S (limited to 'arch/arm') diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 49adce3..f54ce8c 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -5,9 +5,9 @@ # SPDX-License-Identifier: GPL-2.0+ # -lib-$(CONFIG_USE_PRIVATE_LIBGCC) += _ashldi3.o _ashrdi3.o _divsi3.o \ - _lshrdi3.o _modsi3.o _udivsi3.o _umodsi3.o div0.o \ - _uldivmod.o +lib-$(CONFIG_USE_PRIVATE_LIBGCC) += ashldi3.o ashrdi3.o divsi3.o \ + lshrdi3.o modsi3.o udivsi3.o umodsi3.o div0.o \ + uldivmod.o ifdef CONFIG_CPU_V7M obj-y += vectors_m.o crt0.o diff --git a/arch/arm/lib/_ashldi3.S b/arch/arm/lib/_ashldi3.S deleted file mode 100644 index 9c34c21..0000000 --- a/arch/arm/lib/_ashldi3.S +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 - Free Software Foundation, Inc. - - * SPDX-License-Identifier: GPL-2.0+ - */ - -#include - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -.globl __ashldi3 -__ashldi3: -ENTRY(__aeabi_llsl) - - subs r3, r2, #32 - rsb ip, r2, #32 - movmi ah, ah, lsl r2 - movpl ah, al, lsl r3 - orrmi ah, ah, al, lsr ip - mov al, al, lsl r2 - mov pc, lr -ENDPROC(__aeabi_llsl) diff --git a/arch/arm/lib/_ashrdi3.S b/arch/arm/lib/_ashrdi3.S deleted file mode 100644 index c74fd64..0000000 --- a/arch/arm/lib/_ashrdi3.S +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 - Free Software Foundation, Inc. - - * SPDX-License-Identifier: GPL-2.0+ - */ - -#include - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -.globl __ashrdi3 -__ashrdi3: -ENTRY(__aeabi_lasr) - - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, asr r3 - orrmi al, al, ah, lsl ip - mov ah, ah, asr r2 - mov pc, lr -ENDPROC(__aeabi_lasr) diff --git a/arch/arm/lib/_divsi3.S b/arch/arm/lib/_divsi3.S deleted file mode 100644 index c463c68..0000000 --- a/arch/arm/lib/_divsi3.S +++ /dev/null @@ -1,143 +0,0 @@ -#include - -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4 bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif - - @ Division loop -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -.endm - -.macro ARM_DIV2_ORDER divisor, order - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm - - .align 5 -.globl __divsi3 -__divsi3: -ENTRY(__aeabi_idiv) - cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? - beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr - -10: teq ip, r0 @ same sign ? - rsbmi r0, r0, #0 - mov pc, lr - -11: movlo r0, #0 - moveq r0, ip, asr #31 - orreq r0, r0, #1 - mov pc, lr - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - rsbmi r0, r0, #0 - mov pc, lr - -Ldiv0: - - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 -ENDPROC(__aeabi_idiv) diff --git a/arch/arm/lib/_lshrdi3.S b/arch/arm/lib/_lshrdi3.S deleted file mode 100644 index 1f9b916..0000000 --- a/arch/arm/lib/_lshrdi3.S +++ /dev/null @@ -1,28 +0,0 @@ -/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 - Free Software Foundation, Inc. - - * SPDX-License-Identifier: GPL-2.0+ - */ - -#include - -#ifdef __ARMEB__ -#define al r1 -#define ah r0 -#else -#define al r0 -#define ah r1 -#endif - -.globl __lshrdi3 -__lshrdi3: -ENTRY(__aeabi_llsr) - - subs r3, r2, #32 - rsb ip, r2, #32 - movmi al, al, lsr r2 - movpl al, ah, lsr r3 - orrmi al, al, ah, lsl ip - mov ah, ah, lsr r2 - mov pc, lr -ENDPROC(__aeabi_llsr) diff --git a/arch/arm/lib/_modsi3.S b/arch/arm/lib/_modsi3.S deleted file mode 100644 index c5e1c22..0000000 --- a/arch/arm/lib/_modsi3.S +++ /dev/null @@ -1,99 +0,0 @@ -#include - -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif - - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. - subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/substractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: -.endm - - .align 5 -ENTRY(__modsi3) - cmp r1, #0 - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr -ENDPROC(__modsi3) - -Ldiv0: - - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 diff --git a/arch/arm/lib/_udivsi3.S b/arch/arm/lib/_udivsi3.S deleted file mode 100644 index 3b653be..0000000 --- a/arch/arm/lib/_udivsi3.S +++ /dev/null @@ -1,95 +0,0 @@ -#include - -/* # 1 "libgcc1.S" */ -@ libgcc1 routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) -dividend .req r0 -divisor .req r1 -result .req r2 -curbit .req r3 -/* ip .req r12 */ -/* sp .req r13 */ -/* lr .req r14 */ -/* pc .req r15 */ - .text - .globl __udivsi3 - .type __udivsi3 ,function - .globl __aeabi_uidiv - .type __aeabi_uidiv ,function - .align 0 - __udivsi3: - __aeabi_uidiv: - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - mov result, #0 - cmp dividend, divisor - bcc Lgot_result -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum -Loop3: - @ Test for possible subtractions, and note which bits - @ are done in the result. On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. - cmp dividend, divisor - subcs dividend, dividend, divisor - orrcs result, result, curbit - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs result, result, curbit, lsr #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs result, result, curbit, lsr #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs result, result, curbit, lsr #3 - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 -Lgot_result: - mov r0, result - mov pc, lr -Ldiv0: - str lr, [sp, #-4]! - bl __div0 (PLT) - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc} - .size __udivsi3 , . - __udivsi3 - -ENTRY(__aeabi_uidivmod) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_uidiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr -ENDPROC(__aeabi_uidivmod) - -ENTRY(__aeabi_idivmod) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_idiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr -ENDPROC(__aeabi_idivmod) diff --git a/arch/arm/lib/_uldivmod.S b/arch/arm/lib/_uldivmod.S deleted file mode 100644 index 426c2f2..0000000 --- a/arch/arm/lib/_uldivmod.S +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright 2010, Google Inc. - * - * Brought in from coreboot uldivmod.S - * - * SPDX-License-Identifier: GPL-2.0 - */ - -#include -#include - -/* We don't use Thumb instructions for now */ -#define ARM(x...) x -#define THUMB(x...) - -/* - * A, Q = r0 + (r1 << 32) - * B, R = r2 + (r3 << 32) - * A / B = Q ... R - */ - -A_0 .req r0 -A_1 .req r1 -B_0 .req r2 -B_1 .req r3 -C_0 .req r4 -C_1 .req r5 -D_0 .req r6 -D_1 .req r7 - -Q_0 .req r0 -Q_1 .req r1 -R_0 .req r2 -R_1 .req r3 - -THUMB( -TMP .req r8 -) - -ENTRY(__aeabi_uldivmod) - stmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) lr} - @ Test if B == 0 - orrs ip, B_0, B_1 @ Z set -> B == 0 - beq L_div_by_0 - @ Test if B is power of 2: (B & (B - 1)) == 0 - subs C_0, B_0, #1 - sbc C_1, B_1, #0 - tst C_0, B_0 - tsteq B_1, C_1 - beq L_pow2 - @ Test if A_1 == B_1 == 0 - orrs ip, A_1, B_1 - beq L_div_32_32 - -L_div_64_64: -/* CLZ only exists in ARM architecture version 5 and above. */ -#ifdef HAVE_CLZ - mov C_0, #1 - mov C_1, #0 - @ D_0 = clz A - teq A_1, #0 - clz D_0, A_1 - clzeq ip, A_0 - addeq D_0, D_0, ip - @ D_1 = clz B - teq B_1, #0 - clz D_1, B_1 - clzeq ip, B_0 - addeq D_1, D_1, ip - @ if clz B - clz A > 0 - subs D_0, D_1, D_0 - bls L_done_shift - @ B <<= (clz B - clz A) - subs D_1, D_0, #32 - rsb ip, D_0, #32 - movmi B_1, B_1, lsl D_0 -ARM( orrmi B_1, B_1, B_0, lsr ip ) -THUMB( lsrmi TMP, B_0, ip ) -THUMB( orrmi B_1, B_1, TMP ) - movpl B_1, B_0, lsl D_1 - mov B_0, B_0, lsl D_0 - @ C = 1 << (clz B - clz A) - movmi C_1, C_1, lsl D_0 -ARM( orrmi C_1, C_1, C_0, lsr ip ) -THUMB( lsrmi TMP, C_0, ip ) -THUMB( orrmi C_1, C_1, TMP ) - movpl C_1, C_0, lsl D_1 - mov C_0, C_0, lsl D_0 -L_done_shift: - mov D_0, #0 - mov D_1, #0 - @ C: current bit; D: result -#else - @ C: current bit; D: result - mov C_0, #1 - mov C_1, #0 - mov D_0, #0 - mov D_1, #0 -L_lsl_4: - cmp B_1, #0x10000000 - cmpcc B_1, A_1 - cmpeq B_0, A_0 - bcs L_lsl_1 - @ B <<= 4 - mov B_1, B_1, lsl #4 - orr B_1, B_1, B_0, lsr #28 - mov B_0, B_0, lsl #4 - @ C <<= 4 - mov C_1, C_1, lsl #4 - orr C_1, C_1, C_0, lsr #28 - mov C_0, C_0, lsl #4 - b L_lsl_4 -L_lsl_1: - cmp B_1, #0x80000000 - cmpcc B_1, A_1 - cmpeq B_0, A_0 - bcs L_subtract - @ B <<= 1 - mov B_1, B_1, lsl #1 - orr B_1, B_1, B_0, lsr #31 - mov B_0, B_0, lsl #1 - @ C <<= 1 - mov C_1, C_1, lsl #1 - orr C_1, C_1, C_0, lsr #31 - mov C_0, C_0, lsl #1 - b L_lsl_1 -#endif -L_subtract: - @ if A >= B - cmp A_1, B_1 - cmpeq A_0, B_0 - bcc L_update - @ A -= B - subs A_0, A_0, B_0 - sbc A_1, A_1, B_1 - @ D |= C - orr D_0, D_0, C_0 - orr D_1, D_1, C_1 -L_update: - @ if A == 0: break - orrs ip, A_1, A_0 - beq L_exit - @ C >>= 1 - movs C_1, C_1, lsr #1 - movs C_0, C_0, rrx - @ if C == 0: break - orrs ip, C_1, C_0 - beq L_exit - @ B >>= 1 - movs B_1, B_1, lsr #1 - mov B_0, B_0, rrx - b L_subtract -L_exit: - @ Note: A, B & Q, R are aliases - mov R_0, A_0 - mov R_1, A_1 - mov Q_0, D_0 - mov Q_1, D_1 - ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} - -L_div_32_32: - @ Note: A_0 & r0 are aliases - @ Q_1 r1 - mov r1, B_0 - bl __aeabi_uidivmod - mov R_0, r1 - mov R_1, #0 - mov Q_1, #0 - ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} - -L_pow2: -#ifdef HAVE_CLZ - @ Note: A, B and Q, R are aliases - @ R = A & (B - 1) - and C_0, A_0, C_0 - and C_1, A_1, C_1 - @ Q = A >> log2(B) - @ Note: B must not be 0 here! - clz D_0, B_0 - add D_1, D_0, #1 - rsbs D_0, D_0, #31 - bpl L_1 - clz D_0, B_1 - rsb D_0, D_0, #31 - mov A_0, A_1, lsr D_0 - add D_0, D_0, #32 -L_1: - movpl A_0, A_0, lsr D_0 -ARM( orrpl A_0, A_0, A_1, lsl D_1 ) -THUMB( lslpl TMP, A_1, D_1 ) -THUMB( orrpl A_0, A_0, TMP ) - mov A_1, A_1, lsr D_0 - @ Mov back C to R - mov R_0, C_0 - mov R_1, C_1 - ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} -#else - @ Note: A, B and Q, R are aliases - @ R = A & (B - 1) - and C_0, A_0, C_0 - and C_1, A_1, C_1 - @ Q = A >> log2(B) - @ Note: B must not be 0 here! - @ Count the leading zeroes in B. - mov D_0, #0 - orrs B_0, B_0, B_0 - @ If B is greater than 1 << 31, divide A and B by 1 << 32. - moveq A_0, A_1 - moveq A_1, #0 - moveq B_0, B_1 - @ Count the remaining leading zeroes in B. - movs B_1, B_0, lsl #16 - addeq D_0, #16 - moveq B_0, B_0, lsr #16 - tst B_0, #0xff - addeq D_0, #8 - moveq B_0, B_0, lsr #8 - tst B_0, #0xf - addeq D_0, #4 - moveq B_0, B_0, lsr #4 - tst B_0, #0x3 - addeq D_0, #2 - moveq B_0, B_0, lsr #2 - tst B_0, #0x1 - addeq D_0, #1 - @ Shift A to the right by the appropriate amount. - rsb D_1, D_0, #32 - mov Q_0, A_0, lsr D_0 - orr Q_0, A_1, lsl D_1 - mov Q_1, A_1, lsr D_0 - @ Move C to R - mov R_0, C_0 - mov R_1, C_1 - ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} -#endif - -L_div_by_0: - bl __div0 - @ As wrong as it could be - mov Q_0, #0 - mov Q_1, #0 - mov R_0, #0 - mov R_1, #0 - ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} -ENDPROC(__aeabi_uldivmod) diff --git a/arch/arm/lib/_umodsi3.S b/arch/arm/lib/_umodsi3.S deleted file mode 100644 index b166737..0000000 --- a/arch/arm/lib/_umodsi3.S +++ /dev/null @@ -1,90 +0,0 @@ -#include - -/* # 1 "libgcc1.S" */ -@ libgcc1 routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) -/* # 145 "libgcc1.S" */ -dividend .req r0 -divisor .req r1 -overdone .req r2 -curbit .req r3 -/* ip .req r12 */ -/* sp .req r13 */ -/* lr .req r14 */ -/* pc .req r15 */ - .text - .type __umodsi3 ,function - .align 0 - ENTRY(__umodsi3) - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - cmp dividend, divisor - movcc pc, lr -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum -Loop3: - @ Test for possible subtractions. On the final pass, this may - @ subtract too much from the dividend, so keep track of which - @ subtractions are done, we can fix them up afterwards... - mov overdone, #0 - cmp dividend, divisor - subcs dividend, dividend, divisor - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs overdone, overdone, curbit, ror #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs overdone, overdone, curbit, ror #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs overdone, overdone, curbit, ror #3 - mov ip, curbit - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - @ If we terminated early, because dividend became zero, - @ then none of the below will match, since the bit in ip will not be - @ in the bottom nibble. - ands overdone, overdone, #0xe0000000 - moveq pc, lr @ No fixups needed - tst overdone, ip, ror #3 - addne dividend, dividend, divisor, lsr #3 - tst overdone, ip, ror #2 - addne dividend, dividend, divisor, lsr #2 - tst overdone, ip, ror #1 - addne dividend, dividend, divisor, lsr #1 - mov pc, lr -Ldiv0: - str lr, [sp, #-4]! - bl __div0 (PLT) - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc} - .size __umodsi3 , . - __umodsi3 -/* # 320 "libgcc1.S" */ -/* # 421 "libgcc1.S" */ -/* # 433 "libgcc1.S" */ -/* # 456 "libgcc1.S" */ -/* # 500 "libgcc1.S" */ -/* # 580 "libgcc1.S" */ -ENDPROC(__umodsi3) diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S new file mode 100644 index 0000000..9c34c21 --- /dev/null +++ b/arch/arm/lib/ashldi3.S @@ -0,0 +1,28 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +.globl __ashldi3 +__ashldi3: +ENTRY(__aeabi_llsl) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi ah, ah, lsl r2 + movpl ah, al, lsl r3 + orrmi ah, ah, al, lsr ip + mov al, al, lsl r2 + mov pc, lr +ENDPROC(__aeabi_llsl) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S new file mode 100644 index 0000000..c74fd64 --- /dev/null +++ b/arch/arm/lib/ashrdi3.S @@ -0,0 +1,28 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +.globl __ashrdi3 +__ashrdi3: +ENTRY(__aeabi_lasr) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, asr r3 + orrmi al, al, ah, lsl ip + mov ah, ah, asr r2 + mov pc, lr +ENDPROC(__aeabi_lasr) diff --git a/arch/arm/lib/divsi3.S b/arch/arm/lib/divsi3.S new file mode 100644 index 0000000..c463c68 --- /dev/null +++ b/arch/arm/lib/divsi3.S @@ -0,0 +1,143 @@ +#include + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + .align 5 +.globl __divsi3 +__divsi3: +ENTRY(__aeabi_idiv) + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr + +10: teq ip, r0 @ same sign ? + rsbmi r0, r0, #0 + mov pc, lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + mov pc, lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + mov pc, lr + +Ldiv0: + + str lr, [sp, #-4]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #4 +ENDPROC(__aeabi_idiv) diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S new file mode 100644 index 0000000..1f9b916 --- /dev/null +++ b/arch/arm/lib/lshrdi3.S @@ -0,0 +1,28 @@ +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005 + Free Software Foundation, Inc. + + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include + +#ifdef __ARMEB__ +#define al r1 +#define ah r0 +#else +#define al r0 +#define ah r1 +#endif + +.globl __lshrdi3 +__lshrdi3: +ENTRY(__aeabi_llsr) + + subs r3, r2, #32 + rsb ip, r2, #32 + movmi al, al, lsr r2 + movpl al, ah, lsr r3 + orrmi al, al, ah, lsl ip + mov ah, ah, lsr r2 + mov pc, lr +ENDPROC(__aeabi_llsr) diff --git a/arch/arm/lib/modsi3.S b/arch/arm/lib/modsi3.S new file mode 100644 index 0000000..c5e1c22 --- /dev/null +++ b/arch/arm/lib/modsi3.S @@ -0,0 +1,99 @@ +#include + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif + + @ Perform all needed substractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subges \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/substractions are left. +2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + .align 5 +ENTRY(__modsi3) + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + mov pc, lr +ENDPROC(__modsi3) + +Ldiv0: + + str lr, [sp, #-4]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. + ldr pc, [sp], #4 diff --git a/arch/arm/lib/udivsi3.S b/arch/arm/lib/udivsi3.S new file mode 100644 index 0000000..3b653be --- /dev/null +++ b/arch/arm/lib/udivsi3.S @@ -0,0 +1,95 @@ +#include + +/* # 1 "libgcc1.S" */ +@ libgcc1 routines for ARM cpu. +@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) +dividend .req r0 +divisor .req r1 +result .req r2 +curbit .req r3 +/* ip .req r12 */ +/* sp .req r13 */ +/* lr .req r14 */ +/* pc .req r15 */ + .text + .globl __udivsi3 + .type __udivsi3 ,function + .globl __aeabi_uidiv + .type __aeabi_uidiv ,function + .align 0 + __udivsi3: + __aeabi_uidiv: + cmp divisor, #0 + beq Ldiv0 + mov curbit, #1 + mov result, #0 + cmp dividend, divisor + bcc Lgot_result +Loop1: + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, #0x10000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #4 + movcc curbit, curbit, lsl #4 + bcc Loop1 +Lbignum: + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, #0x80000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #1 + movcc curbit, curbit, lsl #1 + bcc Lbignum +Loop3: + @ Test for possible subtractions, and note which bits + @ are done in the result. On the final pass, this may subtract + @ too much from the dividend, but the result will be ok, since the + @ "bit" will have been shifted out at the bottom. + cmp dividend, divisor + subcs dividend, dividend, divisor + orrcs result, result, curbit + cmp dividend, divisor, lsr #1 + subcs dividend, dividend, divisor, lsr #1 + orrcs result, result, curbit, lsr #1 + cmp dividend, divisor, lsr #2 + subcs dividend, dividend, divisor, lsr #2 + orrcs result, result, curbit, lsr #2 + cmp dividend, divisor, lsr #3 + subcs dividend, dividend, divisor, lsr #3 + orrcs result, result, curbit, lsr #3 + cmp dividend, #0 @ Early termination? + movnes curbit, curbit, lsr #4 @ No, any more bits to do? + movne divisor, divisor, lsr #4 + bne Loop3 +Lgot_result: + mov r0, result + mov pc, lr +Ldiv0: + str lr, [sp, #-4]! + bl __div0 (PLT) + mov r0, #0 @ about as wrong as it could be + ldmia sp!, {pc} + .size __udivsi3 , . - __udivsi3 + +ENTRY(__aeabi_uidivmod) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr +ENDPROC(__aeabi_uidivmod) + +ENTRY(__aeabi_idivmod) + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + mov pc, lr +ENDPROC(__aeabi_idivmod) diff --git a/arch/arm/lib/uldivmod.S b/arch/arm/lib/uldivmod.S new file mode 100644 index 0000000..426c2f2 --- /dev/null +++ b/arch/arm/lib/uldivmod.S @@ -0,0 +1,245 @@ +/* + * Copyright 2010, Google Inc. + * + * Brought in from coreboot uldivmod.S + * + * SPDX-License-Identifier: GPL-2.0 + */ + +#include +#include + +/* We don't use Thumb instructions for now */ +#define ARM(x...) x +#define THUMB(x...) + +/* + * A, Q = r0 + (r1 << 32) + * B, R = r2 + (r3 << 32) + * A / B = Q ... R + */ + +A_0 .req r0 +A_1 .req r1 +B_0 .req r2 +B_1 .req r3 +C_0 .req r4 +C_1 .req r5 +D_0 .req r6 +D_1 .req r7 + +Q_0 .req r0 +Q_1 .req r1 +R_0 .req r2 +R_1 .req r3 + +THUMB( +TMP .req r8 +) + +ENTRY(__aeabi_uldivmod) + stmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) lr} + @ Test if B == 0 + orrs ip, B_0, B_1 @ Z set -> B == 0 + beq L_div_by_0 + @ Test if B is power of 2: (B & (B - 1)) == 0 + subs C_0, B_0, #1 + sbc C_1, B_1, #0 + tst C_0, B_0 + tsteq B_1, C_1 + beq L_pow2 + @ Test if A_1 == B_1 == 0 + orrs ip, A_1, B_1 + beq L_div_32_32 + +L_div_64_64: +/* CLZ only exists in ARM architecture version 5 and above. */ +#ifdef HAVE_CLZ + mov C_0, #1 + mov C_1, #0 + @ D_0 = clz A + teq A_1, #0 + clz D_0, A_1 + clzeq ip, A_0 + addeq D_0, D_0, ip + @ D_1 = clz B + teq B_1, #0 + clz D_1, B_1 + clzeq ip, B_0 + addeq D_1, D_1, ip + @ if clz B - clz A > 0 + subs D_0, D_1, D_0 + bls L_done_shift + @ B <<= (clz B - clz A) + subs D_1, D_0, #32 + rsb ip, D_0, #32 + movmi B_1, B_1, lsl D_0 +ARM( orrmi B_1, B_1, B_0, lsr ip ) +THUMB( lsrmi TMP, B_0, ip ) +THUMB( orrmi B_1, B_1, TMP ) + movpl B_1, B_0, lsl D_1 + mov B_0, B_0, lsl D_0 + @ C = 1 << (clz B - clz A) + movmi C_1, C_1, lsl D_0 +ARM( orrmi C_1, C_1, C_0, lsr ip ) +THUMB( lsrmi TMP, C_0, ip ) +THUMB( orrmi C_1, C_1, TMP ) + movpl C_1, C_0, lsl D_1 + mov C_0, C_0, lsl D_0 +L_done_shift: + mov D_0, #0 + mov D_1, #0 + @ C: current bit; D: result +#else + @ C: current bit; D: result + mov C_0, #1 + mov C_1, #0 + mov D_0, #0 + mov D_1, #0 +L_lsl_4: + cmp B_1, #0x10000000 + cmpcc B_1, A_1 + cmpeq B_0, A_0 + bcs L_lsl_1 + @ B <<= 4 + mov B_1, B_1, lsl #4 + orr B_1, B_1, B_0, lsr #28 + mov B_0, B_0, lsl #4 + @ C <<= 4 + mov C_1, C_1, lsl #4 + orr C_1, C_1, C_0, lsr #28 + mov C_0, C_0, lsl #4 + b L_lsl_4 +L_lsl_1: + cmp B_1, #0x80000000 + cmpcc B_1, A_1 + cmpeq B_0, A_0 + bcs L_subtract + @ B <<= 1 + mov B_1, B_1, lsl #1 + orr B_1, B_1, B_0, lsr #31 + mov B_0, B_0, lsl #1 + @ C <<= 1 + mov C_1, C_1, lsl #1 + orr C_1, C_1, C_0, lsr #31 + mov C_0, C_0, lsl #1 + b L_lsl_1 +#endif +L_subtract: + @ if A >= B + cmp A_1, B_1 + cmpeq A_0, B_0 + bcc L_update + @ A -= B + subs A_0, A_0, B_0 + sbc A_1, A_1, B_1 + @ D |= C + orr D_0, D_0, C_0 + orr D_1, D_1, C_1 +L_update: + @ if A == 0: break + orrs ip, A_1, A_0 + beq L_exit + @ C >>= 1 + movs C_1, C_1, lsr #1 + movs C_0, C_0, rrx + @ if C == 0: break + orrs ip, C_1, C_0 + beq L_exit + @ B >>= 1 + movs B_1, B_1, lsr #1 + mov B_0, B_0, rrx + b L_subtract +L_exit: + @ Note: A, B & Q, R are aliases + mov R_0, A_0 + mov R_1, A_1 + mov Q_0, D_0 + mov Q_1, D_1 + ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} + +L_div_32_32: + @ Note: A_0 & r0 are aliases + @ Q_1 r1 + mov r1, B_0 + bl __aeabi_uidivmod + mov R_0, r1 + mov R_1, #0 + mov Q_1, #0 + ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} + +L_pow2: +#ifdef HAVE_CLZ + @ Note: A, B and Q, R are aliases + @ R = A & (B - 1) + and C_0, A_0, C_0 + and C_1, A_1, C_1 + @ Q = A >> log2(B) + @ Note: B must not be 0 here! + clz D_0, B_0 + add D_1, D_0, #1 + rsbs D_0, D_0, #31 + bpl L_1 + clz D_0, B_1 + rsb D_0, D_0, #31 + mov A_0, A_1, lsr D_0 + add D_0, D_0, #32 +L_1: + movpl A_0, A_0, lsr D_0 +ARM( orrpl A_0, A_0, A_1, lsl D_1 ) +THUMB( lslpl TMP, A_1, D_1 ) +THUMB( orrpl A_0, A_0, TMP ) + mov A_1, A_1, lsr D_0 + @ Mov back C to R + mov R_0, C_0 + mov R_1, C_1 + ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} +#else + @ Note: A, B and Q, R are aliases + @ R = A & (B - 1) + and C_0, A_0, C_0 + and C_1, A_1, C_1 + @ Q = A >> log2(B) + @ Note: B must not be 0 here! + @ Count the leading zeroes in B. + mov D_0, #0 + orrs B_0, B_0, B_0 + @ If B is greater than 1 << 31, divide A and B by 1 << 32. + moveq A_0, A_1 + moveq A_1, #0 + moveq B_0, B_1 + @ Count the remaining leading zeroes in B. + movs B_1, B_0, lsl #16 + addeq D_0, #16 + moveq B_0, B_0, lsr #16 + tst B_0, #0xff + addeq D_0, #8 + moveq B_0, B_0, lsr #8 + tst B_0, #0xf + addeq D_0, #4 + moveq B_0, B_0, lsr #4 + tst B_0, #0x3 + addeq D_0, #2 + moveq B_0, B_0, lsr #2 + tst B_0, #0x1 + addeq D_0, #1 + @ Shift A to the right by the appropriate amount. + rsb D_1, D_0, #32 + mov Q_0, A_0, lsr D_0 + orr Q_0, A_1, lsl D_1 + mov Q_1, A_1, lsr D_0 + @ Move C to R + mov R_0, C_0 + mov R_1, C_1 + ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} +#endif + +L_div_by_0: + bl __div0 + @ As wrong as it could be + mov Q_0, #0 + mov Q_1, #0 + mov R_0, #0 + mov R_1, #0 + ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} +ENDPROC(__aeabi_uldivmod) diff --git a/arch/arm/lib/umodsi3.S b/arch/arm/lib/umodsi3.S new file mode 100644 index 0000000..b166737 --- /dev/null +++ b/arch/arm/lib/umodsi3.S @@ -0,0 +1,90 @@ +#include + +/* # 1 "libgcc1.S" */ +@ libgcc1 routines for ARM cpu. +@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) +/* # 145 "libgcc1.S" */ +dividend .req r0 +divisor .req r1 +overdone .req r2 +curbit .req r3 +/* ip .req r12 */ +/* sp .req r13 */ +/* lr .req r14 */ +/* pc .req r15 */ + .text + .type __umodsi3 ,function + .align 0 + ENTRY(__umodsi3) + cmp divisor, #0 + beq Ldiv0 + mov curbit, #1 + cmp dividend, divisor + movcc pc, lr +Loop1: + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend. + cmp divisor, #0x10000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #4 + movcc curbit, curbit, lsl #4 + bcc Loop1 +Lbignum: + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. + cmp divisor, #0x80000000 + cmpcc divisor, dividend + movcc divisor, divisor, lsl #1 + movcc curbit, curbit, lsl #1 + bcc Lbignum +Loop3: + @ Test for possible subtractions. On the final pass, this may + @ subtract too much from the dividend, so keep track of which + @ subtractions are done, we can fix them up afterwards... + mov overdone, #0 + cmp dividend, divisor + subcs dividend, dividend, divisor + cmp dividend, divisor, lsr #1 + subcs dividend, dividend, divisor, lsr #1 + orrcs overdone, overdone, curbit, ror #1 + cmp dividend, divisor, lsr #2 + subcs dividend, dividend, divisor, lsr #2 + orrcs overdone, overdone, curbit, ror #2 + cmp dividend, divisor, lsr #3 + subcs dividend, dividend, divisor, lsr #3 + orrcs overdone, overdone, curbit, ror #3 + mov ip, curbit + cmp dividend, #0 @ Early termination? + movnes curbit, curbit, lsr #4 @ No, any more bits to do? + movne divisor, divisor, lsr #4 + bne Loop3 + @ Any subtractions that we should not have done will be recorded in + @ the top three bits of "overdone". Exactly which were not needed + @ are governed by the position of the bit, stored in ip. + @ If we terminated early, because dividend became zero, + @ then none of the below will match, since the bit in ip will not be + @ in the bottom nibble. + ands overdone, overdone, #0xe0000000 + moveq pc, lr @ No fixups needed + tst overdone, ip, ror #3 + addne dividend, dividend, divisor, lsr #3 + tst overdone, ip, ror #2 + addne dividend, dividend, divisor, lsr #2 + tst overdone, ip, ror #1 + addne dividend, dividend, divisor, lsr #1 + mov pc, lr +Ldiv0: + str lr, [sp, #-4]! + bl __div0 (PLT) + mov r0, #0 @ about as wrong as it could be + ldmia sp!, {pc} + .size __umodsi3 , . - __umodsi3 +/* # 320 "libgcc1.S" */ +/* # 421 "libgcc1.S" */ +/* # 433 "libgcc1.S" */ +/* # 456 "libgcc1.S" */ +/* # 500 "libgcc1.S" */ +/* # 580 "libgcc1.S" */ +ENDPROC(__umodsi3) -- cgit v1.1