diff options
Diffstat (limited to 'arch')
25 files changed, 974 insertions, 492 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7a77b6a..77eab66 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -64,6 +64,20 @@ config SYS_CPU default "sa1100" if CPU_SA1100 default "armv8" if ARM64 +config SYS_ARM_ARCH + int + default 4 if CPU_ARM720T + default 4 if CPU_ARM920T + default 5 if CPU_ARM926EJS + default 5 if CPU_ARM946ES + default 6 if CPU_ARM1136 + default 6 if CPU_ARM1176 + default 7 if CPU_V7 + default 7 if CPU_V7M + default 5 if CPU_PXA + default 4 if CPU_SA1100 + default 8 if ARM64 + config SEMIHOSTING bool "support boot from semihosting" help @@ -766,6 +780,7 @@ config ARCH_ROCKCHIP config TARGET_THUNDERX_88XX bool "Support ThunderX 88xx" + select ARM64 select OF_CONTROL endchoice diff --git a/arch/arm/Makefile b/arch/arm/Makefile index ecd1887..6a07cd1 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -11,7 +11,7 @@ endif arch-$(CONFIG_CPU_ARM720T) =-march=armv4 arch-$(CONFIG_CPU_ARM920T) =-march=armv4t arch-$(CONFIG_CPU_ARM926EJS) =-march=armv5te -arch-$(CONFIG_CPU_ARM946ES) =-march=armv4 +arch-$(CONFIG_CPU_ARM946ES) =-march=armv5te arch-$(CONFIG_CPU_SA1100) =-march=armv4 arch-$(CONFIG_CPU_PXA) = arch-$(CONFIG_CPU_ARM1136) =-march=armv5 diff --git a/arch/arm/cpu/armv7/am33xx/clock_am43xx.c b/arch/arm/cpu/armv7/am33xx/clock_am43xx.c index 5c2a2ab..73ea955 100644 --- a/arch/arm/cpu/armv7/am33xx/clock_am43xx.c +++ b/arch/arm/cpu/armv7/am33xx/clock_am43xx.c @@ -160,7 +160,7 @@ void disable_edma3_clocks(void) } #endif -#ifdef CONFIG_USB_DWC3 +#if defined(CONFIG_USB_DWC3) || defined(CONFIG_USB_XHCI_OMAP) void enable_usb_clocks(int index) { u32 *usbclkctrl = 0; diff --git a/arch/arm/cpu/armv7/omap-common/hwinit-common.c b/arch/arm/cpu/armv7/omap-common/hwinit-common.c index 078bdd8..2f9693f 100644 --- a/arch/arm/cpu/armv7/omap-common/hwinit-common.c +++ b/arch/arm/cpu/armv7/omap-common/hwinit-common.c @@ -112,6 +112,16 @@ void __weak do_board_detect(void) { } +/** + * vcores_init() - Assign omap_vcores based on board + * + * 
Function to pick the vcores based on board. This is expected to be + * overridden in the SoC family board file where desired. + */ +void __weak vcores_init(void) +{ +} + void s_init(void) { } @@ -149,6 +159,7 @@ void early_system_init(void) #endif setup_early_clocks(); do_board_detect(); + vcores_init(); prcm_init(); } diff --git a/arch/arm/cpu/armv7/omap5/hw_data.c b/arch/arm/cpu/armv7/omap5/hw_data.c index 88e8920..5b91446a 100644 --- a/arch/arm/cpu/armv7/omap5/hw_data.c +++ b/arch/arm/cpu/armv7/omap5/hw_data.c @@ -365,35 +365,35 @@ struct vcores_data omap5430_volts_es2 = { }; struct vcores_data dra752_volts = { - .mpu.value = VDD_MPU_DRA752, - .mpu.efuse.reg = STD_FUSE_OPP_VMIN_MPU_NOM, + .mpu.value = VDD_MPU_DRA7, + .mpu.efuse.reg = STD_FUSE_OPP_VMIN_MPU, .mpu.efuse.reg_bits = DRA752_EFUSE_REGBITS, .mpu.addr = TPS659038_REG_ADDR_SMPS12, .mpu.pmic = &tps659038, .mpu.abb_tx_done_mask = OMAP_ABB_MPU_TXDONE_MASK, - .eve.value = VDD_EVE_DRA752, - .eve.efuse.reg = STD_FUSE_OPP_VMIN_DSPEVE_NOM, + .eve.value = VDD_EVE_DRA7, + .eve.efuse.reg = STD_FUSE_OPP_VMIN_DSPEVE, .eve.efuse.reg_bits = DRA752_EFUSE_REGBITS, .eve.addr = TPS659038_REG_ADDR_SMPS45, .eve.pmic = &tps659038, .eve.abb_tx_done_mask = OMAP_ABB_EVE_TXDONE_MASK, - .gpu.value = VDD_GPU_DRA752, - .gpu.efuse.reg = STD_FUSE_OPP_VMIN_GPU_NOM, + .gpu.value = VDD_GPU_DRA7, + .gpu.efuse.reg = STD_FUSE_OPP_VMIN_GPU, .gpu.efuse.reg_bits = DRA752_EFUSE_REGBITS, .gpu.addr = TPS659038_REG_ADDR_SMPS6, .gpu.pmic = &tps659038, .gpu.abb_tx_done_mask = OMAP_ABB_GPU_TXDONE_MASK, - .core.value = VDD_CORE_DRA752, - .core.efuse.reg = STD_FUSE_OPP_VMIN_CORE_NOM, + .core.value = VDD_CORE_DRA7, + .core.efuse.reg = STD_FUSE_OPP_VMIN_CORE, .core.efuse.reg_bits = DRA752_EFUSE_REGBITS, .core.addr = TPS659038_REG_ADDR_SMPS7, .core.pmic = &tps659038, - .iva.value = VDD_IVA_DRA752, - .iva.efuse.reg = STD_FUSE_OPP_VMIN_IVA_NOM, + .iva.value = VDD_IVA_DRA7, + .iva.efuse.reg = STD_FUSE_OPP_VMIN_IVA, .iva.efuse.reg_bits = DRA752_EFUSE_REGBITS, 
.iva.addr = TPS659038_REG_ADDR_SMPS8, .iva.pmic = &tps659038, @@ -401,15 +401,15 @@ struct vcores_data dra752_volts = { }; struct vcores_data dra722_volts = { - .mpu.value = VDD_MPU_DRA72x, - .mpu.efuse.reg = STD_FUSE_OPP_VMIN_MPU_NOM, + .mpu.value = VDD_MPU_DRA7, + .mpu.efuse.reg = STD_FUSE_OPP_VMIN_MPU, .mpu.efuse.reg_bits = DRA752_EFUSE_REGBITS, .mpu.addr = TPS65917_REG_ADDR_SMPS1, .mpu.pmic = &tps659038, .mpu.abb_tx_done_mask = OMAP_ABB_MPU_TXDONE_MASK, - .core.value = VDD_CORE_DRA72x, - .core.efuse.reg = STD_FUSE_OPP_VMIN_CORE_NOM, + .core.value = VDD_CORE_DRA7, + .core.efuse.reg = STD_FUSE_OPP_VMIN_CORE, .core.efuse.reg_bits = DRA752_EFUSE_REGBITS, .core.addr = TPS65917_REG_ADDR_SMPS2, .core.pmic = &tps659038, @@ -418,22 +418,22 @@ struct vcores_data dra722_volts = { * The DSPEVE, GPU and IVA rails are usually grouped on DRA72x * designs and powered by TPS65917 SMPS3, as on the J6Eco EVM. */ - .gpu.value = VDD_GPU_DRA72x, - .gpu.efuse.reg = STD_FUSE_OPP_VMIN_GPU_NOM, + .gpu.value = VDD_GPU_DRA7, + .gpu.efuse.reg = STD_FUSE_OPP_VMIN_GPU, .gpu.efuse.reg_bits = DRA752_EFUSE_REGBITS, .gpu.addr = TPS65917_REG_ADDR_SMPS3, .gpu.pmic = &tps659038, .gpu.abb_tx_done_mask = OMAP_ABB_GPU_TXDONE_MASK, - .eve.value = VDD_EVE_DRA72x, - .eve.efuse.reg = STD_FUSE_OPP_VMIN_DSPEVE_NOM, + .eve.value = VDD_EVE_DRA7, + .eve.efuse.reg = STD_FUSE_OPP_VMIN_DSPEVE, .eve.efuse.reg_bits = DRA752_EFUSE_REGBITS, .eve.addr = TPS65917_REG_ADDR_SMPS3, .eve.pmic = &tps659038, .eve.abb_tx_done_mask = OMAP_ABB_EVE_TXDONE_MASK, - .iva.value = VDD_IVA_DRA72x, - .iva.efuse.reg = STD_FUSE_OPP_VMIN_IVA_NOM, + .iva.value = VDD_IVA_DRA7, + .iva.efuse.reg = STD_FUSE_OPP_VMIN_IVA, .iva.efuse.reg_bits = DRA752_EFUSE_REGBITS, .iva.addr = TPS65917_REG_ADDR_SMPS3, .iva.pmic = &tps659038, @@ -602,7 +602,7 @@ void disable_edma3_clocks(void) } #endif -#ifdef CONFIG_USB_DWC3 +#if defined(CONFIG_USB_DWC3) || defined(CONFIG_USB_XHCI_OMAP) void enable_usb_clocks(int index) { u32 cm_l3init_usb_otg_ss_clkctrl = 0; 
@@ -614,9 +614,14 @@ void enable_usb_clocks(int index) setbits_le32((*prcm)->cm_l3init_usb_otg_ss1_clkctrl, OPTFCLKEN_REFCLK960M); - /* Enable 32 KHz clock for dwc3 */ + /* Enable 32 KHz clock for USB_PHY1 */ setbits_le32((*prcm)->cm_coreaon_usb_phy1_core_clkctrl, USBPHY_CORE_CLKCTRL_OPTFCLKEN_CLK32K); + + /* Enable 32 KHz clock for USB_PHY3 */ + if (is_dra7xx()) + setbits_le32((*prcm)->cm_coreaon_usb_phy3_core_clkctrl, + USBPHY_CORE_CLKCTRL_OPTFCLKEN_CLK32K); } else if (index == 1) { cm_l3init_usb_otg_ss_clkctrl = (*prcm)->cm_l3init_usb_otg_ss2_clkctrl; @@ -664,9 +669,14 @@ void disable_usb_clocks(int index) clrbits_le32((*prcm)->cm_l3init_usb_otg_ss1_clkctrl, OPTFCLKEN_REFCLK960M); - /* Disable 32 KHz clock for dwc3 */ + /* Disable 32 KHz clock for USB_PHY1 */ clrbits_le32((*prcm)->cm_coreaon_usb_phy1_core_clkctrl, USBPHY_CORE_CLKCTRL_OPTFCLKEN_CLK32K); + + /* Disable 32 KHz clock for USB_PHY3 */ + if (is_dra7xx()) + clrbits_le32((*prcm)->cm_coreaon_usb_phy3_core_clkctrl, + USBPHY_CORE_CLKCTRL_OPTFCLKEN_CLK32K); } else if (index == 1) { cm_l3init_usb_otg_ss_clkctrl = (*prcm)->cm_l3init_usb_otg_ss2_clkctrl; diff --git a/arch/arm/cpu/armv7/omap5/prcm-regs.c b/arch/arm/cpu/armv7/omap5/prcm-regs.c index 655e92b..b5f1d70 100644 --- a/arch/arm/cpu/armv7/omap5/prcm-regs.c +++ b/arch/arm/cpu/armv7/omap5/prcm-regs.c @@ -820,6 +820,7 @@ struct prcm_regs const dra7xx_prcm = { .cm_clkmode_dpll_gmac = 0x4a0052a8, .cm_coreaon_usb_phy1_core_clkctrl = 0x4a008640, .cm_coreaon_usb_phy2_core_clkctrl = 0x4a008688, + .cm_coreaon_usb_phy3_core_clkctrl = 0x4a008698, .cm_coreaon_l3init_60m_gfclk_clkctrl = 0x4a0086c0, /* cm1.mpu */ diff --git a/arch/arm/include/asm/arch-omap5/clock.h b/arch/arm/include/asm/arch-omap5/clock.h index 38d50d6..551c927 100644 --- a/arch/arm/include/asm/arch-omap5/clock.h +++ b/arch/arm/include/asm/arch-omap5/clock.h @@ -239,19 +239,22 @@ #define VDD_MPU_ES2_LOW 880 #define VDD_MM_ES2_LOW 880 -/* DRA74x/75x voltage settings in mv for OPP_NOM per DM */ -#define 
VDD_MPU_DRA752 1100 -#define VDD_EVE_DRA752 1060 -#define VDD_GPU_DRA752 1060 -#define VDD_CORE_DRA752 1060 -#define VDD_IVA_DRA752 1060 - -/* DRA72x voltage settings in mv for OPP_NOM per DM */ -#define VDD_MPU_DRA72x 1100 -#define VDD_EVE_DRA72x 1060 -#define VDD_GPU_DRA72x 1060 -#define VDD_CORE_DRA72x 1060 -#define VDD_IVA_DRA72x 1060 +/* DRA74x/75x/72x voltage settings in mv for OPP_NOM per DM */ +#define VDD_MPU_DRA7_NOM 1150 +#define VDD_CORE_DRA7_NOM 1150 +#define VDD_EVE_DRA7_NOM 1060 +#define VDD_GPU_DRA7_NOM 1060 +#define VDD_IVA_DRA7_NOM 1060 + +/* DRA74x/75x/72x voltage settings in mv for OPP_OD per DM */ +#define VDD_EVE_DRA7_OD 1150 +#define VDD_GPU_DRA7_OD 1150 +#define VDD_IVA_DRA7_OD 1150 + +/* DRA74x/75x/72x voltage settings in mv for OPP_HIGH per DM */ +#define VDD_EVE_DRA7_HIGH 1250 +#define VDD_GPU_DRA7_HIGH 1250 +#define VDD_IVA_DRA7_HIGH 1250 /* Efuse register offsets for DRA7xx platform */ #define DRA752_EFUSE_BASE 0x4A002000 @@ -283,6 +286,20 @@ /* STD_FUSE_OPP_VMIN_MPU_4 */ #define STD_FUSE_OPP_VMIN_MPU_HIGH (DRA752_EFUSE_BASE + 0x1B28) +/* Common voltage and Efuse register macros */ +/* DRA74x/DRA75x/DRA72x */ +#define VDD_MPU_DRA7 VDD_MPU_DRA7_NOM +#define VDD_CORE_DRA7 VDD_CORE_DRA7_NOM +#define VDD_EVE_DRA7 VDD_EVE_DRA7_NOM +#define VDD_GPU_DRA7 VDD_GPU_DRA7_NOM +#define VDD_IVA_DRA7 VDD_IVA_DRA7_NOM + +#define STD_FUSE_OPP_VMIN_MPU STD_FUSE_OPP_VMIN_MPU_NOM +#define STD_FUSE_OPP_VMIN_CORE STD_FUSE_OPP_VMIN_CORE_NOM +#define STD_FUSE_OPP_VMIN_DSPEVE STD_FUSE_OPP_VMIN_DSPEVE_NOM +#define STD_FUSE_OPP_VMIN_GPU STD_FUSE_OPP_VMIN_GPU_NOM +#define STD_FUSE_OPP_VMIN_IVA STD_FUSE_OPP_VMIN_IVA_NOM + /* Standard offset is 0.5v expressed in uv */ #define PALMAS_SMPS_BASE_VOLT_UV 500000 diff --git a/arch/arm/include/asm/arch-omap5/sys_proto.h b/arch/arm/include/asm/arch-omap5/sys_proto.h index 804266a..ab0e7fa 100644 --- a/arch/arm/include/asm/arch-omap5/sys_proto.h +++ b/arch/arm/include/asm/arch-omap5/sys_proto.h @@ -51,6 +51,7 @@ void 
sdelay(unsigned long); void setup_early_clocks(void); void prcm_init(void); void do_board_detect(void); +void vcores_init(void); void bypass_dpll(u32 const base); void freq_update_core(void); u32 get_sys_clk_freq(void); diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 11b80fb..ae1e42f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -15,6 +15,7 @@ */ #include <config.h> +#include <asm/unified.h> /* * Endian independent macros for shifting bytes within registers. diff --git a/arch/arm/include/asm/omap_common.h b/arch/arm/include/asm/omap_common.h index ac34b0e..07f3848 100644 --- a/arch/arm/include/asm/omap_common.h +++ b/arch/arm/include/asm/omap_common.h @@ -145,6 +145,7 @@ struct prcm_regs { u32 cm_ssc_modfreqdiv_dpll_unipro; u32 cm_coreaon_usb_phy1_core_clkctrl; u32 cm_coreaon_usb_phy2_core_clkctrl; + u32 cm_coreaon_usb_phy3_core_clkctrl; u32 cm_coreaon_l3init_60m_gfclk_clkctrl; /* cm2.core */ diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h new file mode 100644 index 0000000..1b26002 --- /dev/null +++ b/arch/arm/include/asm/unified.h @@ -0,0 +1,129 @@ +/* + * include/asm-arm/unified.h - Unified Assembler Syntax helper macros + * + * Copyright (C) 2008 ARM Limited + * + * SPDX-License-Identifier: GPL-2.0 + */ + +#ifndef __ASM_UNIFIED_H +#define __ASM_UNIFIED_H + +#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED) + .syntax unified +#endif + +#ifdef CONFIG_CPU_V7M +#define AR_CLASS(x...) +#define M_CLASS(x...) x +#else +#define AR_CLASS(x...) x +#define M_CLASS(x...) +#endif + +#ifdef CONFIG_THUMB2_KERNEL + +#if __GNUC__ < 4 +#error Thumb-2 kernel requires gcc >= 4 +#endif + +/* The CPSR bit describing the instruction set (Thumb) */ +#define PSR_ISETSTATE PSR_T_BIT + +#define ARM(x...) +#define THUMB(x...) 
x +#ifdef __ASSEMBLY__ +#define W(instr) instr.w +#else +#define WASM(instr) #instr ".w" +#endif + +#else /* !CONFIG_THUMB2_KERNEL */ + +/* The CPSR bit describing the instruction set (ARM) */ +#define PSR_ISETSTATE 0 + +#define ARM(x...) x +#define THUMB(x...) +#ifdef __ASSEMBLY__ +#define W(instr) instr +#else +#define WASM(instr) #instr +#endif + +#endif /* CONFIG_THUMB2_KERNEL */ + +#ifndef CONFIG_ARM_ASM_UNIFIED + +/* + * If the unified assembly syntax isn't used (in ARM mode), these + * macros expand to an empty string + */ +#ifdef __ASSEMBLY__ + .macro it, cond + .endm + .macro itt, cond + .endm + .macro ite, cond + .endm + .macro ittt, cond + .endm + .macro itte, cond + .endm + .macro itet, cond + .endm + .macro itee, cond + .endm + .macro itttt, cond + .endm + .macro ittte, cond + .endm + .macro ittet, cond + .endm + .macro ittee, cond + .endm + .macro itett, cond + .endm + .macro itete, cond + .endm + .macro iteet, cond + .endm + .macro iteee, cond + .endm +#else /* !__ASSEMBLY__ */ +__asm__( +" .macro it, cond\n" +" .endm\n" +" .macro itt, cond\n" +" .endm\n" +" .macro ite, cond\n" +" .endm\n" +" .macro ittt, cond\n" +" .endm\n" +" .macro itte, cond\n" +" .endm\n" +" .macro itet, cond\n" +" .endm\n" +" .macro itee, cond\n" +" .endm\n" +" .macro itttt, cond\n" +" .endm\n" +" .macro ittte, cond\n" +" .endm\n" +" .macro ittet, cond\n" +" .endm\n" +" .macro ittee, cond\n" +" .endm\n" +" .macro itett, cond\n" +" .endm\n" +" .macro itete, cond\n" +" .endm\n" +" .macro iteet, cond\n" +" .endm\n" +" .macro iteee, cond\n" +" .endm\n"); +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_ARM_ASM_UNIFIED */ + +#endif /* !__ASM_UNIFIED_H */ diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index b535dbe..0e05e87 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -5,9 +5,9 @@ # SPDX-License-Identifier: GPL-2.0+ # -lib-$(CONFIG_USE_PRIVATE_LIBGCC) += _ashldi3.o _ashrdi3.o _divsi3.o \ - _lshrdi3.o _modsi3.o _udivsi3.o _umodsi3.o div0.o \ - 
_uldivmod.o +lib-$(CONFIG_USE_PRIVATE_LIBGCC) += ashldi3.o ashrdi3.o lshrdi3.o \ + lib1funcs.o uldivmod.o div0.o \ + div64.o muldi3.o ifdef CONFIG_CPU_V7M obj-y += vectors_m.o crt0.o @@ -62,9 +62,17 @@ ifneq (,$(findstring -mabi=aapcs-linux,$(PLATFORM_CPPFLAGS))) extra-y += eabi_compat.o endif +asflags-y += -DCONFIG_ARM_ASM_UNIFIED +ifeq ($(CONFIG_SPL_BUILD)$(CONFIG_TEGRA),yy) +asflags-y += -D__LINUX_ARM_ARCH__=4 +else +asflags-y += -D__LINUX_ARM_ARCH__=$(CONFIG_SYS_ARM_ARCH) +endif + # some files can only build in ARM or THUMB2, not THUMB1 ifdef CONFIG_SYS_THUMB_BUILD +asflags-$(CONFIG_HAS_THUMB2) += -DCONFIG_THUMB2_KERNEL ifndef CONFIG_HAS_THUMB2 # for C files, just apend -marm, which will override previous -mthumb* @@ -82,6 +90,5 @@ AFLAGS_REMOVE_memset.o := -mthumb -mthumb-interwork AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD - endif endif diff --git a/arch/arm/lib/_divsi3.S b/arch/arm/lib/_divsi3.S deleted file mode 100644 index c463c68..0000000 --- a/arch/arm/lib/_divsi3.S +++ /dev/null @@ -1,143 +0,0 @@ -#include <linux/linkage.h> - -.macro ARM_DIV_BODY dividend, divisor, result, curbit - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \curbit, \divisor - clz \result, \dividend - sub \result, \curbit, \result - mov \curbit, #1 - mov \divisor, \divisor, lsl \result - mov \curbit, \curbit, lsl \result - mov \result, #0 - -#else - - @ Initially shift the divisor left 3 bits if possible, - @ set curbit accordingly. This allows for curbit to be located - @ at the left end of each 4 bit nibbles in the division loop - @ to save one loop in most cases. - tst \divisor, #0xe0000000 - moveq \divisor, \divisor, lsl #3 - moveq \curbit, #8 - movne \curbit, #1 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. 
-1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - movlo \curbit, \curbit, lsl #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - movlo \curbit, \curbit, lsl #1 - blo 1b - - mov \result, #0 - -#endif - - @ Division loop -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - orrhs \result, \result, \curbit - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - orrhs \result, \result, \curbit, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - orrhs \result, \result, \curbit, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - orrhs \result, \result, \curbit, lsr #3 - cmp \dividend, #0 @ Early termination? - movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? - movne \divisor, \divisor, lsr #4 - bne 1b - -.endm - -.macro ARM_DIV2_ORDER divisor, order - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - rsb \order, \order, #31 - -#else - - cmp \divisor, #(1 << 16) - movhs \divisor, \divisor, lsr #16 - movhs \order, #16 - movlo \order, #0 - - cmp \divisor, #(1 << 8) - movhs \divisor, \divisor, lsr #8 - addhs \order, \order, #8 - - cmp \divisor, #(1 << 4) - movhs \divisor, \divisor, lsr #4 - addhs \order, \order, #4 - - cmp \divisor, #(1 << 2) - addhi \order, \order, #3 - addls \order, \order, \divisor, lsr #1 - -#endif - -.endm - - .align 5 -.globl __divsi3 -__divsi3: -ENTRY(__aeabi_idiv) - cmp r1, #0 - eor ip, r0, r1 @ save the sign of the result. - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - subs r2, r1, #1 @ division by 1 or -1 ? - beq 10f - movs r3, r0 - rsbmi r3, r0, #0 @ positive dividend value - cmp r3, r1 - bls 11f - tst r1, r2 @ divisor is power of 2 ? 
- beq 12f - - ARM_DIV_BODY r3, r1, r0, r2 - - cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr - -10: teq ip, r0 @ same sign ? - rsbmi r0, r0, #0 - mov pc, lr - -11: movlo r0, #0 - moveq r0, ip, asr #31 - orreq r0, r0, #1 - mov pc, lr - -12: ARM_DIV2_ORDER r1, r2 - - cmp ip, #0 - mov r0, r3, lsr r2 - rsbmi r0, r0, #0 - mov pc, lr - -Ldiv0: - - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 -ENDPROC(__aeabi_idiv) diff --git a/arch/arm/lib/_modsi3.S b/arch/arm/lib/_modsi3.S deleted file mode 100644 index c5e1c22..0000000 --- a/arch/arm/lib/_modsi3.S +++ /dev/null @@ -1,99 +0,0 @@ -#include <linux/linkage.h> - -.macro ARM_MOD_BODY dividend, divisor, order, spare - -#if __LINUX_ARM_ARCH__ >= 5 - - clz \order, \divisor - clz \spare, \dividend - sub \order, \order, \spare - mov \divisor, \divisor, lsl \order - -#else - - mov \order, #0 - - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. -1: cmp \divisor, #0x10000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #4 - addlo \order, \order, #4 - blo 1b - - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. -1: cmp \divisor, #0x80000000 - cmplo \divisor, \dividend - movlo \divisor, \divisor, lsl #1 - addlo \order, \order, #1 - blo 1b - -#endif - - @ Perform all needed substractions to keep only the reminder. - @ Do comparisons in batch of 4 first. 
- subs \order, \order, #3 @ yes, 3 is intended here - blt 2f - -1: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - cmp \dividend, \divisor, lsr #1 - subhs \dividend, \dividend, \divisor, lsr #1 - cmp \dividend, \divisor, lsr #2 - subhs \dividend, \dividend, \divisor, lsr #2 - cmp \dividend, \divisor, lsr #3 - subhs \dividend, \dividend, \divisor, lsr #3 - cmp \dividend, #1 - mov \divisor, \divisor, lsr #4 - subges \order, \order, #4 - bge 1b - - tst \order, #3 - teqne \dividend, #0 - beq 5f - - @ Either 1, 2 or 3 comparison/substractions are left. -2: cmn \order, #2 - blt 4f - beq 3f - cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -3: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor - mov \divisor, \divisor, lsr #1 -4: cmp \dividend, \divisor - subhs \dividend, \dividend, \divisor -5: -.endm - - .align 5 -ENTRY(__modsi3) - cmp r1, #0 - beq Ldiv0 - rsbmi r1, r1, #0 @ loops below use unsigned. - movs ip, r0 @ preserve sign of dividend - rsbmi r0, r0, #0 @ if negative make positive - subs r2, r1, #1 @ compare divisor with 1 - cmpne r0, r1 @ compare dividend with divisor - moveq r0, #0 - tsthi r1, r2 @ see if divisor is power of 2 - andeq r0, r0, r2 - bls 10f - - ARM_MOD_BODY r0, r1, r2, r3 - -10: cmp ip, #0 - rsbmi r0, r0, #0 - mov pc, lr -ENDPROC(__modsi3) - -Ldiv0: - - str lr, [sp, #-4]! - bl __div0 - mov r0, #0 @ About as wrong as it could be. - ldr pc, [sp], #4 diff --git a/arch/arm/lib/_udivsi3.S b/arch/arm/lib/_udivsi3.S deleted file mode 100644 index 3b653be..0000000 --- a/arch/arm/lib/_udivsi3.S +++ /dev/null @@ -1,95 +0,0 @@ -#include <linux/linkage.h> - -/* # 1 "libgcc1.S" */ -@ libgcc1 routines for ARM cpu. 
-@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) -dividend .req r0 -divisor .req r1 -result .req r2 -curbit .req r3 -/* ip .req r12 */ -/* sp .req r13 */ -/* lr .req r14 */ -/* pc .req r15 */ - .text - .globl __udivsi3 - .type __udivsi3 ,function - .globl __aeabi_uidiv - .type __aeabi_uidiv ,function - .align 0 - __udivsi3: - __aeabi_uidiv: - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - mov result, #0 - cmp dividend, divisor - bcc Lgot_result -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum -Loop3: - @ Test for possible subtractions, and note which bits - @ are done in the result. On the final pass, this may subtract - @ too much from the dividend, but the result will be ok, since the - @ "bit" will have been shifted out at the bottom. - cmp dividend, divisor - subcs dividend, dividend, divisor - orrcs result, result, curbit - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs result, result, curbit, lsr #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs result, result, curbit, lsr #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs result, result, curbit, lsr #3 - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 -Lgot_result: - mov r0, result - mov pc, lr -Ldiv0: - str lr, [sp, #-4]! 
- bl __div0 (PLT) - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc} - .size __udivsi3 , . - __udivsi3 - -ENTRY(__aeabi_uidivmod) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_uidiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr -ENDPROC(__aeabi_uidivmod) - -ENTRY(__aeabi_idivmod) - - stmfd sp!, {r0, r1, ip, lr} - bl __aeabi_idiv - ldmfd sp!, {r1, r2, ip, lr} - mul r3, r0, r2 - sub r1, r1, r3 - mov pc, lr -ENDPROC(__aeabi_idivmod) diff --git a/arch/arm/lib/_umodsi3.S b/arch/arm/lib/_umodsi3.S deleted file mode 100644 index b166737..0000000 --- a/arch/arm/lib/_umodsi3.S +++ /dev/null @@ -1,90 +0,0 @@ -#include <linux/linkage.h> - -/* # 1 "libgcc1.S" */ -@ libgcc1 routines for ARM cpu. -@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) -/* # 145 "libgcc1.S" */ -dividend .req r0 -divisor .req r1 -overdone .req r2 -curbit .req r3 -/* ip .req r12 */ -/* sp .req r13 */ -/* lr .req r14 */ -/* pc .req r15 */ - .text - .type __umodsi3 ,function - .align 0 - ENTRY(__umodsi3) - cmp divisor, #0 - beq Ldiv0 - mov curbit, #1 - cmp dividend, divisor - movcc pc, lr -Loop1: - @ Unless the divisor is very big, shift it up in multiples of - @ four bits, since this is the amount of unwinding in the main - @ division loop. Continue shifting until the divisor is - @ larger than the dividend. - cmp divisor, #0x10000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #4 - movcc curbit, curbit, lsl #4 - bcc Loop1 -Lbignum: - @ For very big divisors, we must shift it a bit at a time, or - @ we will be in danger of overflowing. - cmp divisor, #0x80000000 - cmpcc divisor, dividend - movcc divisor, divisor, lsl #1 - movcc curbit, curbit, lsl #1 - bcc Lbignum -Loop3: - @ Test for possible subtractions. On the final pass, this may - @ subtract too much from the dividend, so keep track of which - @ subtractions are done, we can fix them up afterwards... 
- mov overdone, #0 - cmp dividend, divisor - subcs dividend, dividend, divisor - cmp dividend, divisor, lsr #1 - subcs dividend, dividend, divisor, lsr #1 - orrcs overdone, overdone, curbit, ror #1 - cmp dividend, divisor, lsr #2 - subcs dividend, dividend, divisor, lsr #2 - orrcs overdone, overdone, curbit, ror #2 - cmp dividend, divisor, lsr #3 - subcs dividend, dividend, divisor, lsr #3 - orrcs overdone, overdone, curbit, ror #3 - mov ip, curbit - cmp dividend, #0 @ Early termination? - movnes curbit, curbit, lsr #4 @ No, any more bits to do? - movne divisor, divisor, lsr #4 - bne Loop3 - @ Any subtractions that we should not have done will be recorded in - @ the top three bits of "overdone". Exactly which were not needed - @ are governed by the position of the bit, stored in ip. - @ If we terminated early, because dividend became zero, - @ then none of the below will match, since the bit in ip will not be - @ in the bottom nibble. - ands overdone, overdone, #0xe0000000 - moveq pc, lr @ No fixups needed - tst overdone, ip, ror #3 - addne dividend, dividend, divisor, lsr #3 - tst overdone, ip, ror #2 - addne dividend, dividend, divisor, lsr #2 - tst overdone, ip, ror #1 - addne dividend, dividend, divisor, lsr #1 - mov pc, lr -Ldiv0: - str lr, [sp, #-4]! - bl __div0 (PLT) - mov r0, #0 @ about as wrong as it could be - ldmia sp!, {pc} - .size __umodsi3 , . 
- __umodsi3 -/* # 320 "libgcc1.S" */ -/* # 421 "libgcc1.S" */ -/* # 433 "libgcc1.S" */ -/* # 456 "libgcc1.S" */ -/* # 500 "libgcc1.S" */ -/* # 580 "libgcc1.S" */ -ENDPROC(__umodsi3) diff --git a/arch/arm/lib/_ashldi3.S b/arch/arm/lib/ashldi3.S index 9c34c21..6c9ae91 100644 --- a/arch/arm/lib/_ashldi3.S +++ b/arch/arm/lib/ashldi3.S @@ -5,6 +5,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -14,15 +15,20 @@ #define ah r1 #endif -.globl __ashldi3 -__ashldi3: +.pushsection .text.__ashldi3, "ax" @ must open before ENTRY() and close after ENDPROC() +ENTRY(__ashldi3) ENTRY(__aeabi_llsl) subs r3, r2, #32 rsb ip, r2, #32 movmi ah, ah, lsl r2 movpl ah, al, lsl r3 - orrmi ah, ah, al, lsr ip + ARM( orrmi ah, ah, al, lsr ip ) + THUMB( lsrmi r3, al, ip ) + THUMB( orrmi ah, ah, r3 ) mov al, al, lsl r2 - mov pc, lr + ret lr + +ENDPROC(__ashldi3) ENDPROC(__aeabi_llsl) +.popsection diff --git a/arch/arm/lib/_ashrdi3.S b/arch/arm/lib/ashrdi3.S index c74fd64..3eb59ec 100644 --- a/arch/arm/lib/_ashrdi3.S +++ b/arch/arm/lib/ashrdi3.S @@ -5,6 +5,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -14,15 +15,20 @@ #define ah r1 #endif -.globl __ashrdi3 -__ashrdi3: +.pushsection .text.__ashrdi3, "ax" @ must open before ENTRY() and close after ENDPROC() +ENTRY(__ashrdi3) ENTRY(__aeabi_lasr) subs r3, r2, #32 rsb ip, r2, #32 movmi al, al, lsr r2 movpl al, ah, asr r3 - orrmi al, al, ah, lsl ip + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) mov ah, ah, asr r2 - mov pc, lr + ret lr + +ENDPROC(__ashrdi3) ENDPROC(__aeabi_lasr) +.popsection diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S new file mode 100644 index 0000000..5bfb41d --- /dev/null +++ b/arch/arm/lib/div64.S @@ -0,0 +1,214 @@ +/* + * linux/arch/arm/lib/div64.S + * + * Optimized computation of 64-bit dividend / 32-bit divisor + * + * Author: Nicolas Pitre + * Created: Oct 5, 2003 + * Copyright: Monta Vista Software, Inc.
+ * + * SPDX-License-Identifier: GPL-2.0 + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> +#ifdef __UBOOT__ +#define UNWIND(x...) +#endif + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +/* + * __do_div64: perform a division with 64-bit dividend and 32-bit divisor. + * + * Note: Calling convention is totally non standard for optimal code. + * This is meant to be used by do_div() from include/asm/div64.h only. + * + * Input parameters: + * xh-xl = dividend (clobbered) + * r4 = divisor (preserved) + * + * Output values: + * yh-yl = result + * xh = remainder + * + * Clobbered regs: xl, ip + */ + +.pushsection .text.__do_div64, "ax" @ must open before ENTRY() and close after ENDPROC() +ENTRY(__do_div64) +UNWIND(.fnstart) + + @ Test for easy paths first. + subs ip, r4, #1 + bls 9f @ divisor is 0 or 1 + tst ip, r4 + beq 8f @ divisor is power of 2 + + @ See if we need to handle upper 32-bit result. + cmp xh, r4 + mov yh, #0 + blo 3f + + @ Align divisor with upper part of dividend. + @ The aligned divisor is stored in yl preserving the original. + @ The bit position is stored in ip. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz yl, r4 + clz ip, xh + sub yl, yl, ip + mov ip, #1 + mov ip, ip, lsl yl + mov yl, r4, lsl yl + +#else + + mov yl, r4 + mov ip, #1 +1: cmp yl, #0x80000000 + cmpcc yl, xh + movcc yl, yl, lsl #1 + movcc ip, ip, lsl #1 + bcc 1b + +#endif + + @ The division loop for needed upper bit positions. + @ Break out early if dividend reaches 0. +2: cmp xh, yl + orrcs yh, yh, ip + subscs xh, xh, yl + movsne ip, ip, lsr #1 + mov yl, yl, lsr #1 + bne 2b + + @ See if we need to handle lower 32-bit result. +3: cmp xh, #0 + mov yl, #0 + cmpeq xl, r4 + movlo xh, xl + retlo lr + + @ The division loop for lower bit positions. + @ Here we shift remainer bits leftwards rather than moving the + @ divisor for comparisons, considering the carry-out bit as well.
+ mov ip, #0x80000000 +4: movs xl, xl, lsl #1 + adcs xh, xh, xh + beq 6f + cmpcc xh, r4 +5: orrcs yl, yl, ip + subcs xh, xh, r4 + movs ip, ip, lsr #1 + bne 4b + ret lr + + @ The top part of remainder became zero. If carry is set + @ (the 33th bit) this is a false positive so resume the loop. + @ Otherwise, if lower part is also null then we are done. +6: bcs 5b + cmp xl, #0 + reteq lr + + @ We still have remainer bits in the low part. Bring them up. + +#if __LINUX_ARM_ARCH__ >= 5 + + clz xh, xl @ we know xh is zero here so... + add xh, xh, #1 + mov xl, xl, lsl xh + mov ip, ip, lsr xh + +#else + +7: movs xl, xl, lsl #1 + mov ip, ip, lsr #1 + bcc 7b + +#endif + + @ Current remainder is now 1. It is worthless to compare with + @ divisor at this point since divisor can not be smaller than 3 here. + @ If possible, branch for another shift in the division loop. + @ If no bit position left then we are done. + movs ip, ip, lsr #1 + mov xh, #1 + bne 4b + ret lr + +8: @ Division by a power of 2: determine what that divisor order is + @ then simply shift values around + +#if __LINUX_ARM_ARCH__ >= 5 + + clz ip, r4 + rsb ip, ip, #31 + +#else + + mov yl, r4 + cmp r4, #(1 << 16) + mov ip, #0 + movhs yl, yl, lsr #16 + movhs ip, #16 + + cmp yl, #(1 << 8) + movhs yl, yl, lsr #8 + addhs ip, ip, #8 + + cmp yl, #(1 << 4) + movhs yl, yl, lsr #4 + addhs ip, ip, #4 + + cmp yl, #(1 << 2) + addhi ip, ip, #3 + addls ip, ip, yl, lsr #1 + +#endif + + mov yh, xh, lsr ip + mov yl, xl, lsr ip + rsb ip, ip, #32 + ARM( orr yl, yl, xh, lsl ip ) + THUMB( lsl xh, xh, ip ) + THUMB( orr yl, yl, xh ) + mov xh, xl, lsl ip + mov xh, xh, lsr ip + ret lr + + @ eq -> division by 1: obvious enough... +9: moveq yl, xl + moveq yh, xh + moveq xh, #0 + reteq lr @ ne -> divisor is 0: falls through to Ldiv0_64 +UNWIND(.fnend) + +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) +Ldiv0_64: + @ Division by 0: + str lr, [sp, #-8]! + bl __div0 + + @ as wrong as it could be...
+ mov yl, #0 + mov yh, #0 + mov xh, #0 + ldr pc, [sp], #8 + +UNWIND(.fnend) +ENDPROC(__do_div64) +.popsection diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S new file mode 100644 index 0000000..f1becda --- /dev/null +++ b/arch/arm/lib/lib1funcs.S @@ -0,0 +1,429 @@ +/* + * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines + * + * Author: Nicolas Pitre <nico@fluxnic.net> + * - contributed to gcc-3.4 on Sep 30, 2003 + * - adapted for the Linux kernel on Oct 2, 2003 + */ + +/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. + + * SPDX-License-Identifier: GPL-2.0+ + */ + + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * U-Boot compatibility bit, define empty UNWIND() macro as, since we + * do not support stack unwinding and define CONFIG_AEABI to make all + * of the functions available without diverging from Linux code. + */ +#ifdef __UBOOT__ +#define UNWIND(x...) +#define CONFIG_AEABI +#endif + +.macro ARM_DIV_BODY dividend, divisor, result, curbit + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \curbit, \divisor + clz \result, \dividend + sub \result, \curbit, \result + mov \curbit, #1 + mov \divisor, \divisor, lsl \result + mov \curbit, \curbit, lsl \result + mov \result, #0 + +#else + + @ Initially shift the divisor left 3 bits if possible, + @ set curbit accordingly. This allows for curbit to be located + @ at the left end of each 4 bit nibbles in the division loop + @ to save one loop in most cases. + tst \divisor, #0xe0000000 + moveq \divisor, \divisor, lsl #3 + moveq \curbit, #8 + movne \curbit, #1 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. Continue shifting until the divisor is + @ larger than the dividend.
+1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + movlo \curbit, \curbit, lsl #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + movlo \curbit, \curbit, lsl #1 + blo 1b + + mov \result, #0 + +#endif + + @ Division loop +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + orrhs \result, \result, \curbit + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + orrhs \result, \result, \curbit, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + orrhs \result, \result, \curbit, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + orrhs \result, \result, \curbit, lsr #3 + cmp \dividend, #0 @ Early termination? + movsne \curbit, \curbit, lsr #4 @ No, any more bits to do? + movne \divisor, \divisor, lsr #4 + bne 1b + +.endm + + +.macro ARM_DIV2_ORDER divisor, order + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + rsb \order, \order, #31 + +#else + + cmp \divisor, #(1 << 16) + movhs \divisor, \divisor, lsr #16 + movhs \order, #16 + movlo \order, #0 + + cmp \divisor, #(1 << 8) + movhs \divisor, \divisor, lsr #8 + addhs \order, \order, #8 + + cmp \divisor, #(1 << 4) + movhs \divisor, \divisor, lsr #4 + addhs \order, \order, #4 + + cmp \divisor, #(1 << 2) + addhi \order, \order, #3 + addls \order, \order, \divisor, lsr #1 + +#endif + +.endm + + +.macro ARM_MOD_BODY dividend, divisor, order, spare + +#if __LINUX_ARM_ARCH__ >= 5 + + clz \order, \divisor + clz \spare, \dividend + sub \order, \order, \spare + mov \divisor, \divisor, lsl \order + +#else + + mov \order, #0 + + @ Unless the divisor is very big, shift it up in multiples of + @ four bits, since this is the amount of unwinding in the main + @ division loop. 
Continue shifting until the divisor is + @ larger than the dividend. +1: cmp \divisor, #0x10000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #4 + addlo \order, \order, #4 + blo 1b + + @ For very big divisors, we must shift it a bit at a time, or + @ we will be in danger of overflowing. +1: cmp \divisor, #0x80000000 + cmplo \divisor, \dividend + movlo \divisor, \divisor, lsl #1 + addlo \order, \order, #1 + blo 1b + +#endif + + @ Perform all needed subtractions to keep only the reminder. + @ Do comparisons in batch of 4 first. + subs \order, \order, #3 @ yes, 3 is intended here + blt 2f + +1: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + cmp \dividend, \divisor, lsr #1 + subhs \dividend, \dividend, \divisor, lsr #1 + cmp \dividend, \divisor, lsr #2 + subhs \dividend, \dividend, \divisor, lsr #2 + cmp \dividend, \divisor, lsr #3 + subhs \dividend, \dividend, \divisor, lsr #3 + cmp \dividend, #1 + mov \divisor, \divisor, lsr #4 + subsge \order, \order, #4 + bge 1b + + tst \order, #3 + teqne \dividend, #0 + beq 5f + + @ Either 1, 2 or 3 comparison/subtractions are left. 
+2: cmn \order, #2 + blt 4f + beq 3f + cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +3: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor + mov \divisor, \divisor, lsr #1 +4: cmp \dividend, \divisor + subhs \dividend, \dividend, \divisor +5: +.endm + + +ENTRY(__udivsi3) +ENTRY(__aeabi_uidiv) +UNWIND(.fnstart) +.pushsection .text.__udivsi3, "ax" + + subs r2, r1, #1 + reteq lr + bcc Ldiv0 + cmp r0, r1 + bls 11f + tst r1, r2 + beq 12f + + ARM_DIV_BODY r0, r1, r2, r3 + + mov r0, r2 + ret lr + +11: moveq r0, #1 + movne r0, #0 + ret lr + +12: ARM_DIV2_ORDER r1, r2 + + mov r0, r0, lsr r2 + ret lr + +.popsection +UNWIND(.fnend) +ENDPROC(__udivsi3) +ENDPROC(__aeabi_uidiv) + +ENTRY(__umodsi3) +UNWIND(.fnstart) +.pushsection .text.__umodsi3, "ax" + + subs r2, r1, #1 @ compare divisor with 1 + bcc Ldiv0 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + retls lr + + ARM_MOD_BODY r0, r1, r2, r3 + + ret lr + +.popsection +UNWIND(.fnend) +ENDPROC(__umodsi3) + +ENTRY(__divsi3) +ENTRY(__aeabi_idiv) +UNWIND(.fnstart) +.pushsection .text.__divsi3, "ax" + + cmp r1, #0 + eor ip, r0, r1 @ save the sign of the result. + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + subs r2, r1, #1 @ division by 1 or -1 ? + beq 10f + movs r3, r0 + rsbmi r3, r0, #0 @ positive dividend value + cmp r3, r1 + bls 11f + tst r1, r2 @ divisor is power of 2 ? + beq 12f + + ARM_DIV_BODY r3, r1, r0, r2 + + cmp ip, #0 + rsbmi r0, r0, #0 + ret lr + +10: teq ip, r0 @ same sign ? 
+ rsbmi r0, r0, #0 + ret lr + +11: movlo r0, #0 + moveq r0, ip, asr #31 + orreq r0, r0, #1 + ret lr + +12: ARM_DIV2_ORDER r1, r2 + + cmp ip, #0 + mov r0, r3, lsr r2 + rsbmi r0, r0, #0 + ret lr + +.popsection +UNWIND(.fnend) +ENDPROC(__divsi3) +ENDPROC(__aeabi_idiv) + +ENTRY(__modsi3) +UNWIND(.fnstart) +.pushsection .text.__modsi3, "ax" + + cmp r1, #0 + beq Ldiv0 + rsbmi r1, r1, #0 @ loops below use unsigned. + movs ip, r0 @ preserve sign of dividend + rsbmi r0, r0, #0 @ if negative make positive + subs r2, r1, #1 @ compare divisor with 1 + cmpne r0, r1 @ compare dividend with divisor + moveq r0, #0 + tsthi r1, r2 @ see if divisor is power of 2 + andeq r0, r0, r2 + bls 10f + + ARM_MOD_BODY r0, r1, r2, r3 + +10: cmp ip, #0 + rsbmi r0, r0, #0 + ret lr + +.popsection +UNWIND(.fnend) +ENDPROC(__modsi3) + +#ifdef CONFIG_AEABI + +ENTRY(__aeabi_uidivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) +.pushsection .text.__aeabi_uidivmod, "ax" + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_uidiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + ret lr + +.popsection +UNWIND(.fnend) +ENDPROC(__aeabi_uidivmod) + +ENTRY(__aeabi_idivmod) +UNWIND(.fnstart) +UNWIND(.save {r0, r1, ip, lr} ) +.pushsection .text.__aeabi_uidivmod, "ax" + + stmfd sp!, {r0, r1, ip, lr} + bl __aeabi_idiv + ldmfd sp!, {r1, r2, ip, lr} + mul r3, r0, r2 + sub r1, r1, r3 + ret lr + +.popsection +UNWIND(.fnend) +ENDPROC(__aeabi_idivmod) + +#endif + +Ldiv0: +UNWIND(.fnstart) +UNWIND(.pad #4) +UNWIND(.save {lr}) +.pushsection .text.Ldiv0, "ax" + + str lr, [sp, #-8]! + bl __div0 + mov r0, #0 @ About as wrong as it could be. 
+ ldr pc, [sp], #8 + +.popsection +UNWIND(.fnend) +ENDPROC(Ldiv0) + +/* Thumb-1 specialities */ +#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(CONFIG_HAS_THUMB2) +ENTRY(__gnu_thumb1_case_sqi) +.pushsection .text.__gnu_thumb1_case_sqi, "ax" + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrsb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr +.popsection +ENDPROC(__gnu_thumb1_case_sqi) + +ENTRY(__gnu_thumb1_case_uqi) +.pushsection .text.__gnu_thumb1_case_uqi, "ax" + push {r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r1, r1, #1 + ldrb r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r1} + bx lr +.popsection +ENDPROC(__gnu_thumb1_case_uqi) + +ENTRY(__gnu_thumb1_case_shi) +.pushsection .text.__gnu_thumb1_case_shi, "ax" + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrsh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr +.popsection +ENDPROC(__gnu_thumb1_case_shi) + +ENTRY(__gnu_thumb1_case_uhi) +.pushsection .text.__gnu_thumb1_case_uhi, "ax" + push {r0, r1} + mov r1, lr + lsrs r1, r1, #1 + lsls r0, r0, #1 + lsls r1, r1, #1 + ldrh r1, [r1, r0] + lsls r1, r1, #1 + add lr, lr, r1 + pop {r0, r1} + bx lr +.popsection +ENDPROC(__gnu_thumb1_case_uhi) +#endif diff --git a/arch/arm/lib/_lshrdi3.S b/arch/arm/lib/lshrdi3.S index 1f9b916..f710ccb 100644 --- a/arch/arm/lib/_lshrdi3.S +++ b/arch/arm/lib/lshrdi3.S @@ -5,6 +5,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -14,15 +15,20 @@ #define ah r1 #endif -.globl __lshrdi3 -__lshrdi3: +ENTRY(__lshrdi3) ENTRY(__aeabi_llsr) +.pushsection .text.__lshldi3, "ax" subs r3, r2, #32 rsb ip, r2, #32 movmi al, al, lsr r2 movpl al, ah, lsr r3 - orrmi al, al, ah, lsl ip + ARM( orrmi al, al, ah, lsl ip ) + THUMB( lslmi r3, ah, ip ) + THUMB( orrmi al, al, r3 ) mov ah, ah, lsr r2 - mov pc, lr + ret lr + +.popsection +ENDPROC(__lshrdi3) ENDPROC(__aeabi_llsr) diff --git a/arch/arm/lib/memcpy.S 
b/arch/arm/lib/memcpy.S index 7d9fc0f..00602e9 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -13,12 +13,6 @@ #include <linux/linkage.h> #include <asm/assembler.h> -#if defined(CONFIG_SYS_THUMB_BUILD) && !defined(MEMCPY_NO_THUMB_BUILD) -#define W(instr) instr.w -#else -#define W(instr) instr -#endif - #define LDR1W_SHIFT 0 #define STR1W_SHIFT 0 diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S new file mode 100644 index 0000000..bc255c5 --- /dev/null +++ b/arch/arm/lib/muldi3.S @@ -0,0 +1,48 @@ +/* + * linux/arch/arm/lib/muldi3.S + * + * Author: Nicolas Pitre + * Created: Oct 19, 2005 + * Copyright: Monta Vista Software, Inc. + * + * SPDX-License-Identifier: GPL-2.0 + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +#ifdef __ARMEB__ +#define xh r0 +#define xl r1 +#define yh r2 +#define yl r3 +#else +#define xl r0 +#define xh r1 +#define yl r2 +#define yh r3 +#endif + +ENTRY(__muldi3) +ENTRY(__aeabi_lmul) +.pushsection .text.__muldi3, "ax" + + mul xh, yl, xh + mla xh, xl, yh, xh + mov ip, xl, lsr #16 + mov yh, yl, lsr #16 + bic xl, xl, ip, lsl #16 + bic yl, yl, yh, lsl #16 + mla xh, yh, ip, xh + mul yh, xl, yh + mul xl, yl, xl + mul ip, yl, ip + adds xl, xl, yh, lsl #16 + adc xh, xh, yh, lsr #16 + adds xl, xl, ip, lsl #16 + adc xh, xh, ip, lsr #16 + ret lr + +.popsection +ENDPROC(__muldi3) +ENDPROC(__aeabi_lmul) diff --git a/arch/arm/lib/_uldivmod.S b/arch/arm/lib/uldivmod.S index 426c2f2..bbc44c6 100644 --- a/arch/arm/lib/_uldivmod.S +++ b/arch/arm/lib/uldivmod.S @@ -9,10 +9,6 @@ #include <linux/linkage.h> #include <asm/assembler.h> -/* We don't use Thumb instructions for now */ -#define ARM(x...) x -#define THUMB(x...) 
- /* * A, Q = r0 + (r1 << 32) * B, R = r2 + (r3 << 32) @@ -38,6 +34,8 @@ TMP .req r8 ) ENTRY(__aeabi_uldivmod) +.pushsection .text.__aeabi_uldivmod, "ax" + stmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) lr} @ Test if B == 0 orrs ip, B_0, B_1 @ Z set -> B == 0 @@ -226,7 +224,9 @@ THUMB( orrpl A_0, A_0, TMP ) @ Shift A to the right by the appropriate amount. rsb D_1, D_0, #32 mov Q_0, A_0, lsr D_0 - orr Q_0, A_1, lsl D_1 + ARM( orr Q_0, Q_0, A_1, lsl D_1 ) + THUMB( lsl A_1, D_1 ) + THUMB( orr Q_0, A_1 ) mov Q_1, A_1, lsr D_0 @ Move C to R mov R_0, C_0 @@ -242,4 +242,5 @@ L_div_by_0: mov R_0, #0 mov R_1, #0 ldmfd sp!, {r4, r5, r6, r7, THUMB(TMP,) pc} +.popsection ENDPROC(__aeabi_uldivmod) diff --git a/arch/arm/mach-keystone/include/mach/hardware-k2g.h b/arch/arm/mach-keystone/include/mach/hardware-k2g.h index ca2a119..0f6bf61 100644 --- a/arch/arm/mach-keystone/include/mach/hardware-k2g.h +++ b/arch/arm/mach-keystone/include/mach/hardware-k2g.h @@ -74,4 +74,16 @@ #define K2G_GPIO_DIR_OFFSET 0x0 #define K2G_GPIO_SETDATA_OFFSET 0x8 +/* BOOTCFG RESETMUX8 */ +#define KS2_RSTMUX8 (KS2_DEVICE_STATE_CTRL_BASE + 0x328) + +/* RESETMUX register definitions */ +#define RSTMUX_LOCK8_SHIFT 0x0 +#define RSTMUX_LOCK8_MASK (0x1 << 0) +#define RSTMUX_OMODE8_SHIFT 0x1 +#define RSTMUX_OMODE8_MASK (0x7 << 1) +#define RSTMUX_OMODE8_DEV_RESET 0x2 +#define RSTMUX_OMODE8_INT 0x3 +#define RSTMUX_OMODE8_INT_AND_DEV_RESET 0x4 + #endif /* __ASM_ARCH_HARDWARE_K2G_H */ |