diff options
38 files changed, 1882 insertions, 21 deletions
diff --git a/arch/arm/config.mk b/arch/arm/config.mk index fd3e5fb..329c7a7 100644 --- a/arch/arm/config.mk +++ b/arch/arm/config.mk @@ -17,7 +17,8 @@ endif LDFLAGS_FINAL += --gc-sections PLATFORM_RELFLAGS += -ffunction-sections -fdata-sections \ - -fno-common -ffixed-r9 -msoft-float + -fno-common -ffixed-r9 +PLATFORM_RELFLAGS += $(call cc-option, -msoft-float) # Support generic board on ARM __HAVE_ARCH_GENERIC_BOARD := y @@ -105,4 +106,8 @@ PLATFORM_CPPFLAGS += $(call cc-option, -mword-relocations) endif # limit ourselves to the sections we want in the .bin. +ifdef CONFIG_ARM64 +OBJCFLAGS += -j .text -j .rodata -j .data -j .u_boot_list -j .rela.dyn +else OBJCFLAGS += -j .text -j .rodata -j .data -j .u_boot_list -j .rel.dyn +endif diff --git a/arch/arm/cpu/armv8/Makefile b/arch/arm/cpu/armv8/Makefile new file mode 100644 index 0000000..b6eb6de --- /dev/null +++ b/arch/arm/cpu/armv8/Makefile @@ -0,0 +1,17 @@ +# +# (C) Copyright 2000-2003 +# Wolfgang Denk, DENX Software Engineering, wd@denx.de. +# +# SPDX-License-Identifier: GPL-2.0+ +# + +extra-y := start.o + +obj-y += cpu.o +obj-y += generic_timer.o +obj-y += cache_v8.o +obj-y += exceptions.o +obj-y += cache.o +obj-y += tlb.o +obj-y += gic.o +obj-y += transition.o diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S new file mode 100644 index 0000000..546a83e --- /dev/null +++ b/arch/arm/cpu/armv8/cache.S @@ -0,0 +1,136 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * This file is based on sample code from ARMv8 ARM. + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <asm-offsets.h> +#include <config.h> +#include <version.h> +#include <asm/macro.h> +#include <linux/linkage.h> + +/* + * void __asm_flush_dcache_level(level) + * + * clean and invalidate one level cache. + * + * x0: cache level + * x1~x9: clobbered + */ +ENTRY(__asm_flush_dcache_level) + lsl x1, x0, #1 + msr csselr_el1, x1 /* select cache level */ + isb /* sync change of cssidr_el1 */ + mrs x6, ccsidr_el1 /* read the new cssidr_el1 */ + and x2, x6, #7 /* x2 <- log2(cache line size)-4 */ + add x2, x2, #4 /* x2 <- log2(cache line size) */ + mov x3, #0x3ff + and x3, x3, x6, lsr #3 /* x3 <- max number of #ways */ + add w4, w3, w3 + sub w4, w4, 1 /* round up log2(#ways + 1) */ + clz w5, w4 /* bit position of #ways */ + mov x4, #0x7fff + and x4, x4, x6, lsr #13 /* x4 <- max number of #sets */ + /* x1 <- cache level << 1 */ + /* x2 <- line length offset */ + /* x3 <- number of cache ways - 1 */ + /* x4 <- number of cache sets - 1 */ + /* x5 <- bit position of #ways */ + +loop_set: + mov x6, x3 /* x6 <- working copy of #ways */ +loop_way: + lsl x7, x6, x5 + orr x9, x1, x7 /* map way and level to cisw value */ + lsl x7, x4, x2 + orr x9, x9, x7 /* map set number to cisw value */ + dc cisw, x9 /* clean & invalidate by set/way */ + subs x6, x6, #1 /* decrement the way */ + b.ge loop_way + subs x4, x4, #1 /* decrement the set */ + b.ge loop_set + + ret +ENDPROC(__asm_flush_dcache_level) + +/* + * void __asm_flush_dcache_all(void) + * + * clean and invalidate all data cache by SET/WAY. + */ +ENTRY(__asm_flush_dcache_all) + dsb sy + mrs x10, clidr_el1 /* read clidr_el1 */ + lsr x11, x10, #24 + and x11, x11, #0x7 /* x11 <- loc */ + cbz x11, finished /* if loc is 0, exit */ + mov x15, lr + mov x0, #0 /* start flush at cache level 0 */ + /* x0 <- cache level */ + /* x10 <- clidr_el1 */ + /* x11 <- loc */ + /* x15 <- return address */ + +loop_level: + lsl x1, x0, #1 + add x1, x1, x0 /* x0 <- tripled cache level */ + lsr x1, x10, x1 + and x1, x1, #7 /* x1 <- cache type */ + cmp x1, #2 + b.lt skip /* skip if no cache or icache */ + bl __asm_flush_dcache_level +skip: + add x0, x0, #1 /* increment cache level */ + cmp x11, x0 + b.gt loop_level + + mov x0, #0 + msr csselr_el1, x0 /* resotre csselr_el1 */ + dsb sy + isb + mov lr, x15 + +finished: + ret +ENDPROC(__asm_flush_dcache_all) + +/* + * void __asm_flush_dcache_range(start, end) + * + * clean & invalidate data cache in the range + * + * x0: start address + * x1: end address + */ +ENTRY(__asm_flush_dcache_range) + mrs x3, ctr_el0 + lsr x3, x3, #16 + and x3, x3, #0xf + mov x2, #4 + lsl x2, x2, x3 /* cache line size */ + + /* x2 <- minimal cache line size in cache system */ + sub x3, x2, #1 + bic x0, x0, x3 +1: dc civac, x0 /* clean & invalidate data or unified cache */ + add x0, x0, x2 + cmp x0, x1 + b.lo 1b + dsb sy + ret +ENDPROC(__asm_flush_dcache_range) + +/* + * void __asm_invalidate_icache_all(void) + * + * invalidate all tlb entries. + */ +ENTRY(__asm_invalidate_icache_all) + ic ialluis + isb sy + ret +ENDPROC(__asm_invalidate_icache_all) diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c new file mode 100644 index 0000000..131fdab --- /dev/null +++ b/arch/arm/cpu/armv8/cache_v8.c @@ -0,0 +1,219 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <asm/system.h> +#include <asm/armv8/mmu.h> + +DECLARE_GLOBAL_DATA_PTR; + +#ifndef CONFIG_SYS_DCACHE_OFF + +static void set_pgtable_section(u64 section, u64 memory_type) +{ + u64 *page_table = (u64 *)gd->arch.tlb_addr; + u64 value; + + value = (section << SECTION_SHIFT) | PMD_TYPE_SECT | PMD_SECT_AF; + value |= PMD_ATTRINDX(memory_type); + page_table[section] = value; +} + +/* to activate the MMU we need to set up virtual memory */ +static void mmu_setup(void) +{ + int i, j, el; + bd_t *bd = gd->bd; + + /* Setup an identity-mapping for all spaces */ + for (i = 0; i < (PGTABLE_SIZE >> 3); i++) + set_pgtable_section(i, MT_DEVICE_NGNRNE); + + /* Setup an identity-mapping for all RAM space */ + for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) { + ulong start = bd->bi_dram[i].start; + ulong end = bd->bi_dram[i].start + bd->bi_dram[i].size; + for (j = start >> SECTION_SHIFT; + j < end >> SECTION_SHIFT; j++) { + set_pgtable_section(j, MT_NORMAL); + } + } + + /* load TTBR0 */ + el = current_el(); + if (el == 1) + asm volatile("msr ttbr0_el1, %0" + : : "r" (gd->arch.tlb_addr) : "memory"); + else if (el == 2) + asm volatile("msr ttbr0_el2, %0" + : : "r" (gd->arch.tlb_addr) : "memory"); + else + asm volatile("msr ttbr0_el3, %0" + : : "r" (gd->arch.tlb_addr) : "memory"); + + /* enable the mmu */ + set_sctlr(get_sctlr() | CR_M); +} + +/* + * Performs a invalidation of the entire data cache at all levels + */ +void invalidate_dcache_all(void) +{ + __asm_flush_dcache_all(); +} + +/* + * Performs a clean & invalidation of the entire data cache at all levels + */ +void flush_dcache_all(void) +{ + __asm_flush_dcache_all(); +} + +/* + * Invalidates range in all levels of D-cache/unified cache + */ +void invalidate_dcache_range(unsigned long start, unsigned long stop) +{ + __asm_flush_dcache_range(start, stop); +} + +/* + * Flush range(clean & invalidate) from all levels of D-cache/unified cache + */ +void flush_dcache_range(unsigned long start, unsigned long stop) +{ + __asm_flush_dcache_range(start, stop); +} + +void dcache_enable(void) +{ + /* The data cache is not active unless the mmu is enabled */ + if (!(get_sctlr() & CR_M)) { + invalidate_dcache_all(); + __asm_invalidate_tlb_all(); + mmu_setup(); + } + + set_sctlr(get_sctlr() | CR_C); +} + +void dcache_disable(void) +{ + uint32_t sctlr; + + sctlr = get_sctlr(); + + /* if cache isn't enabled no need to disable */ + if (!(sctlr & CR_C)) + return; + + set_sctlr(sctlr & ~(CR_C|CR_M)); + + flush_dcache_all(); + __asm_invalidate_tlb_all(); +} + +int dcache_status(void) +{ + return (get_sctlr() & CR_C) != 0; +} + +#else /* CONFIG_SYS_DCACHE_OFF */ + +void invalidate_dcache_all(void) +{ +} + +void flush_dcache_all(void) +{ +} + +void invalidate_dcache_range(unsigned long start, unsigned long stop) +{ +} + +void flush_dcache_range(unsigned long start, unsigned long stop) +{ +} + +void dcache_enable(void) +{ +} + +void dcache_disable(void) +{ +} + +int dcache_status(void) +{ + return 0; +} + +#endif /* CONFIG_SYS_DCACHE_OFF */ + +#ifndef CONFIG_SYS_ICACHE_OFF + +void icache_enable(void) +{ + set_sctlr(get_sctlr() | CR_I); +} + +void icache_disable(void) +{ + set_sctlr(get_sctlr() & ~CR_I); +} + +int icache_status(void) +{ + return (get_sctlr() & CR_I) != 0; +} + +void invalidate_icache_all(void) +{ + __asm_invalidate_icache_all(); +} + +#else /* CONFIG_SYS_ICACHE_OFF */ + +void icache_enable(void) +{ +} + +void icache_disable(void) +{ +} + +int icache_status(void) +{ + return 0; +} + +void invalidate_icache_all(void) +{ +} + +#endif /* CONFIG_SYS_ICACHE_OFF */ + +/* + * Enable dCache & iCache, whether cache is actually enabled + * depend on CONFIG_SYS_DCACHE_OFF and CONFIG_SYS_ICACHE_OFF + */ +void enable_caches(void) +{ + icache_enable(); + dcache_enable(); +} + +/* + * Flush range from all levels of d-cache/unified-cache + */ +void flush_cache(unsigned long start, unsigned long size) +{ + flush_dcache_range(start, start + size); +} diff --git a/arch/arm/cpu/armv8/config.mk b/arch/arm/cpu/armv8/config.mk new file mode 100644 index 0000000..027a68c --- /dev/null +++ b/arch/arm/cpu/armv8/config.mk @@ -0,0 +1,15 @@ +# +# (C) Copyright 2002 +# Gary Jennejohn, DENX Software Engineering, <garyj@denx.de> +# +# SPDX-License-Identifier: GPL-2.0+ +# +PLATFORM_RELFLAGS += -fno-common -ffixed-x18 + +# SEE README.arm-unaligned-accesses +PF_NO_UNALIGNED := $(call cc-option, -mstrict-align) +PLATFORM_NO_UNALIGNED := $(PF_NO_UNALIGNED) + +PF_CPPFLAGS_ARMV8 := $(call cc-option, -march=armv8-a) +PLATFORM_CPPFLAGS += $(PF_CPPFLAGS_ARMV8) +PLATFORM_CPPFLAGS += $(PF_NO_UNALIGNED) diff --git a/arch/arm/cpu/armv8/cpu.c b/arch/arm/cpu/armv8/cpu.c new file mode 100644 index 0000000..e06c3cc --- /dev/null +++ b/arch/arm/cpu/armv8/cpu.c @@ -0,0 +1,43 @@ +/* + * (C) Copyright 2008 Texas Insturments + * + * (C) Copyright 2002 + * Sysgo Real-Time Solutions, GmbH <www.elinos.com> + * Marius Groeger <mgroeger@sysgo.de> + * + * (C) Copyright 2002 + * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <command.h> +#include <asm/system.h> +#include <linux/compiler.h> + +int cleanup_before_linux(void) +{ + /* + * this function is called just before we call linux + * it prepares the processor for linux + * + * disable interrupt and turn off caches etc ... + */ + disable_interrupts(); + + /* + * Turn off I-cache and invalidate it + */ + icache_disable(); + invalidate_icache_all(); + + /* + * turn off D-cache + * dcache_disable() in turn flushes the d-cache and disables MMU + */ + dcache_disable(); + invalidate_dcache_all(); + + return 0; +} diff --git a/arch/arm/cpu/armv8/exceptions.S b/arch/arm/cpu/armv8/exceptions.S new file mode 100644 index 0000000..b91a1b6 --- /dev/null +++ b/arch/arm/cpu/armv8/exceptions.S @@ -0,0 +1,113 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <asm-offsets.h> +#include <config.h> +#include <version.h> +#include <asm/ptrace.h> +#include <asm/macro.h> +#include <linux/linkage.h> + +/* + * Enter Exception. + * This will save the processor state that is ELR/X0~X30 + * to the stack frame. + */ +.macro exception_entry + stp x29, x30, [sp, #-16]! + stp x27, x28, [sp, #-16]! + stp x25, x26, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x17, x18, [sp, #-16]! + stp x15, x16, [sp, #-16]! + stp x13, x14, [sp, #-16]! + stp x11, x12, [sp, #-16]! + stp x9, x10, [sp, #-16]! + stp x7, x8, [sp, #-16]! + stp x5, x6, [sp, #-16]! + stp x3, x4, [sp, #-16]! + stp x1, x2, [sp, #-16]! + + /* Could be running at EL3/EL2/EL1 */ + switch_el x11, 3f, 2f, 1f +3: mrs x1, esr_el3 + mrs x2, elr_el3 + b 0f +2: mrs x1, esr_el2 + mrs x2, elr_el2 + b 0f +1: mrs x1, esr_el1 + mrs x2, elr_el1 +0: + stp x2, x0, [sp, #-16]! + mov x0, sp +.endm + +/* + * Exception vectors. + */ + .align 11 + .globl vectors +vectors: + .align 7 + b _do_bad_sync /* Current EL Synchronous Thread */ + + .align 7 + b _do_bad_irq /* Current EL IRQ Thread */ + + .align 7 + b _do_bad_fiq /* Current EL FIQ Thread */ + + .align 7 + b _do_bad_error /* Current EL Error Thread */ + + .align 7 + b _do_sync /* Current EL Synchronous Handler */ + + .align 7 + b _do_irq /* Current EL IRQ Handler */ + + .align 7 + b _do_fiq /* Current EL FIQ Handler */ + + .align 7 + b _do_error /* Current EL Error Handler */ + + +_do_bad_sync: + exception_entry + bl do_bad_sync + +_do_bad_irq: + exception_entry + bl do_bad_irq + +_do_bad_fiq: + exception_entry + bl do_bad_fiq + +_do_bad_error: + exception_entry + bl do_bad_error + +_do_sync: + exception_entry + bl do_sync + +_do_irq: + exception_entry + bl do_irq + +_do_fiq: + exception_entry + bl do_fiq + +_do_error: + exception_entry + bl do_error diff --git a/arch/arm/cpu/armv8/generic_timer.c b/arch/arm/cpu/armv8/generic_timer.c new file mode 100644 index 0000000..223b95e --- /dev/null +++ b/arch/arm/cpu/armv8/generic_timer.c @@ -0,0 +1,31 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <command.h> +#include <asm/system.h> + +/* + * Generic timer implementation of get_tbclk() + */ +unsigned long get_tbclk(void) +{ + unsigned long cntfrq; + asm volatile("mrs %0, cntfrq_el0" : "=r" (cntfrq)); + return cntfrq; +} + +/* + * Generic timer implementation of timer_read_counter() + */ +unsigned long timer_read_counter(void) +{ + unsigned long cntpct; + isb(); + asm volatile("mrs %0, cntpct_el0" : "=r" (cntpct)); + return cntpct; +} diff --git a/arch/arm/cpu/armv8/gic.S b/arch/arm/cpu/armv8/gic.S new file mode 100644 index 0000000..599aa8f --- /dev/null +++ b/arch/arm/cpu/armv8/gic.S @@ -0,0 +1,106 @@ +/* + * GIC Initialization Routines. + * + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <asm-offsets.h> +#include <config.h> +#include <linux/linkage.h> +#include <asm/macro.h> +#include <asm/gic.h> + + +/************************************************************************* + * + * void gic_init(void) __attribute__((weak)); + * + * Currently, this routine only initialize secure copy of GIC + * with Security Extensions at EL3. + * + *************************************************************************/ +WEAK(gic_init) + branch_if_slave x0, 2f + + /* Initialize Distributor and SPIs */ + ldr x1, =GICD_BASE + mov w0, #0x3 /* EnableGrp0 | EnableGrp1 */ + str w0, [x1, GICD_CTLR] /* Secure GICD_CTLR */ + ldr w0, [x1, GICD_TYPER] + and w2, w0, #0x1f /* ITLinesNumber */ + cbz w2, 2f /* No SPIs */ + add x1, x1, (GICD_IGROUPRn + 4) + mov w0, #~0 /* Config SPIs as Grp1 */ +1: str w0, [x1], #0x4 + sub w2, w2, #0x1 + cbnz w2, 1b + + /* Initialize SGIs and PPIs */ +2: ldr x1, =GICD_BASE + mov w0, #~0 /* Config SGIs and PPIs as Grp1 */ + str w0, [x1, GICD_IGROUPRn] /* GICD_IGROUPR0 */ + mov w0, #0x1 /* Enable SGI 0 */ + str w0, [x1, GICD_ISENABLERn] + + /* Initialize Cpu Interface */ + ldr x1, =GICC_BASE + mov w0, #0x1e7 /* Disable IRQ/FIQ Bypass & */ + /* Enable Ack Group1 Interrupt & */ + /* EnableGrp0 & EnableGrp1 */ + str w0, [x1, GICC_CTLR] /* Secure GICC_CTLR */ + + mov w0, #0x1 << 7 /* Non-Secure access to GICC_PMR */ + str w0, [x1, GICC_PMR] + + ret +ENDPROC(gic_init) + + +/************************************************************************* + * + * void gic_send_sgi(u64 sgi) __attribute__((weak)); + * + *************************************************************************/ +WEAK(gic_send_sgi) + ldr x1, =GICD_BASE + mov w2, #0x8000 + movk w2, #0x100, lsl #16 + orr w2, w2, w0 + str w2, [x1, GICD_SGIR] + ret +ENDPROC(gic_send_sgi) + + +/************************************************************************* + * + * void wait_for_wakeup(void) __attribute__((weak)); + * + * Wait for SGI 0 from master. + * + *************************************************************************/ +WEAK(wait_for_wakeup) + ldr x1, =GICC_BASE +0: wfi + ldr w0, [x1, GICC_AIAR] + str w0, [x1, GICC_AEOIR] + cbnz w0, 0b + ret +ENDPROC(wait_for_wakeup) + + +/************************************************************************* + * + * void smp_kick_all_cpus(void) __attribute__((weak)); + * + *************************************************************************/ +WEAK(smp_kick_all_cpus) + /* Kick secondary cpus up by SGI 0 interrupt */ + mov x0, xzr /* SGI 0 */ + mov x29, lr /* Save LR */ + bl gic_send_sgi + mov lr, x29 /* Restore LR */ + ret +ENDPROC(smp_kick_all_cpus) diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S new file mode 100644 index 0000000..bcc2603 --- /dev/null +++ b/arch/arm/cpu/armv8/start.S @@ -0,0 +1,164 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <asm-offsets.h> +#include <config.h> +#include <version.h> +#include <linux/linkage.h> +#include <asm/macro.h> +#include <asm/armv8/mmu.h> + +/************************************************************************* + * + * Startup Code (reset vector) + * + *************************************************************************/ + +.globl _start +_start: + b reset + + .align 3 + +.globl _TEXT_BASE +_TEXT_BASE: + .quad CONFIG_SYS_TEXT_BASE + +/* + * These are defined in the linker script. + */ +.globl _end_ofs +_end_ofs: + .quad _end - _start + +.globl _bss_start_ofs +_bss_start_ofs: + .quad __bss_start - _start + +.globl _bss_end_ofs +_bss_end_ofs: + .quad __bss_end - _start + +reset: + /* + * Could be EL3/EL2/EL1, Initial State: + * Little Endian, MMU Disabled, i/dCache Disabled + */ + adr x0, vectors + switch_el x1, 3f, 2f, 1f +3: msr vbar_el3, x0 + msr cptr_el3, xzr /* Enable FP/SIMD */ + ldr x0, =COUNTER_FREQUENCY + msr cntfrq_el0, x0 /* Initialize CNTFRQ */ + b 0f +2: msr vbar_el2, x0 + mov x0, #0x33ff + msr cptr_el2, x0 /* Enable FP/SIMD */ + b 0f +1: msr vbar_el1, x0 + mov x0, #3 << 20 + msr cpacr_el1, x0 /* Enable FP/SIMD */ +0: + + /* Cache/BPB/TLB Invalidate */ + bl __asm_flush_dcache_all /* dCache clean&invalidate */ + bl __asm_invalidate_icache_all /* iCache invalidate */ + bl __asm_invalidate_tlb_all /* invalidate TLBs */ + + /* Processor specific initialization */ + bl lowlevel_init + + branch_if_master x0, x1, master_cpu + + /* + * Slave CPUs + */ +slave_cpu: + wfe + ldr x1, =CPU_RELEASE_ADDR + ldr x0, [x1] + cbz x0, slave_cpu + br x0 /* branch to the given address */ + + /* + * Master CPU + */ +master_cpu: + bl _main + +/*-----------------------------------------------------------------------*/ + +WEAK(lowlevel_init) + /* Initialize GIC Secure Bank Status */ + mov x29, lr /* Save LR */ + bl gic_init + + branch_if_master x0, x1, 1f + + /* + * Slave should wait for master clearing spin table. + * This sync prevent salves observing incorrect + * value of spin table and jumping to wrong place. + */ + bl wait_for_wakeup + + /* + * All processors will enter EL2 and optionally EL1. + */ + bl armv8_switch_to_el2 +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1 + bl armv8_switch_to_el1 +#endif + +1: + mov lr, x29 /* Restore LR */ + ret +ENDPROC(lowlevel_init) + +/*-----------------------------------------------------------------------*/ + +ENTRY(c_runtime_cpu_setup) + /* If I-cache is enabled invalidate it */ +#ifndef CONFIG_SYS_ICACHE_OFF + ic iallu /* I+BTB cache invalidate */ + isb sy +#endif + +#ifndef CONFIG_SYS_DCACHE_OFF + /* + * Setup MAIR and TCR. + */ + ldr x0, =MEMORY_ATTRIBUTES + ldr x1, =TCR_FLAGS + + switch_el x2, 3f, 2f, 1f +3: orr x1, x1, TCR_EL3_IPS_BITS + msr mair_el3, x0 + msr tcr_el3, x1 + b 0f +2: orr x1, x1, TCR_EL2_IPS_BITS + msr mair_el2, x0 + msr tcr_el2, x1 + b 0f +1: orr x1, x1, TCR_EL1_IPS_BITS + msr mair_el1, x0 + msr tcr_el1, x1 +0: +#endif + + /* Relocate vBAR */ + adr x0, vectors + switch_el x1, 3f, 2f, 1f +3: msr vbar_el3, x0 + b 0f +2: msr vbar_el2, x0 + b 0f +1: msr vbar_el1, x0 +0: + + ret +ENDPROC(c_runtime_cpu_setup) diff --git a/arch/arm/cpu/armv8/tlb.S b/arch/arm/cpu/armv8/tlb.S new file mode 100644 index 0000000..f840b04 --- /dev/null +++ b/arch/arm/cpu/armv8/tlb.S @@ -0,0 +1,34 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <asm-offsets.h> +#include <config.h> +#include <version.h> +#include <linux/linkage.h> +#include <asm/macro.h> + +/* + * void __asm_invalidate_tlb_all(void) + * + * invalidate all tlb entries. + */ +ENTRY(__asm_invalidate_tlb_all) + switch_el x9, 3f, 2f, 1f +3: tlbi alle3 + dsb sy + isb + b 0f +2: tlbi alle2 + dsb sy + isb + b 0f +1: tlbi vmalle1 + dsb sy + isb +0: + ret +ENDPROC(__asm_invalidate_tlb_all) diff --git a/arch/arm/cpu/armv8/transition.S b/arch/arm/cpu/armv8/transition.S new file mode 100644 index 0000000..e0a5946 --- /dev/null +++ b/arch/arm/cpu/armv8/transition.S @@ -0,0 +1,83 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <asm-offsets.h> +#include <config.h> +#include <version.h> +#include <linux/linkage.h> +#include <asm/macro.h> + +ENTRY(armv8_switch_to_el2) + switch_el x0, 1f, 0f, 0f +0: ret +1: + mov x0, #0x5b1 /* Non-secure EL0/EL1 | HVC | 64bit EL2 */ + msr scr_el3, x0 + msr cptr_el3, xzr /* Disable coprocessor traps to EL3 */ + mov x0, #0x33ff + msr cptr_el2, x0 /* Disable coprocessor traps to EL2 */ + + /* Initialize SCTLR_EL2 */ + msr sctlr_el2, xzr + + /* Return to the EL2_SP2 mode from EL3 */ + mov x0, sp + msr sp_el2, x0 /* Migrate SP */ + mrs x0, vbar_el3 + msr vbar_el2, x0 /* Migrate VBAR */ + mov x0, #0x3c9 + msr spsr_el3, x0 /* EL2_SP2 | D | A | I | F */ + msr elr_el3, lr + eret +ENDPROC(armv8_switch_to_el2) + +ENTRY(armv8_switch_to_el1) + switch_el x0, 0f, 1f, 0f +0: ret +1: + /* Initialize Generic Timers */ + mrs x0, cnthctl_el2 + orr x0, x0, #0x3 /* Enable EL1 access to timers */ + msr cnthctl_el2, x0 + msr cntvoff_el2, x0 + mrs x0, cntkctl_el1 + orr x0, x0, #0x3 /* Enable EL0 access to timers */ + msr cntkctl_el1, x0 + + /* Initilize MPID/MPIDR registers */ + mrs x0, midr_el1 + mrs x1, mpidr_el1 + msr vpidr_el2, x0 + msr vmpidr_el2, x1 + + /* Disable coprocessor traps */ + mov x0, #0x33ff + msr cptr_el2, x0 /* Disable coprocessor traps to EL2 */ + msr hstr_el2, xzr /* Disable coprocessor traps to EL2 */ + mov x0, #3 << 20 + msr cpacr_el1, x0 /* Enable FP/SIMD at EL1 */ + + /* Initialize HCR_EL2 */ + mov x0, #(1 << 31) /* 64bit EL1 */ + orr x0, x0, #(1 << 29) /* Disable HVC */ + msr hcr_el2, x0 + + /* SCTLR_EL1 initialization */ + mov x0, #0x0800 + movk x0, #0x30d0, lsl #16 + msr sctlr_el1, x0 + + /* Return to the EL1_SP1 mode from EL2 */ + mov x0, sp + msr sp_el1, x0 /* Migrate SP */ + mrs x0, vbar_el2 + msr vbar_el1, x0 /* Migrate VBAR */ + mov x0, #0x3c5 + msr spsr_el2, x0 /* EL1_SP1 | D | A | I | F */ + msr elr_el2, lr + eret +ENDPROC(armv8_switch_to_el1) diff --git a/arch/arm/cpu/armv8/u-boot.lds b/arch/arm/cpu/armv8/u-boot.lds new file mode 100644 index 0000000..4c12222 --- /dev/null +++ b/arch/arm/cpu/armv8/u-boot.lds @@ -0,0 +1,89 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * (C) Copyright 2002 + * Gary Jennejohn, DENX Software Engineering, <garyj@denx.de> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64") +OUTPUT_ARCH(aarch64) +ENTRY(_start) +SECTIONS +{ + . = 0x00000000; + + . = ALIGN(8); + .text : + { + *(.__image_copy_start) + CPUDIR/start.o (.text*) + *(.text*) + } + + . = ALIGN(8); + .rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) } + + . = ALIGN(8); + .data : { + *(.data*) + } + + . = ALIGN(8); + + . = .; + + . = ALIGN(8); + .u_boot_list : { + KEEP(*(SORT(.u_boot_list*))); + } + + . = ALIGN(8); + + .image_copy_end : + { + *(.__image_copy_end) + } + + . = ALIGN(8); + + .rel_dyn_start : + { + *(.__rel_dyn_start) + } + + .rela.dyn : { + *(.rela*) + } + + .rel_dyn_end : + { + *(.__rel_dyn_end) + } + + _end = .; + + . = ALIGN(8); + + .bss_start : { + KEEP(*(.__bss_start)); + } + + .bss : { + *(.bss*) + . = ALIGN(8); + } + + .bss_end : { + KEEP(*(.__bss_end)); + } + + /DISCARD/ : { *(.dynsym) } + /DISCARD/ : { *(.dynstr*) } + /DISCARD/ : { *(.dynamic*) } + /DISCARD/ : { *(.plt*) } + /DISCARD/ : { *(.interp*) } + /DISCARD/ : { *(.gnu*) } +} diff --git a/arch/arm/include/asm/armv8/mmu.h b/arch/arm/include/asm/armv8/mmu.h new file mode 100644 index 0000000..1193e76 --- /dev/null +++ b/arch/arm/include/asm/armv8/mmu.h @@ -0,0 +1,111 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef _ASM_ARMV8_MMU_H_ +#define _ASM_ARMV8_MMU_H_ + +#ifdef __ASSEMBLY__ +#define _AC(X, Y) X +#else +#define _AC(X, Y) (X##Y) +#endif + +#define UL(x) _AC(x, UL) + +/***************************************************************/ +/* + * The following definitions are related each other, shoud be + * calculated specifically. + */ +#define VA_BITS (42) /* 42 bits virtual address */ + +/* PAGE_SHIFT determines the page size */ +#undef PAGE_SIZE +#define PAGE_SHIFT 16 +#define PAGE_SIZE (1 << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +/* + * section address mask and size definitions. + */ +#define SECTION_SHIFT 29 +#define SECTION_SIZE (UL(1) << SECTION_SHIFT) +#define SECTION_MASK (~(SECTION_SIZE-1)) +/***************************************************************/ + +/* + * Memory types + */ +#define MT_DEVICE_NGNRNE 0 +#define MT_DEVICE_NGNRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 +#define MT_NORMAL 4 + +#define MEMORY_ATTRIBUTES ((0x00 << (MT_DEVICE_NGNRNE*8)) | \ + (0x04 << (MT_DEVICE_NGNRE*8)) | \ + (0x0c << (MT_DEVICE_GRE*8)) | \ + (0x44 << (MT_NORMAL_NC*8)) | \ + (UL(0xff) << (MT_NORMAL*8))) + +/* + * Hardware page table definitions. + * + * Level 2 descriptor (PMD). + */ +#define PMD_TYPE_MASK (3 << 0) +#define PMD_TYPE_FAULT (0 << 0) +#define PMD_TYPE_TABLE (3 << 0) +#define PMD_TYPE_SECT (1 << 0) + +/* + * Section + */ +#define PMD_SECT_S (3 << 8) +#define PMD_SECT_AF (1 << 10) +#define PMD_SECT_NG (1 << 11) +#define PMD_SECT_PXN (UL(1) << 53) +#define PMD_SECT_UXN (UL(1) << 54) + +/* + * AttrIndx[2:0] + */ +#define PMD_ATTRINDX(t) ((t) << 2) +#define PMD_ATTRINDX_MASK (7 << 2) + +/* + * TCR flags. + */ +#define TCR_T0SZ(x) ((64 - (x)) << 0) +#define TCR_IRGN_NC (0 << 8) +#define TCR_IRGN_WBWA (1 << 8) +#define TCR_IRGN_WT (2 << 8) +#define TCR_IRGN_WBNWA (3 << 8) +#define TCR_IRGN_MASK (3 << 8) +#define TCR_ORGN_NC (0 << 10) +#define TCR_ORGN_WBWA (1 << 10) +#define TCR_ORGN_WT (2 << 10) +#define TCR_ORGN_WBNWA (3 << 10) +#define TCR_ORGN_MASK (3 << 10) +#define TCR_SHARED_NON (0 << 12) +#define TCR_SHARED_OUTER (1 << 12) +#define TCR_SHARED_INNER (2 << 12) +#define TCR_TG0_4K (0 << 14) +#define TCR_TG0_64K (1 << 14) +#define TCR_TG0_16K (2 << 14) +#define TCR_EL1_IPS_BITS (UL(3) << 32) /* 42 bits physical address */ +#define TCR_EL2_IPS_BITS (3 << 16) /* 42 bits physical address */ +#define TCR_EL3_IPS_BITS (3 << 16) /* 42 bits physical address */ + +/* PTWs cacheable, inner/outer WBWA and non-shareable */ +#define TCR_FLAGS (TCR_TG0_64K | \ + TCR_SHARED_NON | \ + TCR_ORGN_WBWA | \ + TCR_IRGN_WBWA | \ + TCR_T0SZ(VA_BITS)) + +#endif /* _ASM_ARMV8_MMU_H_ */ diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h index c3489f1..71a9966 100644 --- a/arch/arm/include/asm/byteorder.h +++ b/arch/arm/include/asm/byteorder.h @@ -23,10 +23,22 @@ # define __SWAB_64_THRU_32__ #endif +#ifdef CONFIG_ARM64 + +#ifdef __AARCH64EB__ +#include <linux/byteorder/big_endian.h> +#else +#include <linux/byteorder/little_endian.h> +#endif + +#else /* CONFIG_ARM64 */ + #ifdef __ARMEB__ #include <linux/byteorder/big_endian.h> #else #include <linux/byteorder/little_endian.h> #endif +#endif /* CONFIG_ARM64 */ + #endif diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h index 6d60a4a..ddebbc8 100644 --- a/arch/arm/include/asm/cache.h +++ b/arch/arm/include/asm/cache.h @@ -11,6 +11,8 @@ #include <asm/system.h> +#ifndef CONFIG_ARM64 + /* * Invalidate L2 Cache using co-proc instruction */ @@ -28,6 +30,9 @@ void l2_cache_disable(void); void set_section_dcache(int section, enum dcache_option option); void dram_bank_mmu_setup(int bank); + +#endif + /* * The current upper bound for ARM L1 data cache line sizes is 64 bytes. We * use that value for aligning DMA buffers unless the board config has specified diff --git a/arch/arm/include/asm/config.h b/arch/arm/include/asm/config.h index 99b703e..abf79e5 100644 --- a/arch/arm/include/asm/config.h +++ b/arch/arm/include/asm/config.h @@ -9,4 +9,10 @@ #define CONFIG_LMB #define CONFIG_SYS_BOOT_RAMDISK_HIGH + +#ifdef CONFIG_ARM64 +#define CONFIG_PHYS_64BIT +#define CONFIG_STATIC_RELA +#endif + #endif diff --git a/arch/arm/include/asm/gic.h b/arch/arm/include/asm/gic.h index a0891cc..ac2b2bf 100644 --- a/arch/arm/include/asm/gic.h +++ b/arch/arm/include/asm/gic.h @@ -1,19 +1,54 @@ -#ifndef __GIC_V2_H__ -#define __GIC_V2_H__ +#ifndef __GIC_H__ +#define __GIC_H__ -/* register offsets for the ARM generic interrupt controller (GIC) */ +/* Register offsets for the ARM generic interrupt controller (GIC) */ #define GIC_DIST_OFFSET 0x1000 +#define GIC_CPU_OFFSET_A9 0x0100 +#define GIC_CPU_OFFSET_A15 0x2000 + +/* Distributor Registers */ #define GICD_CTLR 0x0000 #define GICD_TYPER 0x0004 +#define GICD_IIDR 0x0008 +#define GICD_STATUSR 0x0010 +#define GICD_SETSPI_NSR 0x0040 +#define GICD_CLRSPI_NSR 0x0048 +#define GICD_SETSPI_SR 0x0050 +#define GICD_CLRSPI_SR 0x0058 +#define GICD_SEIR 0x0068 #define GICD_IGROUPRn 0x0080 -#define GICD_SGIR 0x0F00 +#define GICD_ISENABLERn 0x0100 +#define GICD_ICENABLERn 0x0180 +#define GICD_ISPENDRn 0x0200 +#define GICD_ICPENDRn 0x0280 +#define GICD_ISACTIVERn 0x0300 +#define GICD_ICACTIVERn 0x0380 +#define GICD_IPRIORITYRn 0x0400 +#define GICD_ITARGETSRn 0x0800 +#define GICD_ICFGR 0x0c00 +#define GICD_IGROUPMODRn 0x0d00 +#define GICD_NSACRn 0x0e00 +#define GICD_SGIR 0x0f00 +#define GICD_CPENDSGIRn 0x0f10 +#define GICD_SPENDSGIRn 0x0f20 +#define GICD_IROUTERn 0x6000 -#define GIC_CPU_OFFSET_A9 0x0100 -#define GIC_CPU_OFFSET_A15 0x2000 +/* Cpu Interface Memory Mapped Registers */ #define GICC_CTLR 0x0000 #define GICC_PMR 0x0004 +#define GICC_BPR 0x0008 #define GICC_IAR 0x000C #define GICC_EOIR 0x0010 +#define GICC_RPR 0x0014 +#define GICC_HPPIR 0x0018 +#define GICC_ABPR 0x001c +#define GICC_AIAR 0x0020 +#define GICC_AEOIR 0x0024 +#define GICC_AHPPIR 0x0028 +#define GICC_APRn 0x00d0 +#define GICC_NSAPRn 0x00e0 +#define GICC_IIDR 0x00fc +#define GICC_DIR 0x1000 -#endif +#endif /* __GIC_H__ */ diff --git a/arch/arm/include/asm/global_data.h b/arch/arm/include/asm/global_data.h index e126436..60e8726 100644 --- a/arch/arm/include/asm/global_data.h +++ b/arch/arm/include/asm/global_data.h @@ -47,6 +47,10 @@ struct arch_global_data { #include <asm-generic/global_data.h> -#define DECLARE_GLOBAL_DATA_PTR register volatile gd_t *gd asm ("r9") +#ifdef CONFIG_ARM64 +#define DECLARE_GLOBAL_DATA_PTR register volatile gd_t *gd asm ("x18") +#else +#define DECLARE_GLOBAL_DATA_PTR register volatile gd_t *gd asm ("r9") +#endif #endif /* __ASM_GBL_DATA_H */ diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 1fbc531..6a1f05a 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -75,42 +75,45 @@ static inline phys_addr_t virt_to_phys(void * vaddr) #define __arch_putw(v,a) (*(volatile unsigned short *)(a) = (v)) #define __arch_putl(v,a) (*(volatile unsigned int *)(a) = (v)) -extern inline void __raw_writesb(unsigned int addr, const void *data, int bytelen) +extern inline void __raw_writesb(unsigned long addr, const void *data, + int bytelen) { uint8_t *buf = (uint8_t *)data; while(bytelen--) __arch_putb(*buf++, addr); } -extern inline void __raw_writesw(unsigned int addr, const void *data, int wordlen) +extern inline void __raw_writesw(unsigned long addr, const void *data, + int wordlen) { uint16_t *buf = (uint16_t *)data; while(wordlen--) __arch_putw(*buf++, addr); } -extern inline void __raw_writesl(unsigned int addr, const void *data, int longlen) +extern inline void __raw_writesl(unsigned long addr, const void *data, + int longlen) { uint32_t *buf = (uint32_t *)data; while(longlen--) __arch_putl(*buf++, addr); } -extern inline void __raw_readsb(unsigned int addr, void *data, int bytelen) +extern inline void __raw_readsb(unsigned long addr, void *data, int bytelen) { uint8_t *buf = (uint8_t *)data; while(bytelen--) *buf++ = __arch_getb(addr); } -extern inline void __raw_readsw(unsigned int addr, void *data, int wordlen) +extern inline void __raw_readsw(unsigned long addr, void *data, int wordlen) { uint16_t *buf = (uint16_t *)data; while(wordlen--) *buf++ = __arch_getw(addr); } -extern inline void __raw_readsl(unsigned int addr, void *data, int longlen) +extern inline void __raw_readsl(unsigned long addr, void *data, int longlen) { uint32_t *buf = (uint32_t *)data; while(longlen--) diff --git a/arch/arm/include/asm/macro.h b/arch/arm/include/asm/macro.h index ff13f36..f77e4b8 100644 --- a/arch/arm/include/asm/macro.h +++ b/arch/arm/include/asm/macro.h @@ -54,5 +54,58 @@ bcs 1b .endm +#ifdef CONFIG_ARM64 +/* + * Register aliases. + */ +lr .req x30 + +/* + * Branch according to exception level + */ +.macro switch_el, xreg, el3_label, el2_label, el1_label + mrs \xreg, CurrentEL + cmp \xreg, 0xc + b.eq \el3_label + cmp \xreg, 0x8 + b.eq \el2_label + cmp \xreg, 0x4 + b.eq \el1_label +.endm + +/* + * Branch if current processor is a slave, + * choose processor with all zero affinity value as the master. + */ +.macro branch_if_slave, xreg, slave_label + mrs \xreg, mpidr_el1 + tst \xreg, #0xff /* Test Affinity 0 */ + b.ne \slave_label + lsr \xreg, \xreg, #8 + tst \xreg, #0xff /* Test Affinity 1 */ + b.ne \slave_label + lsr \xreg, \xreg, #8 + tst \xreg, #0xff /* Test Affinity 2 */ + b.ne \slave_label + lsr \xreg, \xreg, #16 + tst \xreg, #0xff /* Test Affinity 3 */ + b.ne \slave_label +.endm + +/* + * Branch if current processor is a master, + * choose processor with all zero affinity value as the master. + */ +.macro branch_if_master, xreg1, xreg2, master_label + mrs \xreg1, mpidr_el1 + lsr \xreg2, \xreg1, #32 + lsl \xreg1, \xreg1, #40 + lsr \xreg1, \xreg1, #40 + orr \xreg1, \xreg1, \xreg2 + cbz \xreg1, \master_label +.endm + +#endif /* CONFIG_ARM64 */ + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ARM_MACRO_H__ */ diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h index c412486..9ba9add 100644 --- a/arch/arm/include/asm/posix_types.h +++ b/arch/arm/include/asm/posix_types.h @@ -13,6 +13,8 @@ #ifndef __ARCH_ARM_POSIX_TYPES_H #define __ARCH_ARM_POSIX_TYPES_H +#include <config.h> + /* * This file is generally used by user-level software, so you need to * be a little careful about namespace pollution etc. Also, we cannot @@ -28,9 +30,17 @@ typedef int __kernel_pid_t; typedef unsigned short __kernel_ipc_pid_t; typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; + +#ifdef CONFIG_ARM64 +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +#else /* CONFIG_ARM64 */ typedef unsigned int __kernel_size_t; typedef int __kernel_ssize_t; typedef int __kernel_ptrdiff_t; +#endif /* CONFIG_ARM64 */ + typedef long __kernel_time_t; typedef long __kernel_suseconds_t; typedef long __kernel_clock_t; diff --git a/arch/arm/include/asm/proc-armv/ptrace.h b/arch/arm/include/asm/proc-armv/ptrace.h index a060ee6..21aef58 100644 --- a/arch/arm/include/asm/proc-armv/ptrace.h +++ b/arch/arm/include/asm/proc-armv/ptrace.h @@ -10,6 +10,25 @@ #ifndef __ASM_PROC_PTRACE_H #define __ASM_PROC_PTRACE_H +#ifdef CONFIG_ARM64 + +#define PCMASK 0 + +#ifndef __ASSEMBLY__ + +/* + * This struct defines the way the registers are stored + * on the stack during an exception. + */ +struct pt_regs { + unsigned long elr; + unsigned long regs[31]; +}; + +#endif /* __ASSEMBLY__ */ + +#else /* CONFIG_ARM64 */ + #define USR26_MODE 0x00 #define FIQ26_MODE 0x01 #define IRQ26_MODE 0x02 @@ -104,4 +123,6 @@ static inline int valid_user_regs(struct pt_regs *regs) #endif /* __ASSEMBLY__ */ +#endif /* CONFIG_ARM64 */ + #endif diff --git a/arch/arm/include/asm/proc-armv/system.h b/arch/arm/include/asm/proc-armv/system.h index cda8976..693d1f4 100644 --- a/arch/arm/include/asm/proc-armv/system.h +++ b/arch/arm/include/asm/proc-armv/system.h @@ -13,6 +13,60 @@ /* * Save the current interrupt enable state & disable IRQs */ +#ifdef CONFIG_ARM64 + +/* + * Save the current interrupt enable state + * and disable IRQs/FIQs + */ +#define local_irq_save(flags) \ + ({ \ + asm volatile( \ + "mrs %0, daif" \ + "msr daifset, #3" \ + : "=r" (flags) \ + : \ + : "memory"); \ + }) + +/* + * restore saved IRQ & FIQ state + */ +#define local_irq_restore(flags) \ + ({ \ + asm volatile( \ + "msr daif, %0" \ + : \ + : "r" (flags) \ + : "memory"); \ + }) + +/* + * Enable IRQs/FIQs + */ +#define local_irq_enable() \ + ({ \ + asm volatile( \ + "msr daifclr, #3" \ + : \ + : \ + : "memory"); \ + }) + +/* + * Disable IRQs/FIQs + */ +#define local_irq_disable() \ + ({ \ + asm volatile( \ + "msr daifset, #3" \ + : \ + : \ + : "memory"); \ + }) + +#else /* CONFIG_ARM64 */ + #define local_irq_save(x) \ ({ \ unsigned long temp; \ @@ -107,7 +161,10 @@ : "r" (x) \ : "memory") -#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) +#endif /* CONFIG_ARM64 */ + +#if defined(CONFIG_CPU_SA1100) || defined(CONFIG_CPU_SA110) || \ + defined(CONFIG_ARM64) /* * On the StrongARM, "swp" is terminally broken since it bypasses the * cache totally. This means that the cache becomes inconsistent, and, diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 760345f..4178f8c 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -1,6 +1,86 @@ #ifndef __ASM_ARM_SYSTEM_H #define __ASM_ARM_SYSTEM_H +#ifdef CONFIG_ARM64 + +/* + * SCTLR_EL1/SCTLR_EL2/SCTLR_EL3 bits definitions + */ +#define CR_M (1 << 0) /* MMU enable */ +#define CR_A (1 << 1) /* Alignment abort enable */ +#define CR_C (1 << 2) /* Dcache enable */ +#define CR_SA (1 << 3) /* Stack Alignment Check Enable */ +#define CR_I (1 << 12) /* Icache enable */ +#define CR_WXN (1 << 19) /* Write Permision Imply XN */ +#define CR_EE (1 << 25) /* Exception (Big) Endian */ + +#define PGTABLE_SIZE (0x10000) + +#ifndef __ASSEMBLY__ + +#define isb() \ + ({asm volatile( \ + "isb" : : : "memory"); \ + }) + +#define wfi() \ + ({asm volatile( \ + "wfi" : : : "memory"); \ + }) + +static inline unsigned int current_el(void) +{ + unsigned int el; + asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc"); + return el >> 2; +} + +static inline unsigned int get_sctlr(void) +{ + unsigned int el, val; + + el = current_el(); + if (el == 1) + asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc"); + else if (el == 2) + asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc"); + else + asm volatile("mrs %0, sctlr_el3" : "=r" (val) : : "cc"); + + return val; +} + +static inline void set_sctlr(unsigned int val) +{ + unsigned int el; + + el = current_el(); + if (el == 1) + asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc"); + else if (el == 2) + asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc"); + else + asm volatile("msr sctlr_el3, %0" : : "r" (val) : "cc"); + + asm volatile("isb"); +} + +void __asm_flush_dcache_all(void); +void __asm_flush_dcache_range(u64 start, u64 end); +void __asm_invalidate_tlb_all(void); +void __asm_invalidate_icache_all(void); + +void armv8_switch_to_el2(void); +void armv8_switch_to_el1(void); +void gic_init(void); +void gic_send_sgi(unsigned long sgino); +void wait_for_wakeup(void); +void smp_kick_all_cpus(void); + +#endif /* __ASSEMBLY__ */ + +#else /* CONFIG_ARM64 */ + #ifdef __KERNEL__ #define CPU_ARCH_UNKNOWN 0 @@ -45,6 +125,8 @@ #define CR_AFE (1 << 29) /* Access flag enable */ #define CR_TE (1 << 30) /* Thumb exception enable */ +#define PGTABLE_SIZE (4096 * 4) + /* * This is used to ensure the compiler did actually allocate the register we * asked it for some inline assembly sequences. Apparently we can't trust @@ -132,4 +214,6 @@ void mmu_page_table_flush(unsigned long start, unsigned long stop); #endif /* __KERNEL__ */ +#endif /* CONFIG_ARM64 */ + #endif diff --git a/arch/arm/include/asm/types.h b/arch/arm/include/asm/types.h index 71dc049..2326420 100644 --- a/arch/arm/include/asm/types.h +++ b/arch/arm/include/asm/types.h @@ -39,7 +39,11 @@ typedef unsigned int u32; typedef signed long long s64; typedef unsigned long long u64; +#ifdef CONFIG_ARM64 +#define BITS_PER_LONG 64 +#else /* CONFIG_ARM64 */ #define BITS_PER_LONG 32 +#endif /* CONFIG_ARM64 */ /* Dma addresses are 32-bits wide. */ diff --git a/arch/arm/include/asm/u-boot.h b/arch/arm/include/asm/u-boot.h index 2b5fce8..cb81232 100644 --- a/arch/arm/include/asm/u-boot.h +++ b/arch/arm/include/asm/u-boot.h @@ -44,6 +44,10 @@ typedef struct bd_info { #endif /* !CONFIG_SYS_GENERIC_BOARD */ /* For image.h:image_check_target_arch() */ +#ifndef CONFIG_ARM64 #define IH_ARCH_DEFAULT IH_ARCH_ARM +#else +#define IH_ARCH_DEFAULT IH_ARCH_ARM64 +#endif #endif /* _U_BOOT_H_ */ diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h index 44593a8..0a228fb 100644 --- a/arch/arm/include/asm/unaligned.h +++ b/arch/arm/include/asm/unaligned.h @@ -8,7 +8,7 @@ /* * Select endianness */ -#ifndef __ARMEB__ +#if __BYTE_ORDER == __LITTLE_ENDIAN #define get_unaligned __get_unaligned_le #define put_unaligned __put_unaligned_le #else diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 679f19a..321997c 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -17,14 +17,22 @@ lib-y += _umodsi3.o lib-y += div0.o endif -obj-y += crt0.o +ifdef CONFIG_ARM64 +obj-y += crt0_64.o +else +obj-y += crt0.o +endif ifndef CONFIG_SPL_BUILD -obj-y += relocate.o +ifdef CONFIG_ARM64 +obj-y += relocate_64.o +else +obj-y += relocate.o +endif ifndef CONFIG_SYS_GENERIC_BOARD obj-y += board.o endif -obj-y += sections.o +obj-y += sections.o obj-$(CONFIG_OF_LIBFDT) += bootm-fdt.o obj-$(CONFIG_CMD_BOOTM) += bootm.o @@ -35,11 +43,17 @@ else obj-$(CONFIG_SPL_FRAMEWORK) += spl.o endif +ifdef CONFIG_ARM64 +obj-y += interrupts_64.o +else obj-y += interrupts.o +endif obj-y += reset.o obj-y += cache.o +ifndef CONFIG_ARM64 obj-y += cache-cp15.o +endif # For EABI conformant tool chains, provide eabi_compat() ifneq (,$(findstring -mabi=aapcs-linux,$(PLATFORM_CPPFLAGS))) diff --git a/arch/arm/lib/board.c b/arch/arm/lib/board.c index 9c72a53..b770e25 100644 --- a/arch/arm/lib/board.c +++ b/arch/arm/lib/board.c @@ -344,7 +344,7 @@ void board_init_f(ulong bootflag) #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF)) /* reserve TLB table */ - gd->arch.tlb_size = 4096 * 4; + gd->arch.tlb_size = PGTABLE_SIZE; addr -= gd->arch.tlb_size; /* round down to next 64 kB limit */ @@ -419,6 +419,7 @@ void board_init_f(ulong bootflag) } #endif +#ifndef CONFIG_ARM64 /* setup stackpointer for exeptions */ gd->irq_sp = addr_sp; #ifdef CONFIG_USE_IRQ @@ -431,6 +432,10 @@ void board_init_f(ulong bootflag) /* 8-byte alignment for ABI compliance */ addr_sp &= ~0x07; +#else /* CONFIG_ARM64 */ + /* 16-byte alignment for ABI compliance */ + addr_sp &= ~0x0f; +#endif /* CONFIG_ARM64 */ #else addr_sp += 128; /* leave 32 words for abort-stack */ gd->irq_sp = addr_sp; diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c index f476a89..77f1a5c 100644 --- a/arch/arm/lib/bootm.c +++ b/arch/arm/lib/bootm.c @@ -196,6 +196,14 @@ static void do_nonsec_virt_switch(void) debug("entered non-secure state\n"); #endif #endif + +#ifdef CONFIG_ARM64 + smp_kick_all_cpus(); + armv8_switch_to_el2(); +#ifdef CONFIG_ARMV8_SWITCH_TO_EL1 + armv8_switch_to_el1(); +#endif +#endif } /* Subcommand: PREP */ @@ -240,6 +248,21 @@ static void boot_prep_linux(bootm_headers_t *images) /* Subcommand: GO */ static void boot_jump_linux(bootm_headers_t *images, int flag) { +#ifdef CONFIG_ARM64 + void (*kernel_entry)(void *fdt_addr); + int fake = (flag & BOOTM_STATE_OS_FAKE_GO); + + kernel_entry = (void (*)(void *fdt_addr))images->ep; + + debug("## Transferring control to Linux (at address %lx)...\n", + (ulong) kernel_entry); + bootstage_mark(BOOTSTAGE_ID_RUN_OS); + + announce_and_cleanup(fake); + + if (!fake) + kernel_entry(images->ft_addr); +#else unsigned long machid = gd->bd->bi_arch_number; char *s; void (*kernel_entry)(int zero, int arch, uint params); @@ -266,6 +289,7 @@ static void boot_jump_linux(bootm_headers_t *images, int flag) if (!fake) kernel_entry(0, machid, r2); +#endif } /* Main Entry point for arm bootm implementation diff --git a/arch/arm/lib/crt0_64.S b/arch/arm/lib/crt0_64.S new file mode 100644 index 0000000..7756396 --- /dev/null +++ b/arch/arm/lib/crt0_64.S @@ -0,0 +1,113 @@ +/* + * crt0 - C-runtime startup Code for AArch64 U-Boot + * + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * (C) Copyright 2012 + * Albert ARIBAUD <albert.u.boot@aribaud.net> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <config.h> +#include <asm-offsets.h> +#include <asm/macro.h> +#include <linux/linkage.h> + +/* + * This file handles the target-independent stages of the U-Boot + * start-up where a C runtime environment is needed. Its entry point + * is _main and is branched into from the target's start.S file. + * + * _main execution sequence is: + * + * 1. Set up initial environment for calling board_init_f(). + * This environment only provides a stack and a place to store + * the GD ('global data') structure, both located in some readily + * available RAM (SRAM, locked cache...). In this context, VARIABLE + * global data, initialized or not (BSS), are UNAVAILABLE; only + * CONSTANT initialized data are available. + * + * 2. Call board_init_f(). This function prepares the hardware for + * execution from system RAM (DRAM, DDR...) As system RAM may not + * be available yet, , board_init_f() must use the current GD to + * store any data which must be passed on to later stages. These + * data include the relocation destination, the future stack, and + * the future GD location. + * + * (the following applies only to non-SPL builds) + * + * 3. Set up intermediate environment where the stack and GD are the + * ones allocated by board_init_f() in system RAM, but BSS and + * initialized non-const data are still not available. + * + * 4. Call relocate_code(). This function relocates U-Boot from its + * current location into the relocation destination computed by + * board_init_f(). + * + * 5. Set up final environment for calling board_init_r(). This + * environment has BSS (initialized to 0), initialized non-const + * data (initialized to their intended value), and stack in system + * RAM. GD has retained values set by board_init_f(). Some CPUs + * have some work left to do at this point regarding memory, so + * call c_runtime_cpu_setup. + * + * 6. Branch to board_init_r(). + */ + +ENTRY(_main) + +/* + * Set up initial C runtime environment and call board_init_f(0). + */ + ldr x0, =(CONFIG_SYS_INIT_SP_ADDR) + sub x0, x0, #GD_SIZE /* allocate one GD above SP */ + bic sp, x0, #0xf /* 16-byte alignment for ABI compliance */ + mov x18, sp /* GD is above SP */ + mov x0, #0 + bl board_init_f + +/* + * Set up intermediate environment (new sp and gd) and call + * relocate_code(addr_moni). Trick here is that we'll return + * 'here' but relocated. + */ + ldr x0, [x18, #GD_START_ADDR_SP] /* x0 <- gd->start_addr_sp */ + bic sp, x0, #0xf /* 16-byte alignment for ABI compliance */ + ldr x18, [x18, #GD_BD] /* x18 <- gd->bd */ + sub x18, x18, #GD_SIZE /* new GD is below bd */ + + adr lr, relocation_return + ldr x9, [x18, #GD_RELOC_OFF] /* x9 <- gd->reloc_off */ + add lr, lr, x9 /* new return address after relocation */ + ldr x0, [x18, #GD_RELOCADDR] /* x0 <- gd->relocaddr */ + b relocate_code + +relocation_return: + +/* + * Set up final (full) environment + */ + bl c_runtime_cpu_setup /* still call old routine */ + +/* + * Clear BSS section + */ + ldr x0, =__bss_start /* this is auto-relocated! */ + ldr x1, =__bss_end /* this is auto-relocated! */ + mov x2, #0 +clear_loop: + str x2, [x0] + add x0, x0, #8 + cmp x0, x1 + b.lo clear_loop + + /* call board_init_r(gd_t *id, ulong dest_addr) */ + mov x0, x18 /* gd_t */ + ldr x1, [x18, #GD_RELOCADDR] /* dest_addr */ + b board_init_r /* PC relative jump */ + + /* NOTREACHED - board_init_r() does not return */ + +ENDPROC(_main) diff --git a/arch/arm/lib/interrupts_64.c b/arch/arm/lib/interrupts_64.c new file mode 100644 index 0000000..b476722 --- /dev/null +++ b/arch/arm/lib/interrupts_64.c @@ -0,0 +1,120 @@ +/* + * (C) Copyright 2013 + * David Feng <fenghua@phytium.com.cn> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <common.h> +#include <linux/compiler.h> + + +int interrupt_init(void) +{ + return 0; +} + +void enable_interrupts(void) +{ + return; +} + +int disable_interrupts(void) +{ + return 0; +} + +void show_regs(struct pt_regs *regs) +{ + int i; + + printf("ELR: %lx\n", regs->elr); + printf("LR: %lx\n", regs->regs[30]); + for (i = 0; i < 29; i += 2) + printf("x%-2d: %016lx x%-2d: %016lx\n", + i, regs->regs[i], i+1, regs->regs[i+1]); + printf("\n"); +} + +/* + * do_bad_sync handles the impossible case in the Synchronous Abort vector. + */ +void do_bad_sync(struct pt_regs *pt_regs, unsigned int esr) +{ + printf("Bad mode in \"Synchronous Abort\" handler, esr 0x%08x\n", esr); + show_regs(pt_regs); + panic("Resetting CPU ...\n"); +} + +/* + * do_bad_irq handles the impossible case in the Irq vector. + */ +void do_bad_irq(struct pt_regs *pt_regs, unsigned int esr) +{ + printf("Bad mode in \"Irq\" handler, esr 0x%08x\n", esr); + show_regs(pt_regs); + panic("Resetting CPU ...\n"); +} + +/* + * do_bad_fiq handles the impossible case in the Fiq vector. + */ +void do_bad_fiq(struct pt_regs *pt_regs, unsigned int esr) +{ + printf("Bad mode in \"Fiq\" handler, esr 0x%08x\n", esr); + show_regs(pt_regs); + panic("Resetting CPU ...\n"); +} + +/* + * do_bad_error handles the impossible case in the Error vector. + */ +void do_bad_error(struct pt_regs *pt_regs, unsigned int esr) +{ + printf("Bad mode in \"Error\" handler, esr 0x%08x\n", esr); + show_regs(pt_regs); + panic("Resetting CPU ...\n"); +} + +/* + * do_sync handles the Synchronous Abort exception. + */ +void do_sync(struct pt_regs *pt_regs, unsigned int esr) +{ + printf("\"Synchronous Abort\" handler, esr 0x%08x\n", esr); + show_regs(pt_regs); + panic("Resetting CPU ...\n"); +} + +/* + * do_irq handles the Irq exception. + */ +void do_irq(struct pt_regs *pt_regs, unsigned int esr) +{ + printf("\"Irq\" handler, esr 0x%08x\n", esr); + show_regs(pt_regs); + panic("Resetting CPU ...\n"); +} + +/* + * do_fiq handles the Fiq exception. + */ +void do_fiq(struct pt_regs *pt_regs, unsigned int esr) +{ + printf("\"Fiq\" handler, esr 0x%08x\n", esr); + show_regs(pt_regs); + panic("Resetting CPU ...\n"); +} + +/* + * do_error handles the Error exception. + * Errors are more likely to be processor specific, + * it is defined with weak attribute and can be redefined + * in processor specific code. + */ +void __weak do_error(struct pt_regs *pt_regs, unsigned int esr) +{ + printf("\"Error\" handler, esr 0x%08x\n", esr); + show_regs(pt_regs); + panic("Resetting CPU ...\n"); +} diff --git a/arch/arm/lib/relocate_64.S b/arch/arm/lib/relocate_64.S new file mode 100644 index 0000000..7fba9e2 --- /dev/null +++ b/arch/arm/lib/relocate_64.S @@ -0,0 +1,58 @@ +/* + * relocate - common relocation function for AArch64 U-Boot + * + * (C) Copyright 2013 + * Albert ARIBAUD <albert.u.boot@aribaud.net> + * David Feng <fenghua@phytium.com.cn> + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include <asm-offsets.h> +#include <config.h> +#include <linux/linkage.h> + +/* + * void relocate_code (addr_moni) + * + * This function relocates the monitor code. + * x0 holds the destination address. + */ +ENTRY(relocate_code) + /* + * Copy u-boot from flash to RAM + */ + ldr x1, =__image_copy_start /* x1 <- SRC &__image_copy_start */ + subs x9, x0, x1 /* x9 <- relocation offset */ + b.eq relocate_done /* skip relocation */ + ldr x2, =__image_copy_end /* x2 <- SRC &__image_copy_end */ + +copy_loop: + ldp x10, x11, [x1], #16 /* copy from source address [x1] */ + stp x10, x11, [x0], #16 /* copy to target address [x0] */ + cmp x1, x2 /* until source end address [x2] */ + b.lo copy_loop + + /* + * Fix .rela.dyn relocations + */ + ldr x2, =__rel_dyn_start /* x2 <- SRC &__rel_dyn_start */ + ldr x3, =__rel_dyn_end /* x3 <- SRC &__rel_dyn_end */ +fixloop: + ldp x0, x1, [x2], #16 /* (x0,x1) <- (SRC location, fixup) */ + ldr x4, [x2], #8 /* x4 <- addend */ + and x1, x1, #0xffffffff + cmp x1, #1027 /* relative fixup? */ + bne fixnext + + /* relative fix: store addend plus offset at dest location */ + add x0, x0, x9 + add x4, x4, x9 + str x4, [x0] +fixnext: + cmp x2, x3 + b.lo fixloop + +relocate_done: + ret +ENDPROC(relocate_code) diff --git a/common/image.c b/common/image.c index b0ae58f..4145354 100644 --- a/common/image.c +++ b/common/image.c @@ -81,6 +81,7 @@ static const table_entry_t uimage_arch[] = { { IH_ARCH_NDS32, "nds32", "NDS32", }, { IH_ARCH_OPENRISC, "or1k", "OpenRISC 1000",}, { IH_ARCH_SANDBOX, "sandbox", "Sandbox", }, + { IH_ARCH_ARM64, "arm64", "AArch64", }, { -1, "", "", }, }; diff --git a/doc/README.arm64 b/doc/README.arm64 new file mode 100644 index 0000000..75586db --- /dev/null +++ b/doc/README.arm64 @@ -0,0 +1,46 @@ +U-boot for arm64 + +Summary +======= +No hardware platform of arm64 is available now. The u-boot is +simulated on Foundation Model and Fast Model for ARMv8. + +Notes +===== + +1. Currenly, u-boot run at the highest exception level processor + supported and jump to EL2 or optionally EL1 before enter OS. + +2. U-boot for arm64 is compiled with AArch64-gcc. AArch64-gcc + use rela relocation format, a tool(tools/relocate-rela) by Scott Wood + is used to encode the initial addend of rela to u-boot.bin. After running, + the u-boot will be relocated to destination again. + +3. Fdt should be placed at a 2-megabyte boundary and within the first 512 + megabytes from the start of the kernel image. So, fdt_high should be + defined specially. + Please reference linux/Documentation/arm64/booting.txt for detail. + +4. Spin-table is used to wake up secondary processors. One location + (or per processor location) is defined to hold the kernel entry point + for secondary processors. It must be ensured that the location is + accessible and zero immediately after secondary processor + enter slave_cpu branch execution in start.S. The location address + is encoded in cpu node of DTS. Linux kernel store the entry point + of secondary processors to it and send event to wakeup secondary + processors. + Please reference linux/Documentation/arm64/booting.txt for detail. + +5. Generic board is supported. + +6. CONFIG_ARM64 instead of CONFIG_ARMV8 is used to distinguish aarch64 and + aarch32 specific codes. + +Contributor +=========== + Tom Rini <trini@ti.com> + Scott Wood <scottwood@freescale.com> + York Sun <yorksun@freescale.com> + Simon Glass <sjg@chromium.org> + Sharma Bhupesh <bhupesh.sharma@freescale.com> + Rob Herring <robherring2@gmail.com> diff --git a/examples/standalone/stubs.c b/examples/standalone/stubs.c index 5d2ab56..32a19ce 100644 --- a/examples/standalone/stubs.c +++ b/examples/standalone/stubs.c @@ -39,6 +39,20 @@ gd_t *global_data; " bctr\n" \ : : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "r11"); #elif defined(CONFIG_ARM) +#ifdef CONFIG_ARM64 +/* + * x18 holds the pointer to the global_data, x9 is a call-clobbered + * register + */ +#define EXPORT_FUNC(x) \ + asm volatile ( \ +" .globl " #x "\n" \ +#x ":\n" \ +" ldr x9, [x18, %0]\n" \ +" ldr x9, [x9, %1]\n" \ +" br x9\n" \ + : : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "x9"); +#else /* * r9 holds the pointer to the global_data, ip is a call-clobbered * register @@ -50,6 +64,7 @@ gd_t *global_data; " ldr ip, [r9, %0]\n" \ " ldr pc, [ip, %1]\n" \ : : "i"(offsetof(gd_t, jt)), "i"(XF_ ## x * sizeof(void *)) : "ip"); +#endif #elif defined(CONFIG_MIPS) /* * k0 ($26) holds the pointer to the global_data; t9 ($25) is a call- diff --git a/include/image.h b/include/image.h index ee6eb8d..7de2bb2 100644 --- a/include/image.h +++ b/include/image.h @@ -156,6 +156,7 @@ struct lmb; #define IH_ARCH_SANDBOX 19 /* Sandbox architecture (test only) */ #define IH_ARCH_NDS32 20 /* ANDES Technology - NDS32 */ #define IH_ARCH_OPENRISC 21 /* OpenRISC 1000 */ +#define IH_ARCH_ARM64 22 /* ARM64 */ /* * Image Types |