It was observed that the decompressor running on hardware implementing ARM v8.2 Load/Store Multiple Atomicity and Ordering Control (LSMAOC), say, as a guest, would get stuck just after: Uncompressing Linux... done, booting the kernel. The reason is that it clears the nTLSMD bit when disabling caches: nTLSMD, bit [3] When ARMv8.2-LSMAOC is implemented: No Trap Load Multiple and Store Multiple to Device-nGRE/Device-nGnRE/Device-nGnRnE memory. 0b0 All memory accesses by A32 and T32 Load Multiple and Store Multiple at EL1 or EL0 that are marked at stage 1 as Device-nGRE/Device-nGnRE/Device-nGnRnE memory are trapped and generate a stage 1 Alignment fault. 0b1 All memory accesses by A32 and T32 Load Multiple and Store Multiple at EL1 or EL0 that are marked at stage 1 as Device-nGRE/Device-nGnRE/Device-nGnRnE memory are not trapped. This bit is permitted to be cached in a TLB. This field resets to 1. Otherwise: Reserved, RES1 So, as an effect, we start getting traps we are not quite ready for. Looking into the history, it seems that the mask used for the SCTLR clear came from the similar code for ARMv4, where bit[3] is the enable/disable bit for the write buffer. That is not applicable to ARMv7 and onwards, so retire that bit from the masks. Fixes: 7d09e85448dfa78e3e58186c934449aaf6d49b50 ("[ARM] 4393/2: ARMv7: Add uncompressing code for the new CPU Id format") Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
1535 lines
38 KiB
ArmAsm
1535 lines
38 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* linux/arch/arm/boot/compressed/head.S
|
|
*
|
|
* Copyright (C) 1996-2002 Russell King
|
|
* Copyright (C) 2004 Hyok S. Choi (MPU support)
|
|
*/
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
#include <asm/v7m.h>
|
|
|
|
#include "efi-header.S"
|
|
|
|
#ifdef __ARMEB__
|
|
#define OF_DT_MAGIC 0xd00dfeed
|
|
#else
|
|
#define OF_DT_MAGIC 0xedfe0dd0
|
|
#endif
|
|
|
|
AR_CLASS( .arch armv7-a )
|
|
M_CLASS( .arch armv7-m )
|
|
|
|
/*
|
|
* Debugging stuff
|
|
*
|
|
* Note that these macros must not contain any code which is not
|
|
* 100% relocatable. Any attempt to do so will result in a crash.
|
|
* Please select one of the following when turning on debugging.
|
|
*/
|
|
#ifdef DEBUG
/*
 * Character output primitives for the DEBUG build.  Each backend below
 * supplies the same pair of macros:
 *
 *   loadsp rb, tmp1, tmp2	prepare \rb for a following writeb
 *				(expands to nothing for the ICEDCC backends)
 *   writeb ch, rb, tmp		emit the character held in \ch
 */

#if defined(CONFIG_DEBUG_ICEDCC)

/* ICEDCC: the character is handed to coprocessor 14 with a single MCR. */
#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7)
		.macro	loadsp, rb, tmp1, tmp2
		.endm
		.macro	writeb, ch, rb, tmp
		mcr	p14, 0, \ch, c0, c5, 0
		.endm
#elif defined(CONFIG_CPU_XSCALE)
		.macro	loadsp, rb, tmp1, tmp2
		.endm
		.macro	writeb, ch, rb, tmp
		mcr	p14, 0, \ch, c8, c0, 0
		.endm
#else
		.macro	loadsp, rb, tmp1, tmp2
		.endm
		.macro	writeb, ch, rb, tmp
		mcr	p14, 0, \ch, c1, c0, 0
		.endm
#endif

#else	/* !CONFIG_DEBUG_ICEDCC */

#include CONFIG_DEBUG_LL_INCLUDE

		/* UART: wait (optionally for CTS), send, then wait until idle. */
		.macro	writeb, ch, rb, tmp
#ifdef CONFIG_DEBUG_UART_FLOW_CONTROL
		waituartcts \tmp, \rb
#endif
		waituarttxrdy \tmp, \rb
		senduart \ch, \rb
		busyuart \tmp, \rb
		.endm

#if defined(CONFIG_ARCH_SA1100)
		/* SA1100: build the serial port base address by hand. */
		.macro	loadsp, rb, tmp1, tmp2
		mov	\rb, #0x80000000	@ physical base address
#ifdef CONFIG_DEBUG_LL_SER3
		add	\rb, \rb, #0x00050000	@ Ser3
#else
		add	\rb, \rb, #0x00010000	@ Ser1
#endif
		.endm
#else
		.macro	loadsp, rb, tmp1, tmp2
		addruart \rb, \tmp1, \tmp2
		.endm
#endif
#endif	/* CONFIG_DEBUG_ICEDCC */
#endif	/* DEBUG */
|
|
|
|
/*
 * kputc - print a single character.
 * Loads \val into r0 and calls putc.
 */
		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm
|
|
|
|
/*
 * kphex - print a value through phex.
 * Loads \val into r0 and the immediate \len into r1, then calls phex.
 */
		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm
|
|
|
|
/*
 * dbgkc - trace a kernel copy by printing the addresses involved.
 *
 * Output (DEBUG builds only; the macro body is empty otherwise):
 *   C:0x<begin>-0x<end>->0x<cbegin>-0x<cend>\n
 */
		.macro	dbgkc, begin, end, cbegin, cend
#ifdef DEBUG
		kputc	#'C'
		kputc	#':'
		kputc	#'0'
		kputc	#'x'
		kphex	\begin, 8	/* Start of compressed kernel */
		kputc	#'-'
		kputc	#'0'
		kputc	#'x'
		kphex	\end, 8		/* End of compressed kernel */
		kputc	#'-'
		kputc	#'>'
		kputc	#'0'
		kputc	#'x'
		kphex	\cbegin, 8	/* Start of kernel copy */
		kputc	#'-'
		kputc	#'0'
		kputc	#'x'
		kphex	\cend, 8	/* End of kernel copy */
		kputc	#'\n'
#endif
		.endm
|
|
|
|
/*
 * dbgadtb - print where the final appended DTB ended up.
 *
 * Output (DEBUG builds only; the macro body is empty otherwise):
 *   DTB:0x<begin> (0x<size>)\n
 */
		.macro	dbgadtb, begin, size
#ifdef DEBUG
		kputc	#'D'
		kputc	#'T'
		kputc	#'B'
		kputc	#':'
		kputc	#'0'
		kputc	#'x'
		kphex	\begin, 8	/* Start of appended DTB */
		kputc	#' '
		kputc	#'('
		kputc	#'0'
		kputc	#'x'
		kphex	\size, 8	/* Size of appended DTB */
		kputc	#')'
		kputc	#'\n'
#endif
		.endm
|
|
|
|
/*
 * enable_cp15_barriers - make sure SCTLR bit 5 (CP15BEN) is set.
 *
 * \reg is a scratch register; it holds the SCTLR value on exit.  SCTLR is
 * written back (followed by an isb) only if the bit was clear.
 */
		.macro	enable_cp15_barriers, reg
		mrc	p15, 0, \reg, c1, c0, 0	@ read SCTLR
		tst	\reg, #(1 << 5)		@ CP15BEN bit set?
		bne	.L_\@			@ yes: nothing to do
		orr	\reg, \reg, #(1 << 5)	@ CP15 barrier instructions
		mcr	p15, 0, \reg, c1, c0, 0	@ write SCTLR
 ARM(		.inst   0xf57ff06f		@ v7+ isb	)
 THUMB(		isb						)
.L_\@:
		.endm
|
|
|
|
/*
 * get_inflated_image_size - fetch the size of the decompressed kernel.
 *
 * The kernel build system appends the size of the decompressed kernel
 * at the end of the compressed data in little-endian form.  The word at
 * .Linflated_image_size_offset holds its position relative to itself, so
 * the lookup is position independent; the value is then assembled one
 * byte at a time (it need not be aligned).
 *
 * \res  = result
 * \tmp1, \tmp2 = scratch, corrupted
 */
		.macro	get_inflated_image_size, res:req, tmp1:req, tmp2:req
		adr	\res, .Linflated_image_size_offset
		ldr	\tmp1, [\res]
		add	\tmp1, \tmp1, \res	@ address of inflated image size

		ldrb	\res, [\tmp1]		@ get_unaligned_le32
		ldrb	\tmp2, [\tmp1, #1]
		orr	\res, \res, \tmp2, lsl #8
		ldrb	\tmp2, [\tmp1, #2]
		ldrb	\tmp1, [\tmp1, #3]	@ last use of the pointer
		orr	\res, \res, \tmp2, lsl #16
		orr	\res, \res, \tmp1, lsl #24
		.endm
|
|
|
|
/*
 * be32tocpu - convert a big-endian 32-bit value in \val to CPU order.
 * Expands to nothing when built big-endian (__ARMEB__); otherwise the
 * bytes are reversed with rev_l, using \tmp as scratch.
 */
		.macro	be32tocpu, val, tmp
#ifndef __ARMEB__
		/* convert to little endian */
		rev_l	\val, \tmp
#endif
		.endm
|
|
|
|
/*
 * Image entry point, placed in the ".start" section.
 *
 * r1 and r2 as passed by the boot loader are saved in r7 (architecture
 * ID) and r8 (atags pointer); the entry CPSR is kept in r9 and later
 * stashed in SPSR.
 */
		.section ".start", "ax"
/*
 * sort out different calling conventions
 */
		.align
		/*
		 * Always enter in ARM state for CPUs that support the ARM ISA.
		 * As of today (2014) that's exactly the members of the A and R
		 * classes.
		 */
 AR_CLASS(	.arm	)
start:
		.type	start,#function
		/*
		 * These 7 nops along with the 1 nop immediately below for
		 * !THUMB2 form 8 nops that make the compressed kernel bootable
		 * on legacy ARM systems that were assuming the kernel in a.out
		 * binary format. The boot loaders on these systems would
		 * jump 32 bytes into the image to skip the a.out header.
		 * With these 8 nops filling exactly 32 bytes, things still
		 * work as expected on these legacy systems. Thumb2 mode keeps
		 * 7 of the nops as it turns out that some boot loaders
		 * were patching the initial instructions of the kernel, i.e.
		 * had started to exploit this "patch area".
		 */
		.rept	7
		__nop
		.endr
#ifndef CONFIG_THUMB2_KERNEL
		__nop
#else
 AR_CLASS(	sub	pc, pc, #3	)	@ A/R: switch to Thumb2 mode
  M_CLASS(	nop.w			)	@ M: already in Thumb2 mode
		.thumb
#endif
		W(b)	1f			@ skip the header words below

		.word	_magic_sig	@ Magic numbers to help the loader
		.word	_magic_start	@ absolute load/run zImage address
		.word	_magic_end	@ zImage end address
		.word	0x04030201	@ endianness flag
		.word	0x45454545	@ another magic number to indicate
		.word	_magic_table	@ additional data table

		__EFI_HEADER
1:
 ARM_BE8(	setend	be		)	@ go BE8 if compiled for BE8
 AR_CLASS(	mrs	r9, cpsr	)
#ifdef CONFIG_ARM_VIRT_EXT
		bl	__hyp_stub_install	@ get into SVC mode, reversibly
#endif
		mov	r7, r1			@ save architecture ID
		mov	r8, r2			@ save atags pointer

#ifndef CONFIG_CPU_V7M
		/*
		 * Booting from Angel - need to enter SVC mode and disable
		 * FIQs/IRQs (numeric definitions from angel arm.h source).
		 * We only do this if we were in user mode on entry.
		 */
		mrs	r2, cpsr		@ get current mode
		tst	r2, #3			@ not user?
		bne	not_angel
		mov	r0, #0x17		@ angel_SWIreason_EnterSVC
 ARM(		swi	0x123456	)	@ angel_SWI_ARM
 THUMB(		svc	0xab		)	@ angel_SWI_THUMB
not_angel:
		safe_svcmode_maskall r0
		msr	spsr_cxsf, r9		@ Save the CPU boot mode in
						@ SPSR
#endif
		/*
		 * Note that some cache flushing and other stuff may
		 * be needed here - is there an Angel SWI call for this?
		 */

		/*
		 * some architecture specific code can be inserted
		 * by the linker here, but it should preserve r7, r8, and r9.
		 */

		.text
|
|
|
|
#ifdef CONFIG_AUTO_ZRELADDR
		/*
		 * Work out r4, the final kernel image address.
		 *
		 * Find the start of physical memory.  As we are executing
		 * without the MMU on, we are in the physical address space.
		 * We just need to get rid of any offset by aligning the
		 * address.
		 *
		 * This alignment is a balance between the requirements of
		 * different platforms - we have chosen 128MB to allow
		 * platforms which align the start of their physical memory
		 * to 128MB to use this feature, while allowing the zImage
		 * to be placed within the first 128MB of memory on other
		 * platforms.  Increasing the alignment means we place
		 * stricter alignment requirements on the start of physical
		 * memory, but relaxing it means that we break people who
		 * are already placing their zImage in (eg) the top 64MB
		 * of this range.
		 */
		mov	r0, pc
		and	r0, r0, #0xf8000000	@ round down to 128MB
#ifdef CONFIG_USE_OF
		adr	r1, LC1
#ifdef CONFIG_ARM_APPENDED_DTB
		/*
		 * Look for an appended DTB.  If found, we cannot use it to
		 * validate the calculated start of physical memory, as its
		 * memory nodes may need to be augmented by ATAGS stored at
		 * an offset from the same start of physical memory.
		 */
		ldr	r2, [r1, #4]		@ get &_edata
		add	r2, r2, r1		@ relocate it
		ldr	r2, [r2]		@ get DTB signature
		ldr	r3, =OF_DT_MAGIC
		cmp	r2, r3			@ do we have a DTB there?
		beq	1f			@ if yes, skip validation
#endif /* CONFIG_ARM_APPENDED_DTB */

		/*
		 * Make sure we have some stack before calling C code.
		 * No GOT fixup has occurred yet, but none of the code we're
		 * about to call uses any global variables.
		 */
		ldr	sp, [r1]		@ get stack location
		add	sp, sp, r1		@ apply relocation

		/* Validate calculated start against passed DTB */
		mov	r1, r8
		bl	fdt_check_mem_start
1:
#endif /* CONFIG_USE_OF */
		/* Determine final kernel image address. */
		add	r4, r0, #TEXT_OFFSET
#else
		ldr	r4, =zreladdr
#endif
|
|
|
|
/*
|
|
* Set up a page table only if it won't overwrite ourself.
|
|
* That means r4 < pc || r4 - 16k page directory > &_end.
|
|
* Given that r4 > &_end is most unfrequent, we add a rough
|
|
* additional 1MB of room for a possible appended DTB.
|
|
*/
|
|
mov r0, pc
|
|
cmp r0, r4
|
|
ldrcc r0, .Lheadroom
|
|
addcc r0, r0, pc
|
|
cmpcc r4, r0
|
|
orrcc r4, r4, #1 @ remember we skipped cache_on
|
|
blcs cache_on
|
|
|
|
/*
 * restart - (re)establish the working registers.  Execution also resumes
 * here after the image has relocated itself.
 *
 *   sp  = stack pointer   (LC1[0], stored relative to LC1)
 *   r6  = _edata          (LC1[1], stored relative to LC1)
 *   r9  = size of decompressed image
 *   r10 = end of this image, see below
 */
restart:	adr	r0, LC1
		ldr	sp, [r0]
		add	sp, sp, r0		@ make the offset absolute
		ldr	r6, [r0, #4]
		add	r6, r6, r0		@ likewise for _edata

		get_inflated_image_size	r9, r10, lr

#ifndef CONFIG_ZBOOT_ROM
		/* malloc space is above the relocated stack (64k max) */
		add	r10, sp, #MALLOC_SIZE
#else
		/*
		 * With ZBOOT_ROM the bss/stack is non relocatable,
		 * but someone could still run this code from RAM,
		 * in which case our reference is _edata.
		 */
		mov	r10, r6
#endif
|
|
|
|
/*
 * Appended DTB handling.  r5 ends up holding the (aligned) size of the
 * appended DTB, or 0 when there is none.
 */
		mov	r5, #0			@ init dtb size to 0
#ifdef CONFIG_ARM_APPENDED_DTB
/*
 *   r4  = final kernel address (possibly with LSB set)
 *   r5  = appended dtb size (still unknown)
 *   r6  = _edata
 *   r7  = architecture ID
 *   r8  = atags/device tree pointer
 *   r9  = size of decompressed image
 *   r10 = end of this image, including bss/stack/malloc space if non XIP
 *   sp  = stack pointer
 *
 * if there are device trees (dtb) appended to zImage, advance r10 so that the
 * dtb data will get relocated along with the kernel if necessary.
 */

		ldr	lr, [r6, #0]		@ word found at _edata
		ldr	r1, =OF_DT_MAGIC
		cmp	lr, r1
		bne	dtb_check_done		@ not found

#ifdef CONFIG_ARM_ATAG_DTB_COMPAT
		/*
		 * OK... Let's do some funky business here.
		 * If we do have a DTB appended to zImage, and we do have
		 * an ATAG list around, we want the latter to be translated
		 * and folded into the former here.  No GOT fixup has occurred
		 * yet, but none of the code we're about to call uses any
		 * global variable.
		 */

		/* Get the initial DTB size */
		ldr	r5, [r6, #4]
		be32tocpu r5, r1
		dbgadtb	r6, r5
		/* 50% DTB growth should be good enough */
		add	r5, r5, r5, lsr #1
		/* preserve 64-bit alignment */
		add	r5, r5, #7
		bic	r5, r5, #7
		/* clamp to 32KB min and 1MB max */
		cmp	r5, #(1 << 15)
		movlo	r5, #(1 << 15)
		cmp	r5, #(1 << 20)
		movhi	r5, #(1 << 20)
		/* temporarily relocate the stack past the DTB work space */
		add	sp, sp, r5

		mov	r0, r8
		mov	r1, r6
		mov	r2, r5
		bl	atags_to_fdt

		/*
		 * If returned value is 1, there is no ATAG at the location
		 * pointed by r8.  Try the typical 0x100 offset from start
		 * of RAM and hope for the best.
		 *
		 * None of the instructions between the cmp and the bleq
		 * touch the flags, so the retry still depends on r0 == 1.
		 */
		cmp	r0, #1
		sub	r0, r4, #TEXT_OFFSET
		bic	r0, r0, #1		@ drop the "cache_on skipped" flag
		add	r0, r0, #0x100
		mov	r1, r6
		mov	r2, r5
		bleq	atags_to_fdt

		sub	sp, sp, r5		@ undo the temporary stack move
#endif

		mov	r8, r6			@ use the appended device tree

		/*
		 * Make sure that the DTB doesn't end up in the final
		 * kernel's .bss area.  To do so, we adjust the decompressed
		 * kernel size to compensate if that .bss size is larger
		 * than the relocated code.
		 */
		ldr	r5, =_kernel_bss_size
		adr	r1, wont_overwrite
		sub	r1, r6, r1
		subs	r1, r5, r1
		addhi	r9, r9, r1

		/* Get the current DTB size */
		ldr	r5, [r6, #4]
		be32tocpu r5, r1

		/* preserve 64-bit alignment */
		add	r5, r5, #7
		bic	r5, r5, #7

		/* relocate some pointers past the appended dtb */
		add	r6, r6, r5
		add	r10, r10, r5
		add	sp, sp, r5
dtb_check_done:
#endif
|
|
|
|
/*
 * Check to see if we will overwrite ourselves.
 *   r4  = final kernel address (possibly with LSB set)
 *   r9  = size of decompressed image
 *   r10 = end of this image, including bss/stack/malloc space if non XIP
 * We basically want:
 *   r4 - 16k page directory >= r10 -> OK
 *   r4 + image length <= address of wont_overwrite -> OK
 * Note: the possible LSB in r4 is harmless here.
 */
		add	r10, r10, #0x4000	@ 16k page directory
		cmp	r4, r10
		bhs	wont_overwrite
		add	r10, r4, r9
		adr	r9, wont_overwrite
		cmp	r10, r9
		bls	wont_overwrite

/*
 * Relocate ourselves past the end of the decompressed kernel.
 *   r6  = _edata
 *   r10 = end of the decompressed kernel
 * Because we always copy ahead, we need to do it from the end and go
 * backward in case the source and destination overlap.
 */
		/*
		 * Bump to the next 256-byte boundary with the size of
		 * the relocation code added.  This avoids overwriting
		 * ourself when the offset is small.
		 */
		add	r10, r10, #((reloc_code_end - restart + 256) & ~255)
		bic	r10, r10, #255

		/* Get start of code we want to copy and align it down. */
		adr	r5, restart
		bic	r5, r5, #31

/* Relocate the hyp vector base if necessary */
#ifdef CONFIG_ARM_VIRT_EXT
		mrs	r0, spsr
		and	r0, r0, #MODE_MASK
		cmp	r0, #HYP_MODE
		bne	1f

		/*
		 * Compute the address of the hyp vectors after relocation.
		 * Call __hyp_set_vectors with the new address so that we
		 * can HVC again after the copy.
		 */
		adr_l	r0, __hyp_stub_vectors
		sub	r0, r0, r5
		add	r0, r0, r10
		bl	__hyp_set_vectors
1:
#endif

		sub	r9, r6, r5		@ size to copy
		add	r9, r9, #31		@ rounded up to a multiple
		bic	r9, r9, #31		@ ... of 32 bytes
		add	r6, r9, r5		@ r6 = end of source
		add	r9, r9, r10		@ r9 = end of destination

#ifdef DEBUG
		sub     r10, r6, r5
		sub     r10, r9, r10
		/*
		 * We are about to copy the kernel to a new memory area.
		 * The boundaries of the new memory area can be found in
		 * r10 and r9, whilst r5 and r6 contain the boundaries
		 * of the memory we are going to copy.
		 * Calling dbgkc will help with the printing of this
		 * information.
		 */
		dbgkc	r5, r6, r10, r9
#endif

		/* Copy 32 bytes per iteration, top down, until r6 hits r5. */
1:		ldmdb	r6!, {r0 - r3, r10 - r12, lr}
		cmp	r6, r5
		stmdb	r9!, {r0 - r3, r10 - r12, lr}
		bhi	1b

		/* Preserve offset to relocated code. */
		sub	r6, r9, r6

		mov	r0, r9			@ start of relocated zImage
		add	r1, sp, r6		@ end of relocated zImage
		bl	cache_clean_flush

		/* Continue at "restart" inside the relocated copy. */
		badr	r0, restart
		add	r0, r0, r6
		mov	pc, r0
|
|
|
|
/*
 * wont_overwrite - the image can run where it is: fix up the GOT (and,
 * when fully PIC, the BSS pointers) for the current load address.
 */
wont_overwrite:
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r11, r12}
		sub	r0, r0, r1		@ calculate the delta offset

/*
 * If delta is zero, we are running at the address we were linked at.
 *   r0  = delta
 *   r2  = BSS start
 *   r3  = BSS end
 *   r4  = kernel execution address (possibly with LSB set)
 *   r5  = appended dtb size (0 if not present)
 *   r7  = architecture ID
 *   r8  = atags pointer
 *   r11 = GOT start
 *   r12 = GOT end
 *   sp  = stack pointer
 */
		orrs	r1, r0, r5		@ neither a delta nor a DTB?
		beq	not_relocated

		add	r11, r11, r0
		add	r12, r12, r0

#ifndef CONFIG_ZBOOT_ROM
		/*
		 * If we're running fully PIC === CONFIG_ZBOOT_ROM = n,
		 * we need to fix up pointers into the BSS region.
		 * Note that the stack pointer has already been fixed up.
		 */
		add	r2, r2, r0
		add	r3, r3, r0

		/*
		 * Relocate all entries in the GOT table.
		 * Bump bss entries to _edata + dtb size
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ This fixes up C references
		cmp	r1, r2			@ if entry >= bss_start &&
		cmphs	r3, r1			@       bss_end > entry
		addhi	r1, r1, r5		@    entry += dtb size
		str	r1, [r11], #4		@ next entry
		cmp	r11, r12
		blo	1b

		/* bump our bss pointers too */
		add	r2, r2, r5
		add	r3, r3, r5

#else

		/*
		 * Relocate entries in the GOT table.  We only relocate
		 * the entries that are outside the (relocated) BSS region.
		 */
1:		ldr	r1, [r11, #0]		@ relocate entries in the GOT
		cmp	r1, r2			@ entry < bss_start ||
		cmphs	r3, r1			@ _end < entry
		addlo	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r11], #4		@ C references.
		cmp	r11, r12
		blo	1b
#endif
|
|
|
|
/*
 * not_relocated - clear the BSS, turn the cache on if that was skipped
 * earlier, decompress the kernel and leave through __enter_kernel (via
 * HYP mode when the CPU was booted in it).
 *
 * The BSS is cleared 16 bytes per iteration from r2 up to r3.
 */
not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		/*
		 * Did we skip the cache setup earlier?
		 * That is indicated by the LSB in r4.
		 * Do it now if so.
		 */
		tst	r4, #1
		bic	r4, r4, #1		@ flags still reflect the tst
		blne	cache_on

/*
 * The C runtime environment should now be setup sufficiently.
 * Set up some pointers, and start decompressing.
 *   r4  = kernel execution address
 *   r7  = architecture ID
 *   r8  = atags pointer
 */
		mov	r0, r4
		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #MALLOC_SIZE	@ 64k max
		mov	r3, r7
		bl	decompress_kernel

		get_inflated_image_size	r1, r2, r3

		mov	r0, r4			@ start of inflated image
		add	r1, r1, r0		@ end of inflated image
		bl	cache_clean_flush
		bl	cache_off

#ifdef CONFIG_ARM_VIRT_EXT
		mrs	r0, spsr		@ Get saved CPU boot mode
		and	r0, r0, #MODE_MASK
		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
		bne	__enter_kernel		@ boot kernel directly

		adr_l	r0, __hyp_reentry_vectors
		bl	__hyp_set_vectors
		__HVC(0)			@ otherwise bounce to hyp mode

		b	.			@ should never be reached
#else
		b	__enter_kernel
#endif
|
|
|
|
/*
 * Literal data used by the code above.
 *
 * LC0 holds link-time absolute addresses; its first word is its own
 * address, which lets wont_overwrite compute the load delta.  LC1,
 * .Lheadroom and .Linflated_image_size_offset hold differences between
 * symbols, so they need no fixup.
 */
		.align	2
		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_got_start		@ r11
		.word	_got_end		@ ip
		.size	LC0, . - LC0

		.type	LC1, #object
LC1:		.word	.L_user_stack_end - LC1	@ sp
		.word	_edata - LC1		@ r6
		.size	LC1, . - LC1

		/* image size + 16k page directory + 1MB for an appended DTB */
.Lheadroom:
		.word	_end - restart + 16384 + 1024*1024

		/* offset from here to the last 4 bytes of the compressed data */
.Linflated_image_size_offset:
		.long	(input_data_end - 4) - .
|
|
|
|
#ifdef CONFIG_ARCH_RPC
/*
 * params - return the RPC parameter block address (0x10000100) in r0.
 */
		.globl	params
params:		ldr	r0, =0x10000100		@ params_phys for RPC
		mov	pc, lr
		.ltorg
		.align
#endif
|
|
|
|
/*
 * dcache_line_size - get the minimum D-cache line size from the CTR register
 * on ARMv7.
 *
 * \reg = result in bytes: 4 << CTR[19:16]
 * \tmp = scratch, corrupted
 *
 * On v7-M the CTR value is read from BASEADDR_V7M_SCB + V7M_SCB_CTR
 * instead of through cp15.
 */
		.macro	dcache_line_size, reg, tmp
#ifdef CONFIG_CPU_V7M
		movw	\tmp, #:lower16:BASEADDR_V7M_SCB + V7M_SCB_CTR
		movt	\tmp, #:upper16:BASEADDR_V7M_SCB + V7M_SCB_CTR
		ldr	\tmp, [\tmp]
#else
		mrc	p15, 0, \tmp, c0, c0, 1		@ read ctr
#endif
		lsr	\tmp, \tmp, #16
		and	\tmp, \tmp, #0xf		@ cache line size encoding
		mov	\reg, #4			@ bytes per word
		mov	\reg, \reg, lsl \tmp		@ actual cache line size
		.endm
|
|
|
|
/*
 * Turn on the cache.  We need to setup some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * The work is done by the per-CPU "cache on" slot of proc_types, which
 * sits at byte offset 8 of each entry (after the ID match and mask).
 *
 * On entry,
 *  r4 = kernel execution address
 *  r7 = architecture number
 *  r8 = atags pointer
 * On exit,
 *  r0, r1, r2, r3, r9, r10, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn
|
|
|
|
/*
 * __armv4_mpu_cache_on - "cache on" method for ARMv4 MPU cores
 * (ARM94x in proc_types).
 *
 * Initialize the highest priority protection region, PR7
 * to cover all 32bit address and cacheable and bufferable.
 * r0 is corrupted.
 */
__armv4_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting
		mcr 	p15, 0, r0, c6, c7, 1

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ D-cache on
		mcr	p15, 0, r0, c2, c0, 1	@ I-cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 1	@ I-access permission
		mcr	p15, 0, r0, c5, c0, 0	@ D-access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ ...I .... ..D. WC.M
		orr	r0, r0, #0x002d		@ .... .... ..1. 11.1
		orr	r0, r0, #0x1000		@ ...1 .... .... ....

		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 0	@ flush(inval) I-Cache
		mcr	p15, 0, r0, c7, c6, 0	@ flush(inval) D-Cache
		mov	pc, lr
|
|
|
|
/*
 * __armv3_mpu_cache_on - "cache on" method for ARMv3-style MPU cores
 * (ARM74x in proc_types).
 *
 * Region PR7 is set to cover the whole 4G address space and is marked
 * cacheable and bufferable; then the W, C and M bits are set in the
 * control register.  r0 is corrupted.
 *
 * The control register is written with the value built by the orr, and
 * r0 is only zeroed afterwards for the cache invalidate.  Zeroing r0
 * before the write stored 0 in the control register and left the
 * write buffer, cache and protection unit switched off.  The order now
 * matches __armv4_mpu_cache_on.
 */
__armv3_mpu_cache_on:
		mov	r0, #0x3f		@ 4G, the whole
		mcr	p15, 0, r0, c6, c7, 0	@ PR7 Area Setting

		mov	r0, #0x80		@ PR7
		mcr	p15, 0, r0, c2, c0, 0	@ cache on
		mcr	p15, 0, r0, c3, c0, 0	@ write-buffer on

		mov	r0, #0xc000
		mcr	p15, 0, r0, c5, c0, 0	@ access permission

		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		/*
		 * ?? ARMv3 MMU does not allow reading the control register,
		 * does this really work on ARMv3 MPU?
		 */
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
						@ .... .... .... WC.M
		orr	r0, r0, #0x000d		@ .... .... .... 11.1
		mcr	p15, 0, r0, c1, c0, 0	@ write control reg

		/* invalidate again now that the cache is enabled */
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr
|
|
|
|
/*
 * CB_BITS - section descriptor bits OR-ed into the RAM mappings built by
 * __setup_mmu: bit 3 only for a write-through D-cache, bits 3 and 2
 * otherwise.
 */
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
#define CB_BITS 0x08
#else
#define CB_BITS 0x0c
#endif
|
|
|
|
/*
 * __setup_mmu - build a 1:1 section-mapped page directory in the 16k
 * below the kernel execution address.
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = section descriptor bits to use for RAM
 * On exit,
 *  r3 = page directory base (16k aligned)
 *  r0, r1, r2, r9, r10 corrupted
 */
__setup_mmu:	sub	r3, r4, #0x4000		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r9, r0, lsr #18
		mov	r9, r9, lsl #18		@ start of RAM
		add	r10, r9, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12		@ XN|U + section mapping
		orr	r1, r1, #3 << 10	@ AP=11
		add	r2, r3, #0x4000		@ end of the page directory
1:		cmp	r1, r9			@ if virt > start of RAM
		cmphs	r10, r1			@   && end of RAM > virt
		bic	r1, r1, #0x1c		@ clear XN|U + C + B
		orrlo	r1, r1, #0x10		@ Set XN|U for non-RAM
		orrhs	r1, r1, r6		@ set RAM section settings
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #0x100000	@ next 1MB section
		teq	r0, r2
		bne	1b
/*
 * If ever we are running from Flash, then we surely want the cache
 * to be enabled also for our execution instance...  We map 2MB of it
 * so there is no map overlap problem for up to 1 MB compressed kernel.
 * If the execution is in RAM then we would only be duplicating the above.
 */
		orr	r1, r6, #0x04		@ ensure B is set for this
		orr	r1, r1, #3 << 10
		mov	r2, pc
		mov	r2, r2, lsr #20		@ section index of the pc
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2	@ its page directory slot
		str	r1, [r0], #4
		add	r1, r1, #0x100000
		str	r1, [r0]
		mov	pc, lr
ENDPROC(__setup_mmu)
|
|
|
|
@ __armv6_mmu_cache_on - "cache on" method for ARMv6.
@
@ Enable unaligned access on v6, to allow better code generation
@ for the decompressor C code, then continue with the ARMv4 method.
__armv6_mmu_cache_on:
		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
		bic	r0, r0, #2		@ A (no unaligned access fault)
		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
		mcr	p15, 0, r0, c1, c0, 0	@ write SCTLR
		b	__armv4_mmu_cache_on
|
|
|
|
/*
 * __arm926ejs_mmu_cache_on - ARM926EJ-S "cache on" method.  Optionally
 * puts the D-cache in write-through mode, then falls through into
 * __armv4_mmu_cache_on.
 */
__arm926ejs_mmu_cache_on:
#ifdef CONFIG_CPU_DCACHE_WRITETHROUGH
		mov	r0, #4			@ put dcache in WT mode
		mcr	p15, 7, r0, c15, c0, 0
#endif

/*
 * __armv4_mmu_cache_on - generic ARMv4+ MMU "cache on" method.
 * The return address is kept in r12 across the nested calls.  Without
 * CONFIG_MMU this just returns.
 */
__armv4_mmu_cache_on:
		mov	r12, lr
#ifdef CONFIG_MMU
		mov	r6, #CB_BITS | 0x12	@ U
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x0030
 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mov	pc, r12
|
|
|
|
/*
 * __armv7_mmu_cache_on - "cache on" method for CPUs using the new CPU ID
 * scheme.
 *
 * With CONFIG_MMU, ID_MMFR0[3:0] decides whether page tables are built
 * and the MMU is enabled.  The flags from the second "tst r11, #0xf"
 * stay live down to the last conditional mcr, so nothing in between may
 * alter them.
 *
 * The return address is kept in r12; r0, r1, r6 and r11 are corrupted
 * (plus whatever __setup_mmu corrupts).
 */
__armv7_mmu_cache_on:
		enable_cp15_barriers	r11
		mov	r12, lr
#ifdef CONFIG_MMU
		mrc	p15, 0, r11, c0, c1, 4	@ read ID_MMFR0
		tst	r11, #0xf		@ VMSA
		movne	r6, #CB_BITS | 0x02	@ !XN
		blne	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		tst	r11, #0xf		@ VMSA
		mcrne	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		bic	r0, r0, #1 << 28	@ clear SCTLR.TRE
		orr	r0, r0, #0x5000		@ I-cache enable, RR cache replacement
		orr	r0, r0, #0x003c		@ write buffer (sets bits [5:2])
		bic	r0, r0, #2		@ A (no unaligned access fault)
		orr	r0, r0, #1 << 22	@ U (v6 unaligned access model)
						@ (needed for ARM1176)
#ifdef CONFIG_MMU
 ARM_BE8(	orr	r0, r0, #1 << 25 )	@ big-endian page tables
		mrcne   p15, 0, r6, c2, c0, 2   @ read ttb control reg
		orrne	r0, r0, #1		@ MMU enabled
		movne	r1, #0xfffffffd		@ domain 0 = client
		bic     r6, r6, #1 << 31        @ 32-bit translation system
		bic     r6, r6, #(7 << 0) | (1 << 4)	@ use only ttbr0
		mcrne	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcrne	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcrne   p15, 0, r6, c2, c0, 2   @ load ttb control
#endif
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back
		mov	r0, #0
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, r12
|
|
|
|
/*
 * __fa526_cache_on - "cache on" method for FA526.
 * Builds the page tables, invalidates cache and TLB, then enables the
 * I-cache and hands over to __common_mmu_cache_on.  The return address
 * is kept in r12.
 */
__fa526_cache_on:
		mov	r12, lr
		mov	r6, #CB_BITS | 0x12	@ U
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ Invalidate whole cache
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		bl	__common_mmu_cache_on
		mov	r0, #0
		mcr	p15, 0, r0, c8, c7, 0	@ flush UTLB
		mov	pc, r12
|
|
|
|
/*
 * __common_mmu_cache_on - shared tail of the MMU "cache on" methods.
 *
 * On entry,
 *  r0 = control register value prepared by the caller
 *  r3 = page table pointer
 * r1 is corrupted.  The control register write and read-back sit on a
 * cache-line-aligned boundary; the final instruction returns to lr
 * ("r0, lsr #32" contributes 0) while depending on the read-back value.
 *
 * The body is empty for CONFIG_THUMB2_KERNEL builds.
 */
__common_mmu_cache_on:
#ifndef CONFIG_THUMB2_KERNEL
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		b	1f
		.align	5			@ cache line aligned
1:		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mrc	p15, 0, r0, c1, c0, 0	@ and read it back to
		sub	pc, lr, r0, lsr #32	@ properly flush pipeline
#endif
|
|
|
|
/* Size of one proc_types entry: ID match, ID mask and three method slots. */
#define PROC_ENTRY_SIZE (4*5)

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 * proc_types is scanned from the top until
 * ((processor ID ^ match) & mask) == 0; control then transfers to
 * entry + r3.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r9  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
#ifdef CONFIG_CPU_CP15
		mrc	p15, 0, r9, c0, c0	@ get processor ID
#elif defined(CONFIG_CPU_V7M)
		/*
		 * On v7-M the processor id is located in the V7M_SCB_CPUID
		 * register, but as cache handling is IMPLEMENTATION DEFINED on
		 * v7-M (if existent at all) we just return early here.
		 * If V7M_SCB_CPUID were used the cpu ID functions (i.e.
		 * __armv7_mmu_cache_{on,off,flush}) would be selected which
		 * use cp15 registers that are not implemented on v7-M.
		 */
		bx	lr
#else
		ldr	r9, =CONFIG_PROCESSOR_ID
#endif
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r9		@ (real ^ match)
		tst	r1, r2			@       & mask
 ARM(		addeq	pc, r12, r3		) @ call cache function
 THUMB(		addeq	r12, r3			)
 THUMB(		moveq	pc, r12			) @ call cache function
		add	r12, r12, #PROC_ENTRY_SIZE	@ no match: next entry
		b	1b
|
|
|
|
/*
|
|
* Table for cache operations. This is basically:
|
|
* - CPU ID match
|
|
* - CPU ID mask
|
|
* - 'cache on' method instruction
|
|
* - 'cache off' method instruction
|
|
* - 'cache flush' method instruction
|
|
*
|
|
* We match an entry using: ((real_id ^ match) & mask) == 0
|
|
*
|
|
* Writethrough caches generally only need 'on' and 'off'
|
|
* methods. Writeback caches _must_ have the flush method
|
|
* defined.
|
|
*/
|
|
.align 2
|
|
.type proc_types,#object
|
|
proc_types:
|
|
.word 0x41000000 @ old ARM ID
|
|
.word 0xff00f000
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
|
|
.word 0x41007000 @ ARM7/710
|
|
.word 0xfff8fe00
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
|
|
.word 0x41807200 @ ARM720T (writethrough)
|
|
.word 0xffffff00
|
|
W(b) __armv4_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
|
|
.word 0x41007400 @ ARM74x
|
|
.word 0xff00ff00
|
|
W(b) __armv3_mpu_cache_on
|
|
W(b) __armv3_mpu_cache_off
|
|
W(b) __armv3_mpu_cache_flush
|
|
|
|
.word 0x41009400 @ ARM94x
|
|
.word 0xff00ff00
|
|
W(b) __armv4_mpu_cache_on
|
|
W(b) __armv4_mpu_cache_off
|
|
W(b) __armv4_mpu_cache_flush
|
|
|
|
.word 0x41069260 @ ARM926EJ-S (v5TEJ)
|
|
.word 0xff0ffff0
|
|
W(b) __arm926ejs_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __armv5tej_mmu_cache_flush
|
|
|
|
.word 0x00007000 @ ARM7 IDs
|
|
.word 0x0000f000
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
|
|
@ Everything from here on will be the new ID system.
|
|
|
|
.word 0x4401a100 @ sa110 / sa1100
|
|
.word 0xffffffe0
|
|
W(b) __armv4_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __armv4_mmu_cache_flush
|
|
|
|
.word 0x6901b110 @ sa1110
|
|
.word 0xfffffff0
|
|
W(b) __armv4_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __armv4_mmu_cache_flush
|
|
|
|
.word 0x56056900
|
|
.word 0xffffff00 @ PXA9xx
|
|
W(b) __armv4_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __armv4_mmu_cache_flush
|
|
|
|
.word 0x56158000 @ PXA168
|
|
.word 0xfffff000
|
|
W(b) __armv4_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __armv5tej_mmu_cache_flush
|
|
|
|
.word 0x56050000 @ Feroceon
|
|
.word 0xff0f0000
|
|
W(b) __armv4_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __armv5tej_mmu_cache_flush
|
|
|
|
#ifdef CONFIG_CPU_FEROCEON_OLD_ID
|
|
/* this conflicts with the standard ARMv5TE entry */
|
|
.long 0x41009260 @ Old Feroceon
|
|
.long 0xff00fff0
|
|
b __armv4_mmu_cache_on
|
|
b __armv4_mmu_cache_off
|
|
b __armv5tej_mmu_cache_flush
|
|
#endif
|
|
|
|
.word 0x66015261 @ FA526
|
|
.word 0xff01fff1
|
|
W(b) __fa526_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __fa526_cache_flush
|
|
|
|
@ These match on the architecture ID
|
|
|
|
.word 0x00020000 @ ARMv4T
|
|
.word 0x000f0000
|
|
W(b) __armv4_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __armv4_mmu_cache_flush
|
|
|
|
.word 0x00050000 @ ARMv5TE
|
|
.word 0x000f0000
|
|
W(b) __armv4_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __armv4_mmu_cache_flush
|
|
|
|
.word 0x00060000 @ ARMv5TEJ
|
|
.word 0x000f0000
|
|
W(b) __armv4_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __armv5tej_mmu_cache_flush
|
|
|
|
.word 0x0007b000 @ ARMv6
|
|
.word 0x000ff000
|
|
W(b) __armv6_mmu_cache_on
|
|
W(b) __armv4_mmu_cache_off
|
|
W(b) __armv6_mmu_cache_flush
|
|
|
|
.word 0x000f0000 @ new CPU Id
|
|
.word 0x000f0000
|
|
W(b) __armv7_mmu_cache_on
|
|
W(b) __armv7_mmu_cache_off
|
|
W(b) __armv7_mmu_cache_flush
|
|
|
|
.word 0 @ unrecognised type
|
|
.word 0
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
mov pc, lr
|
|
THUMB( nop )
|
|
|
|
.size proc_types, . - proc_types
|
|
|
|
/*
 * Build-time check: the table that ends just above must be a whole
 * number of PROC_ENTRY_SIZE-sized entries.
 *
 * If the assembler reports
 *	non-constant expression in ".if" statement
 * for the line below, look for an entry that uses a plain "b" where it
 * should have used W(b).
 */
		.if	((. - proc_types) % PROC_ENTRY_SIZE) != 0
		.error	"The size of one or more proc_types entries is wrong."
		.endif
|
|
|
|
/*
 * cache_off - turn off the cache and the MMU.
 *
 * ARMv3 does not support reading the control register, but ARMv4 does.
 * The actual work is handed to call_cache_fn with r3 = 12, which is the
 * position of the "off" branch inside a proc_types entry (two words of
 * ID and mask, then the on, off and flush slots).
 *
 * On exit,
 *  r0, r1, r2, r3, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r7, r8
 */
		.align	5
cache_off:
		mov	r3, #12			@ select the cache_off slot
		b	call_cache_fn
|
|
|
|
/*
 * ARMv4 MPU variant of cache_off.  Returns with lr; r0 is corrupted.
 */
__armv4_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0, 0	@ r0 = control register
		bic	r0, r0, #((1 << 3) | (1 << 2) | (1 << 0))
		mcr	p15, 0, r0, c1, c0, 0	@ MPU and cache are now off
		mov	r0, #0			@ operand for the ops below
		mcr	p15, 0, r0, c7, c10, 4	@ drain the write buffer
		mcr	p15, 0, r0, c7, c6, 0	@ flush the D-cache
		mcr	p15, 0, r0, c7, c5, 0	@ flush the I-cache
		mov	pc, lr
|
|
|
|
/*
 * ARMv3 MPU variant of cache_off.  Returns with lr; r0 is corrupted.
 */
__armv3_mpu_cache_off:
		mrc	p15, 0, r0, c1, c0, 0	@ r0 = control register
		bic	r0, r0, #((1 << 3) | (1 << 2) | (1 << 0))
		mcr	p15, 0, r0, c1, c0, 0	@ MPU and cache are now off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ v3: invalidate the whole cache
		mov	pc, lr
|
|
|
|
/*
 * ARMv4 MMU variant of cache_off; the proc_types table also uses it
 * for the v5 and v6 entries.  On these cores bit 3 of the control
 * register is the write buffer enable, so it is cleared together with
 * the MMU (bit 0) and cache (bit 2) enables.
 * Without CONFIG_MMU this is just a return.
 */
__armv4_mmu_cache_off:
#ifdef CONFIG_MMU
		mrc	p15, 0, r0, c1, c0, 0	@ r0 = control register
		bic	r0, r0, #((1 << 3) | (1 << 2) | (1 << 0))
		mcr	p15, 0, r0, c1, c0, 0	@ MMU and cache are now off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7, 0	@ v4: invalidate the whole cache
		mcr	p15, 0, r0, c8, c7, 0	@ v4: invalidate the whole TLB
#endif
		mov	pc, lr
|
|
|
|
/*
 * ARMv7 (new CPU ID scheme) variant of cache_off.
 *
 * Only SCTLR.C (bit 2) and, when an MMU is configured, SCTLR.M (bit 0)
 * are cleared.  Bit 3 must be left alone: it was the write buffer
 * enable on ARMv4, but on hardware implementing ARMv8.2-LSMAOC it is
 * nTLSMD, and clearing it makes A32/T32 load/store multiple accesses to
 * Device memory trap with an alignment fault.
 *
 * Returns with lr; r0 is corrupted.
 */
__armv7_mmu_cache_off:
		mrc	p15, 0, r0, c1, c0, 0	@ r0 = SCTLR
#ifdef CONFIG_MMU
		bic	r0, r0, #((1 << 2) | (1 << 0))	@ C and M
#else
		bic	r0, r0, #(1 << 2)	@ C only
#endif
		mcr	p15, 0, r0, c1, c0, 0	@ MMU and cache are now off
		mov	r0, #0
#ifdef CONFIG_MMU
		mcr	p15, 0, r0, c8, c7, 0	@ invalidate the whole TLB
#endif
		mcr	p15, 0, r0, c7, c5, 6	@ invalidate BTC
		mcr	p15, 0, r0, c7, c10, 4	@ DSB
		mcr	p15, 0, r0, c7, c5, 4	@ ISB
		mov	pc, lr
|
|
|
|
/*
 * cache_clean_flush - clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r0 = start address
 *  r1 = end address (exclusive)
 *  r4 = bit 0 is tested first thing by every CPU-specific flush routine
 *       below; when it is set they skip the D-cache maintenance (most
 *       of them simply return)
 * On exit,
 *  r1, r2, r3, r9, r10, r11, r12 corrupted
 * This routine must preserve:
 *  r4, r6, r7, r8
 */
		.align	5
cache_clean_flush:
		mov	r11, r1			@ the flush routine takes the end in r11
		mov	r3, #16			@ select the flush slot
		b	call_cache_fn
|
|
|
|
/*
 * ARMv4 MPU cache flush: clean and invalidate the D-cache by index
 * (8 segments of 64 entries), then invalidate the I-cache and drain the
 * write buffer.  Nothing is done when bit 0 of r4 is set.
 *
 * NOTE(review): r12 is used as the operand of three of the operations
 * without being loaded in this routine.
 */
__armv4_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r3, #0
		mov	r2, #1			@ tested further down; never changed
		mcr	p15, 0, r12, c7, c6, 0	@ invalidate D cache
		mov	r1, #(7 << 5)		@ segment field: 8 segments
.Lmpu_flush_segment:
		orr	r3, r1, #(63 << 26)	@ entry field: 64 entries
.Lmpu_flush_entry:
		mcr	p15, 0, r3, c7, c14, 2	@ clean & invalidate D index
		subs	r3, r3, #(1 << 26)
		bcs	.Lmpu_flush_entry	@ entries 63 down to 0
		subs	r1, r1, #(1 << 5)
		bcs	.Lmpu_flush_segment	@ segments 7 down to 0

		teq	r2, #0
		mcrne	p15, 0, r12, c7, c5, 0	@ invalidate I cache
		mcr	p15, 0, r12, c7, c10, 4	@ drain WB
		mov	pc, lr
|
|
|
|
/*
 * FA526 cache flush.  Nothing is done when bit 0 of r4 is set.
 * r1 is corrupted.
 */
__fa526_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r1, #0			@ operand for the ops below
		mcr	p15, 0, r1, c7, c14, 0	@ D cache: clean and invalidate
		mcr	p15, 0, r1, c7, c5, 0	@ I cache: flush
		mcr	p15, 0, r1, c7, c10, 4	@ write buffer: drain
		mov	pc, lr
|
|
|
|
/*
 * ARMv6 cache flush.  The two clean+invalidate operations are only
 * issued when bit 0 of r4 is clear; the I-cache/BTB invalidate and the
 * write buffer drain are unconditional.  r1 is corrupted.
 */
__armv6_mmu_cache_flush:
		tst	r4, #1			@ flags consumed by the mcreq below
		mov	r1, #0			@ does not alter the flags
		mcreq	p15, 0, r1, c7, c14, 0	@ clean+invalidate D
		mcr	p15, 0, r1, c7, c5, 0	@ invalidate I+BTB
		mcreq	p15, 0, r1, c7, c15, 0	@ clean+invalidate unified
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
|
|
|
|
/*
 * ARMv7 cache flush.
 *
 * On entry,
 *  r0  = start address
 *  r11 = end address (exclusive), as set up by cache_clean_flush
 *  r4  = bit 0 set skips the D-cache maintenance
 *
 * With a hierarchical cache the range [r0, r11) is cleaned and
 * invalidated line by line; otherwise the whole D-cache is cleaned and
 * invalidated.  Either way the I-cache and branch predictor are
 * invalidated afterwards.
 *
 * The by-VA loop treats addresses as unsigned quantities: a signed
 * comparison would take a range that straddles 0x80000000 for an empty
 * one and skip the clean altogether.  The carry out of the address
 * increment ends the loop if the range reaches the last cache line of
 * the address space, so the unsigned comparison cannot spin after the
 * address wraps to zero.
 */
__armv7_mmu_cache_flush:
		enable_cp15_barriers	r10
		tst	r4, #1
		bne	iflush
		mrc	p15, 0, r10, c0, c1, 5	@ read ID_MMFR1
		tst	r10, #0xf << 16		@ hierarchical cache (ARMv7)
		mov	r10, #0			@ flags from the tst are kept
		beq	hierarchical
		mcr	p15, 0, r10, c7, c14, 0	@ clean+invalidate D
		b	iflush
hierarchical:
		dcache_line_size r1, r2		@ r1 := dcache min line size
		sub	r2, r1, #1		@ r2 := line size mask
		bic	r0, r0, r2		@ round down start to line size
		sub	r11, r11, #1		@ end address is exclusive
		bic	r11, r11, r2		@ round down end to line size
0:		cmp	r0, r11			@ finished?
		bhi	iflush			@ unsigned: these are addresses
		mcr	p15, 0, r0, c7, c14, 1	@ Dcache clean/invalidate by VA
		adds	r0, r0, r1		@ next line; C set on wrap-around
		bcc	0b
iflush:
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 0	@ invalidate I+BTB
		mcr	p15, 0, r10, c7, c10, 4	@ DSB
		mcr	p15, 0, r10, c7, c5, 4	@ ISB
		mov	pc, lr
|
|
|
|
/*
 * ARMv5TEJ cache flush.  Nothing is done when bit 0 of r4 is set.
 * The test/clean/invalidate operation reports its status in the
 * condition flags and is repeated until it leaves Z set.
 */
__armv5tej_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr
.Lv5tej_clean_more:
		mrc	p15, 0, APSR_nzcv, c7, c14, 3	@ test,clean,invalidate D cache
		bne	.Lv5tej_clean_more
		mcr	p15, 0, r0, c7, c5, 0	@ I cache: flush
		mcr	p15, 0, r0, c7, c10, 4	@ write buffer: drain
		mov	pc, lr
|
|
|
|
/*
 * ARMv4 cache flush done in software: read twice the D-cache size worth
 * of memory, one word per cache line, starting at the current pc rounded
 * down to 64 bytes, then flush both caches and drain the write buffer.
 * Nothing is done when bit 0 of r4 is set.
 *
 * The D-cache size and line length are decoded from the cache type
 * register; the defaults (32K, 32-byte lines) are kept when that read
 * returns the same value as r9.
 * NOTE(review): r9 is presumably the CPU ID left there by call_cache_fn,
 * so that equality would mean there is no cache type register; confirm
 * against call_cache_fn.
 *
 * r1, r2, r3 and r11 are corrupted.
 */
__armv4_mmu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r11, #32		@ default: 32 byte line size
		mov	r2, #(64 * 1024)	@ default: 32K dcache size (*2)
		mrc	p15, 0, r3, c0, c0, 1	@ read cache type
		teq	r3, r9			@ cache ID register present?
		beq	no_cache_id
		mov	r1, r3, lsr #18
		and	r1, r1, #7		@ r1 = size field
		mov	r2, #1024
		mov	r2, r2, lsl r1		@ base dcache size *2
		tst	r3, #(1 << 14)		@ test M bit
		addne	r2, r2, r2, lsr #1	@ +1/2 size if M == 1
		mov	r3, r3, lsr #12
		and	r3, r3, #3		@ r3 = line length field
		mov	r11, #8
		mov	r11, r11, lsl r3	@ cache line size in bytes
no_cache_id:
		mov	r1, pc
		bic	r1, r1, #63		@ align to longest cache line
		add	r2, r1, r2		@ r2 = end of the region to read
.Larmv4_flush_read:
 ARM(		ldr	r3, [r1], r11		) @ s/w flush D cache
 THUMB(		ldr	r3, [r1]		) @ s/w flush D cache
 THUMB(		add	r1, r1, r11		)
		teq	r1, r2
		bne	.Larmv4_flush_read

		mcr	p15, 0, r1, c7, c5, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c6, 0	@ flush D cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr
|
|
|
|
/*
 * ARMv3 cache flush, shared by the MMU and MPU variants (both labels
 * name the same code).  Nothing is done when bit 0 of r4 is set.
 * r1 is corrupted.
 */
__armv3_mmu_cache_flush:
__armv3_mpu_cache_flush:
		tst	r4, #1
		movne	pc, lr
		mov	r1, #0
		mcr	p15, 0, r1, c7, c0, 0	@ v3: invalidate the whole cache
		mov	pc, lr
|
|
|
|
/*
|
|
* Various debugging routines for printing hex characters and
|
|
* memory, which again must be relocatable.
|
|
*/
|
|
#ifdef DEBUG
|
|
.align 2
|
|
@ Scratch buffer for phex: room for 11 hex digits and the closing NUL.
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

@ phex: print the low r1 hex digits of r0 (upper case), by building the
@ string in phexbuf from right to left and branching to puts.
@ phex corrupts {r0, r1, r2, r3}
phex:
		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]		@ NUL goes after the last digit
.Lphex_digit:
		subs	r1, r1, #1		@ step one position to the left
		movmi	r0, r3			@ no positions left:
		bmi	puts			@   print phexbuf, return from there
		and	r2, r0, #0xf		@ lowest nibble of the value
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7		@ values 10-15 become A-F
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	.Lphex_digit
|
|
|
|
@ puts: output the NUL-terminated string at r0, one byte at a time,
@ through the loadsp/writeb macros (defined elsewhere; presumably the
@ platform debug port).  Every byte is followed by a busy-wait delay
@ and every newline by a carriage return.
@ puts corrupts {r0, r1, r2, r3}
puts:		loadsp	r3, r2, r1
.Lputs_next:
		ldrb	r2, [r0], #1		@ next byte of the string
		teq	r2, #0
		moveq	pc, lr			@ NUL: done
.Lputs_emit:
		writeb	r2, r3, r1
		mov	r1, #0x00020000		@ delay loop count
.Lputs_delay:
		subs	r1, r1, #1
		bne	.Lputs_delay
		teq	r2, #'\n'
		moveq	r2, #'\r'		@ newline just sent: add a CR
		beq	.Lputs_emit
		teq	r0, #0			@ r0 == 0 marks a call from putc
		bne	.Lputs_next
		mov	pc, lr

@ putc: output the single character in r0.  Shares the emit/delay code
@ of puts; r0 is zeroed so that code returns after one character.
@ putc corrupts {r0, r1, r2, r3}
putc:
		mov	r2, r0			@ must come first: r0 is scratch below
		loadsp	r3, r1, r0
		mov	r0, #0
		b	.Lputs_emit
|
|
|
|
@ memdump: hex-dump 64 words starting at r0, eight words per line.
@ Each line begins with the address of its first word and a colon; an
@ extra space is printed after the fourth word.
@ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr}
memdump:
		mov	r12, r0			@ r12 = base address
		mov	r10, lr			@ lr is lost to the bl calls below
		mov	r11, #0			@ r11 = word index
.Lmemdump_line:
		mov	r0, r11, lsl #2
		add	r0, r0, r12		@ address of the first word of the line
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
.Lmemdump_word:
		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3			@ fourth word of the line?
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7			@ eighth word of the line?
		bne	.Lmemdump_word
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	.Lmemdump_line
		mov	pc, r10
|
|
#endif
|
|
|
|
.ltorg
|
|
|
|
#ifdef CONFIG_ARM_VIRT_EXT
|
|
.align 5
|
|
/*
 * Vector table that efi_enter_kernel installs as the HYP vector base
 * when it keeps running in HYP mode on the firmware mapping.  Only the
 * entries that lead to the kernel are live; every other slot branches
 * to itself.  Each slot is one W(b), so the order below is the layout.
 */
__hyp_reentry_vectors:
		W(b)	.			@ slot 0: reset
		W(b)	.			@ slot 1: undef
#ifdef CONFIG_EFI_STUB
		W(b)	__enter_kernel_from_hyp	@ slot 2: hvc from HYP
#else
		W(b)	.			@ slot 2: svc
#endif
		W(b)	.			@ slot 3: pabort
		W(b)	.			@ slot 4: dabort
		W(b)	__enter_kernel		@ slot 5: hyp
		W(b)	.			@ slot 6: irq
		W(b)	.			@ slot 7: fiq
|
|
#endif /* CONFIG_ARM_VIRT_EXT */
|
|
|
|
/*
 * Hand over to the kernel whose entry point is in r4, with
 *  r0 = 0
 *  r1 = architecture number (kept in r7)
 *  r2 = atags pointer (kept in r8)
 */
__enter_kernel:
		mov	r2, r8			@ atags pointer
		mov	r1, r7			@ architecture number
		mov	r0, #0			@ must be 0
 ARM(		mov	pc, r4		)	@ call kernel
 M_CLASS(	add	r4, r4, #1	)	@ enter in Thumb mode for M class
 THUMB(		bx	r4		)	@ entry point is always ARM for A/R classes
|
|
|
|
reloc_code_end:
|
|
|
|
#ifdef CONFIG_EFI_STUB
|
|
/*
 * Reached through the hvc slot of __hyp_reentry_vectors: switch the
 * HYP mode MMU and caches off, then enter the kernel.  r0 is corrupted.
 */
__enter_kernel_from_hyp:
		mrc	p15, 4, r0, c1, c0, 0	@ r0 = HSCTLR
		bic	r0, r0, #((1 << 2) | (1 << 0))	@ caches and MMU off
		mcr	p15, 4, r0, c1, c0, 0	@ write it back
		isb
		b	__enter_kernel
|
|
|
|
/*
 * efi_enter_kernel - entry point used with CONFIG_EFI_STUB.
 *
 * On entry,
 *  r0 = image base
 *  r1 = DT pointer
 *  r2 = DT size (the DT region cleaned below is [r1, r1 + r2))
 *
 * Cleans the code that may run with the MMU off and the DT to memory,
 * works out whether the caches are usable (bit 0 of r4 is set if not),
 * switches to the decompressor stack and continues at wont_overwrite
 * with r4 = image base (possibly with LSB set), r5 = 0, r7 = 0xFFFFFFFF
 * and r8 = DT pointer.  Control does not come back to the caller.
 *
 * The DT size is parked in r7 across the first cache_clean_flush call:
 * that call corrupts r2 but preserves r7, and r7 is not needed for
 * anything else until the machine ID is loaded at the very end.
 */
ENTRY(efi_enter_kernel)
		mov	r4, r0			@ preserve image base
		mov	r8, r1			@ preserve DT pointer
		mov	r7, r2			@ preserve DT size

		adr_l	r0, call_cache_fn
		adr	r1, 0f			@ clean the region of code we
		bl	cache_clean_flush	@ may run with the MMU off
		mov	r2, r7			@ r2 was corrupted: restore DT size

#ifdef CONFIG_ARM_VIRT_EXT
		@
		@ The EFI spec does not support booting on ARM in HYP mode,
		@ since it mandates that the MMU and caches are on, with all
		@ 32-bit addressable DRAM mapped 1:1 using short descriptors.
		@
		@ While the EDK2 reference implementation adheres to this,
		@ U-Boot might decide to enter the EFI stub in HYP mode
		@ anyway, with the MMU and caches either on or off.
		@
		mrs	r0, cpsr		@ get the current mode
		msr	spsr_cxsf, r0		@ record boot mode
		and	r0, r0, #MODE_MASK	@ are we running in HYP mode?
		cmp	r0, #HYP_MODE
		bne	.Lefi_svc

		mrc	p15, 4, r1, c1, c0, 0	@ read HSCTLR
		tst	r1, #0x1		@ MMU enabled at HYP?
		beq	.Lefi_hyp_mmu_off

		@
		@ When running in HYP mode with the caches on, we are better
		@ off just carrying on using the cached 1:1 mapping that the
		@ firmware provided. Set up the HYP vectors so HVC instructions
		@ issued from HYP mode take us to the correct handler code. We
		@ will disable the MMU before jumping to the kernel proper.
		@
 ARM(		bic	r1, r1, #(1 << 30)	) @ clear HSCTLR.TE
 THUMB(		orr	r1, r1, #(1 << 30)	) @ set HSCTLR.TE
		mcr	p15, 4, r1, c1, c0, 0
		adr	r0, __hyp_reentry_vectors
		mcr	p15, 4, r0, c12, c0, 0	@ set HYP vector base (HVBAR)
		isb
		b	.Lefi_hyp

		@
		@ When running in HYP mode with the caches off, we need to drop
		@ into SVC mode now, and let the decompressor set up its cached
		@ 1:1 mapping as usual.
		@
.Lefi_hyp_mmu_off:
		mov	r9, r4			@ preserve image base
		bl	__hyp_stub_install	@ install HYP stub vectors
		safe_svcmode_maskall	r1	@ drop to SVC mode
		msr	spsr_cxsf, r0		@ record boot mode
		orr	r4, r9, #1		@ restore image base and set LSB
		b	.Lefi_hyp
.Lefi_svc:
#endif
		mrc	p15, 0, r0, c1, c0, 0	@ read SCTLR
		tst	r0, #0x1		@ MMU enabled?
		orreq	r4, r4, #1		@ set LSB if not

.Lefi_hyp:
		mov	r0, r8			@ DT start
		add	r1, r8, r2		@ DT end
		bl	cache_clean_flush

		adr	r0, 0f			@ switch to our stack
		ldr	sp, [r0]		@ offset from 0f to the stack end
		add	sp, sp, r0

		mov	r5, #0			@ appended DTB size
		mov	r7, #0xFFFFFFFF		@ machine ID
		b	wont_overwrite
ENDPROC(efi_enter_kernel)
0:		.long	.L_user_stack_end - .
|
|
#endif
|
|
|
|
@ 4096 bytes of stack in a section of their own; %nobits means the
@ section takes no space in the image file.  efi_enter_kernel points sp
@ at .L_user_stack_end.
		.align
		.section ".stack", "aw", %nobits
.L_user_stack:
		.space	4096
.L_user_stack_end:
|