/* * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ #include #include #include #include #include .globl smc .globl zeromem .globl zero_normalmem .globl memcpy4 .globl disable_mmu_icache_secure .globl disable_mmu_secure .globl fixup_gdt_reloc #define PAGE_START_MASK ~(PAGE_SIZE_MASK) func smc /* * For AArch32 only r0-r3 will be in the registers; * rest r4-r6 will be pushed on to the stack. So here, we'll * have to load them from the stack to registers r4-r6 explicitly. * Clobbers: r4-r6 */ ldm sp, {r4, r5, r6} smc #0 endfunc smc /* ----------------------------------------------------------------------- * void zeromem(void *mem, unsigned int length) * * Initialise a region in normal memory to 0. This functions complies with the * AAPCS and can be called from C code. * * ----------------------------------------------------------------------- */ func zeromem /* * Readable names for registers * * Registers r0, r1 and r2 are also set by zeromem which * branches into the fallback path directly, so cursor, length and * stop_address should not be retargeted to other registers. */ cursor .req r0 /* Start address and then current address */ length .req r1 /* Length in bytes of the region to zero out */ /* * Reusing the r1 register as length is only used at the beginning of * the function. */ stop_address .req r1 /* Address past the last zeroed byte */ zeroreg1 .req r2 /* Source register filled with 0 */ zeroreg2 .req r3 /* Source register filled with 0 */ tmp .req r12 /* Temporary scratch register */ mov zeroreg1, #0 /* stop_address is the address past the last to zero */ add stop_address, cursor, length /* * Length cannot be used anymore as it shares the same register with * stop_address. */ .unreq length /* * If the start address is already aligned to 8 bytes, skip this loop. */ tst cursor, #(8-1) beq .Lzeromem_8bytes_aligned /* Calculate the next address aligned to 8 bytes */ orr tmp, cursor, #(8-1) adds tmp, tmp, #1 /* If it overflows, fallback to byte per byte zeroing */ beq .Lzeromem_1byte_aligned /* If the next aligned address is after the stop address, fall back */ cmp tmp, stop_address bhs .Lzeromem_1byte_aligned /* zero byte per byte */ 1: strb zeroreg1, [cursor], #1 cmp cursor, tmp bne 1b /* zero 8 bytes at a time */ .Lzeromem_8bytes_aligned: /* Calculate the last 8 bytes aligned address. */ bic tmp, stop_address, #(8-1) cmp cursor, tmp bhs 2f mov zeroreg2, #0 1: stmia cursor!, {zeroreg1, zeroreg2} cmp cursor, tmp blo 1b 2: /* zero byte per byte */ .Lzeromem_1byte_aligned: cmp cursor, stop_address beq 2f 1: strb zeroreg1, [cursor], #1 cmp cursor, stop_address bne 1b 2: bx lr .unreq cursor /* * length is already unreq'ed to reuse the register for another * variable. */ .unreq stop_address .unreq zeroreg1 .unreq zeroreg2 .unreq tmp endfunc zeromem /* * AArch32 does not have special ways of zeroing normal memory as AArch64 does * using the DC ZVA instruction, so we just alias zero_normalmem to zeromem. */ .equ zero_normalmem, zeromem /* -------------------------------------------------------------------------- * void memcpy4(void *dest, const void *src, unsigned int length) * * Copy length bytes from memory area src to memory area dest. * The memory areas should not overlap. * Destination and source addresses must be 4-byte aligned. * -------------------------------------------------------------------------- */ func memcpy4 #if ENABLE_ASSERTIONS orr r3, r0, r1 tst r3, #0x3 ASM_ASSERT(eq) #endif /* copy 4 bytes at a time */ m_loop4: cmp r2, #4 blo m_loop1 ldr r3, [r1], #4 str r3, [r0], #4 subs r2, r2, #4 bne m_loop4 bx lr /* copy byte per byte */ m_loop1: ldrb r3, [r1], #1 strb r3, [r0], #1 subs r2, r2, #1 bne m_loop1 bx lr endfunc memcpy4 /* --------------------------------------------------------------------------- * Disable the MMU in Secure State * --------------------------------------------------------------------------- */ func disable_mmu_secure mov r1, #(SCTLR_M_BIT | SCTLR_C_BIT) do_disable_mmu: #if ERRATA_A9_794073 stcopr r0, BPIALL dsb #endif ldcopr r0, SCTLR bic r0, r0, r1 stcopr r0, SCTLR isb // ensure MMU is off dsb sy bx lr endfunc disable_mmu_secure func disable_mmu_icache_secure ldr r1, =(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT) b do_disable_mmu endfunc disable_mmu_icache_secure /* --------------------------------------------------------------------------- * Helper to fixup Global Descriptor table (GDT) and dynamic relocations * (.rel.dyn) at runtime. * * This function is meant to be used when the firmware is compiled with -fpie * and linked with -pie options. We rely on the linker script exporting * appropriate markers for start and end of the section. For GOT, we * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect * __RELA_START__ and __RELA_END__. * * The function takes the limits of the memory to apply fixups to as * arguments (which is usually the limits of the relocable BL image). * r0 - the start of the fixup region * r1 - the limit of the fixup region * These addresses have to be 4KB page aligned. * --------------------------------------------------------------------------- */ /* Relocation codes */ #define R_ARM_RELATIVE 23 func fixup_gdt_reloc mov r6, r0 mov r7, r1 #if ENABLE_ASSERTIONS /* Test if the limits are 4K aligned */ orr r0, r0, r1 mov r1, #(PAGE_SIZE_MASK) tst r0, r1 ASM_ASSERT(eq) #endif /* * Calculate the offset based on return address in lr. * Assume that this function is called within a page at the start of * fixup region. */ ldr r1, =PAGE_START_MASK and r2, lr, r1 subs r0, r2, r6 /* Diff(S) = Current Address - Compiled Address */ beq 3f /* Diff(S) = 0. No relocation needed */ ldr r1, =__GOT_START__ add r1, r1, r0 ldr r2, =__GOT_END__ add r2, r2, r0 /* * GOT is an array of 32_bit addresses which must be fixed up as * new_addr = old_addr + Diff(S). * The new_addr is the address currently the binary is executing from * and old_addr is the address at compile time. */ 1: ldr r3, [r1] /* Skip adding offset if address is < lower limit */ cmp r3, r6 blo 2f /* Skip adding offset if address is > upper limit */ cmp r3, r7 bhi 2f add r3, r3, r0 str r3, [r1] 2: add r1, r1, #4 cmp r1, r2 blo 1b /* Starting dynamic relocations. Use ldr to get RELA_START and END */ 3: ldr r1, =__RELA_START__ add r1, r1, r0 ldr r2, =__RELA_END__ add r2, r2, r0 /* * According to ELF-32 specification, the RELA data structure is as * follows: * typedef struct { * Elf32_Addr r_offset; * Elf32_Xword r_info; * } Elf32_Rela; * * r_offset is address of reference * r_info is symbol index and type of relocation (in this case * code 23 which corresponds to R_ARM_RELATIVE). * * Size of Elf32_Rela structure is 8 bytes. */ /* Skip R_ARM_NONE entry with code 0 */ 1: ldr r3, [r1, #4] ands r3, r3, #0xff beq 2f #if ENABLE_ASSERTIONS /* Assert that the relocation type is R_ARM_RELATIVE */ cmp r3, #R_ARM_RELATIVE ASM_ASSERT(eq) #endif ldr r3, [r1] /* r_offset */ add r3, r0, r3 /* Diff(S) + r_offset */ ldr r4, [r3] /* Skip adding offset if address is < lower limit */ cmp r4, r6 blo 2f /* Skip adding offset if address is > upper limit */ cmp r4, r7 bhi 2f add r4, r0, r4 str r4, [r3] 2: add r1, r1, #8 cmp r1, r2 blo 1b bx lr endfunc fixup_gdt_reloc