整个过程是SPL→U-Boot→Linux。前面我们分析了**SPL调用U-Boot的过程**,接下来再分析一下U-Boot调用Linux的过程。
U-Boot代码追踪
U-Boot的执行过程同样是从start.S的reset开始。
在_main函数中我们会再次调用board_init_f函数,但是这时调用的函数与SPL阶段的board_init_f函数是不一样的。这里调用的board_init_f函数是在arch/arm/lib/board.c中定义的。
我们挑拣一些重点代码来说明。
1、board_init_f函数的代码
gd->mon_len = (ulong)&__bss_end - (ulong)_start; /*初始化gd->mon_len为U-Boot cod代码的大小*/ for (init_fnc_ptr = init_sequence; *init_fnc_ptr; ++init_fnc_ptr) { if ((*init_fnc_ptr)() != 0) { hang (); } } /*For循环遍历调用init_sequence中的所有函数,init_sequence的定义如下*/ init_fnc_t *init_sequence[] = { arch_cpu_init, /* basic arch cpu dependent setup */ mark_bootstage, #ifdef CONFIG_OF_CONTROL fdtdec_check_fdt, #endif #if defined(CONFIG_BOARD_EARLY_INIT_F) board_early_init_f, #endif timer_init, /* initialize timer */ #ifdef CONFIG_BOARD_POSTCLK_INIT board_postclk_init, #endif #ifdef CONFIG_FSL_ESDHC get_clocks, #endif env_init, /* initialize environment */ init_baudrate, /* initialze baudrate settings */ serial_init, /* serial communications setup */ console_init_f, /* stage 1 init of console */ display_banner, /* say that we are here */ print_cpuinfo, /* display cpu info (and speed) */ #if defined(CONFIG_DISPLAY_BOARDINFO) checkboard, /* display board info */ #endif #if defined(CONFIG_HARD_I2C) || defined(CONFIG_SYS_I2C) init_func_i2c, #endif dram_init, /* configure available RAM banks */ NULL, };
在这一系列函数中,我们最关心的是init_baudrate、serial_init、console_init_f和dram_init这几个初始化函数。前面三个函数依次初始化波特率、串口和打印终端。
dram_init函数对gd->ram_size进行初始化,以便board_init_f函数后续的代码对dram的空间进行分配。
#if defined(CONFIG_SYS_MEM_TOP_HIDE) /* * Subtract specified amount of memory to hide so that it won't * get "touched" at all by U-Boot. By fixing up gd->ram_size * the Linux kernel should now get passed the now "corrected" * memory size and won't touch it either. This should work * for arch/ppc and arch/powerpc. Only Linux board ports in * arch/powerpc with bootwrapper support, that recalculate the * memory size from the SDRAM controller setup will have to * get fixed. */ gd->ram_size -= CONFIG_SYS_MEM_TOP_HIDE; #endif
注释表明这个操作是适用于powerpc架构的,那么这个宏定义肯定是不起作用的,因此我们就无须分析了。
addr = CONFIG_SYS_SDRAM_BASE + get_effective_memsize();
为addr赋值,具体由SDRAM的基址和有效的memsize相加而成,也就是addr为SDRAM的顶端地址。
#ifdef CONFIG_LOGBUFFER #ifndef CONFIG_ALT_LB_ADDR /* reserve kernel log buffer */ addr -= (LOGBUFF_RESERVE); debug("Reserving %dk for kernel logbuffer at %08lx\n", LOGBUFF_LEN, addr); #endif #endif #ifdef CONFIG_PRAM /* * reserve protected RAM */ reg = getenv_ulong("pram", 10, CONFIG_PRAM); addr -= (reg << 10); /* size is in kB */ debug("Reserving %ldk for protected RAM at %08lx\n", reg, addr); #endif /* CONFIG_PRAM */ #if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF)) /* reserve TLB table */ gd->arch.tlb_size = PGTABLE_SIZE; addr -= gd->arch.tlb_size; /* round down to next 64 kB limit */ addr &= ~(0x10000 - 1); gd->arch.tlb_addr = addr; debug("TLB table from %08lx to %08lx\n", addr, addr + gd->arch.tlb_size); #endif
这一段代码为log buffer、pram和TLB table分配地址,因为宏定义都是关闭的,所以我们也不考虑。
/* round down to next 4 kB limit */ addr &= ~(4096 - 1); debug("Top of RAM usable for U-Boot at: %08lx\n", addr);
如果上面有分配的话,这里将addr的值按4KB向下对齐。
/* * reserve memory for U-Boot code, data & bss * round down to next 4 kB limit */ addr -= gd->mon_len; addr &= ~(4096 - 1); debug("Reserving %ldk for U-Boot at: %08lx\n", gd->mon_len >> 10, addr);
这里将为U-Boot的代码、数据和BSS段预留空间,然后再次按4KB向下对齐。
#ifndef CONFIG_SPL_BUILD /* * reserve memory for malloc() arena */ addr_sp = addr - TOTAL_MALLOC_LEN; debug("Reserving %dk for malloc() at: %08lx\n", TOTAL_MALLOC_LEN >> 10, addr_sp); /* * (permanently) allocate a Board Info struct * and a permanent copy of the "global" data */ addr_sp -= sizeof (bd_t); bd = (bd_t *) addr_sp; gd->bd = bd; debug("Reserving %zu Bytes for Board Info at: %08lx\n", sizeof (bd_t), addr_sp); #ifdef CONFIG_MACH_TYPE gd->bd->bi_arch_number = CONFIG_MACH_TYPE; /* board id for Linux */ #endif addr_sp -= sizeof (gd_t); id = (gd_t *) addr_sp; debug("Reserving %zu Bytes for Global Data at: %08lx\n", sizeof (gd_t), addr_sp); #if defined(CONFIG_OF_SEPARATE) && defined(CONFIG_OF_CONTROL) /* * If the device tree is sitting immediate above our image then we * must relocate it. If it is embedded in the data section, then it * will be relocated with other data. */ if (gd->fdt_blob) { fdt_size = ALIGN(fdt_totalsize(gd->fdt_blob) + 0x1000, 32); addr_sp -= fdt_size; new_fdt = (void *)addr_sp; debug("Reserving %zu Bytes for FDT at: %08lx\n", fdt_size, addr_sp); } #endif #ifndef CONFIG_ARM64 /* setup stackpointer for exeptions */ gd->irq_sp = addr_sp; #ifdef CONFIG_USE_IRQ addr_sp -= (CONFIG_STACKSIZE_IRQ+CONFIG_STACKSIZE_FIQ); debug("Reserving %zu Bytes for IRQ stack at: %08lx\n", CONFIG_STACKSIZE_IRQ+CONFIG_STACKSIZE_FIQ, addr_sp); #endif /* leave 3 words for abort-stack */ addr_sp -= 12; /* 8-byte alignment for ABI compliance */ addr_sp &= ~0x07; #else /* CONFIG_ARM64 */ /* 16-byte alignment for ABI compliance */ addr_sp &= ~0x0f; #endif /* CONFIG_ARM64 */ #else addr_sp += 128; /* leave 32 words for abort-stack */ gd->irq_sp = addr_sp; #endif debug("New Stack Pointer is: %08lx\n", addr_sp);
首先预留malloc len,这里预定义的长度为TOTAL_MALLOC_LEN。TOTAL_MALLOC_LEN的定义在include/common.h中。
注释说明,为bd、gd做一个永久的copy;留出全局信息bd_t结构体的空间,首地址存在于gd->bd;留出gd_t结构体的空间,首地址存在于id中。然后将此时的addr_sp保存在gd->irq_sp中作为异常栈指针。在U-Boot中我们没有用到中断。最后为abort stack留出12字节,并将addr_sp按8字节对齐。
到这里,addr_sp值就确定了,总结一下addr_sp之上的空间分配,由高到低有:addr→malloc len→bd len→gd len→12 byte→addr_sp(栈往下增长,addr_sp之下的空间作为栈空间)。
最后一部分代码如下:
gd->bd->bi_baudrate = gd->baudrate; /* Ram ist board specific, so move it to board code ... */ dram_init_banksize(); display_dram_config(); /* and display it */ gd->relocaddr = addr; gd->start_addr_sp = addr_sp; gd->reloc_off = addr - (ulong)&_start; debug("relocation Offset is: %08lx\n", gd->reloc_off); if (new_fdt) { memcpy(new_fdt, gd->fdt_blob, fdt_size); gd->fdt_blob = new_fdt; }
首先将bd->bi_baudrate赋值为gd->baudrate,gd->baudrate在前面的init_baudrate中已经完成了初始化。
dram_init_banksize()是需要实现的板级函数。根据板上ddrc获取ddr的bank信息,并填充在gd->bd->bi_dram[CONFIG_NR_DRAM_BANKS]中。
gd->relocaddr为目标addr,gd->start_addr_sp为目标addr_sp,gd->reloc_off为目标addr与现在实际代码起始地址的偏移。reloc_off非常重要,后面_main函数在跳转到relocate_code之前会用它修正返回地址lr,使返回后落在重定位后的代码中;而传给relocate_code函数的参数则是gd->relocaddr,用来实现代码的复制。
最后将gd结构体的数据复制到新的地址id上。
board_init_f函数将SDRAM空间重新进行了划分,可以看出栈空间和堆空间是分开的。
至此,board_init_f结束,回到_main函数。 因为在U-Boot中没有CONFIG_SPL_BUILD的定义,所以我们会在arch/arm/lib/crt0.S里的_main函数中调用relocate_code代码。
2、relocate_code
#if ! defined(CONFIG_SPL_BUILD) /* * Set up intermediate environment (new sp and gd) and call * relocate_code(addr_moni). Trick here is that we'll return * 'here' but relocated. */ ldr sp, [r9, #GD_START_ADDR_SP] /* sp = gd->start_addr_sp */ bic sp, sp, #7 /* 8-byte alignment for ABI compliance */ ldr r9, [r9, #GD_BD] /* r9 = gd->bd */ sub r9, r9, #GD_SIZE /* new GD is below bd */ adr lr, here ldr r0, [r9, #GD_RELOC_OFF] /* r0 = gd->reloc_off */ add lr, lr, r0 ldr r0, [r9, #GD_RELOCADDR] /* r0 = gd->relocaddr */ b relocate_code here: /* Set up final (full) environment */ bl c_runtime_cpu_setup /* we still call old routine here */ ldr r0, =__bss_start /* this is auto-relocated! */ ldr r1, =__bss_end /* this is auto-relocated! */ mov r2, #0x00000000 /* prepare zero to clear BSS */ clbss_l:cmp r0, r1 /* while not at end of BSS */ strlo r2, [r0] /* clear 32-bit BSS word */ addlo r0, r0, #4 /* move to next */ blo clbss_l bl coloured_LED_init bl red_led_on /* call board_init_r(gd_t *id, ulong dest_addr) */ mov r0, r9 /* gd_t */ ldr r1, [r9, #GD_RELOCADDR] /* dest_addr */ /* call board_init_r */ ldr pc, =board_init_r /* this is auto-relocated! */ /* we should not return here. */ #endif
我们先来看第一部分代码:
/* * Set up intermediate environment (new sp and gd) and call * relocate_code(addr_moni). Trick here is that we'll return * 'here' but relocated. */ ldr sp, [r9, #GD_START_ADDR_SP] /* sp = gd->start_addr_sp */ bic sp, sp, #7 /* 8-byte alignment for ABI compliance */ ldr r9, [r9, #GD_BD] /* r9 = gd->bd */ sub r9, r9, #GD_SIZE /* new GD is below bd */ adr lr, here ldr r0, [r9, #GD_RELOC_OFF] /* r0 = gd->reloc_off */ add lr, lr, r0 ldr r0, [r9, #GD_RELOCADDR] /* r0 = gd->relocaddr */ b relocate_code
这段注释写得很清晰,建立中间环境(新的sp和gd),然后调用relocate_code(addr_moni)。注意,从relocate_code返回时已经是重定位过的here地址了。
(这个过程在那幅图中也展示过)
看到没,这里有个重定位的过程。
这段注释写得很清晰,建立中间环境(新的sp和gd),然后调用relocate_code(addr_moni)。注意,从relocate_code返回时已经是重定位过的here地址了。
前4条汇编实现了新gd结构体的更新:首先更新sp,并且将sp 8字节对齐,以便于后面函数开辟的新的调用栈能对齐,然后获取gd->bd地址到r9中,需要注意,在board_init_f中gd->bd已经更新为新分配的bd了,下一条汇编将r9减掉gd的SIZE,这样就获取到了board_init_f中新分配的gd了。
后面的汇编则是为relocate_code做准备,首先加载here地址,然后加上新地址偏移量给lr,得到的是代码重定位后的新here地址了,relocate_code返回时跳转到lr,也就是新位置的here。
最后在r0中保存代码的新地址,并跳转到relocate_code。relocate_code函数在arch/arm/lib/relocate.S中实现:
ENTRY(relocate_code) ldr r1, =__image_copy_start /* r1 <- SRC &__image_copy_start */ subs r4, r0, r1 /* r4 <- relocation offset */ beq relocate_done /* skip relocation */ ldr r2, =__image_copy_end /* r2 <- SRC &__image_copy_end */ copy_loop: ldmia r1!, {r10-r11} /* copy from source address [r1] */ stmia r0!, {r10-r11} /* copy to target address [r0] */ cmp r1, r2 /* until source end address [r2] */ blo copy_loop /* * fix .rel.dyn relocations */ ldr r2, =__rel_dyn_start /* r2 <- SRC &__rel_dyn_start */ ldr r3, =__rel_dyn_end /* r3 <- SRC &__rel_dyn_end */ fixloop: ldmia r2!, {r0-r1} /* (r0,r1) <- (SRC location,fixup) */ and r1, r1, #0xff cmp r1, #23 /* relative fixup */ bne fixnext /* relative fix: increase location by offset */ add r0, r0, r4 ldr r1, [r0] add r1, r1, r4 str r1, [r0] fixnext: cmp r2, r3 blo fixloop relocate_done: #ifdef __XSCALE__ /* * On xscale, icache must be invalidated and write buffers drained, * even with cache disabled - 4.2.7 of xscale core developer's manual */ mcr p15, 0, r0, c7, c7, 0 /* invalidate icache */ mcr p15, 0, r0, c7, c10, 4 /* drain write buffer */ #endif /* ARMv4- don't know bx lr but the assembler fails to see that */ #ifdef __ARM_ARCH_4__ mov pc, lr #else bx lr #endif ENDPROC(relocate_code)
relocate_code函数分成两个部分。第一个部分为:
ldr r1, =__image_copy_start /* r1 <- SRC &__image_copy_start */ subs r4, r0, r1 /* r4 <- relocation offset */ beq relocate_done /* skip relocation */ ldr r2, =__image_copy_end /* r2 <- SRC &__image_copy_end */ copy_loop: ldmia r1!, {r10-r11} /* copy from source address [r1] */ stmia r0!, {r10-r11} /* copy to target address [r0] */ cmp r1, r2 /* until source end address [r2] */ blo copy_loop
第一个部分为复制,即将__image_copy_start和__image_copy_end之间的数据复制到新的地址处。__image_copy_start和__image_copy_end在arch/arm/cpu/u-boot.lds中定义。 来看看u-boot.lds的代码:
#include <config.h> OUTPUT_FORMAT("elf32-littlearm", "elf32-littlearm", "elf32-littlearm") OUTPUT_ARCH(arm) ENTRY(_start) SECTIONS { . = 0x00000000; . = ALIGN(4); .text : { *(.__image_copy_start) *(.vectors) CPUDIR/start.o (.text*) *(.text*) } #ifdef CONFIG_ARMV7_NONSEC #ifndef CONFIG_ARMV7_SECURE_BASE #define CONFIG_ARMV7_SECURE_BASE #endif .__secure_start : { . = ALIGN(0x1000); *(.__secure_start) } .secure_text CONFIG_ARMV7_SECURE_BASE : AT(ADDR(.__secure_start) + SIZEOF(.__secure_start)) { *(._secure.text) } . = LOADADDR(.__secure_start) + SIZEOF(.__secure_start) + SIZEOF(.secure_text); __secure_end_lma = .; .__secure_end : AT(__secure_end_lma) { *(.__secure_end) LONG(0x1d1071c); /* Must output something to reset LMA */ } #endif . = ALIGN(4); .rodata : { *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.rodata*))) } . = ALIGN(4); .data : { *(.data*) } . = ALIGN(4); . = .; . = ALIGN(4); .u_boot_list : { KEEP(*(SORT(.u_boot_list*))); } . = ALIGN(4); .image_copy_end : { *(.__image_copy_end) } .rel_dyn_start : { *(.__rel_dyn_start) } .rel.dyn : { *(.rel*) } .rel_dyn_end : { *(.__rel_dyn_end) } .end : { *(.__end) }
__image_copy_start和__image_copy_end之间包含了.text段、.rodata段和.data段。在这里可以看到.rel.dyn段跟在其后,也就是说不会复制.rel.dyn段。
代码的第二部分才是真正完成重定位的代码:
/* * fix .rel.dyn relocations */ ldr r2, =__rel_dyn_start /* r2 <- SRC &__rel_dyn_start */ ldr r3, =__rel_dyn_end /* r3 <- SRC &__rel_dyn_end */ fixloop: ldmia r2!, {r0-r1} /* (r0,r1) <- (SRC location,fixup) */ and r1, r1, #0xff cmp r1, #23 /* relative fixup */ bne fixnext /* relative fix: increase location by offset */ add r0, r0, r4 ldr r1, [r0] add r1, r1, r4 str r1, [r0] fixnext: cmp r2, r3 blo fixloop relocate_done:
首先在r2中保存__rel_dyn_start,在r3中保存__rel_dyn_end。然后在这个地址范围内进行遍历:每次从r2指向的位置取出连续两个32位字(共8个字节)存放在r0和r1中,取r1中的低8位值,并与23(即0x17)比较,如果相等,表明是需要重定位的数据。因为在.rel.dyn段中每一个重定位条目(8个字节)的第二个4字节的低8位如果是0x17,则表明其类型是R_ARM_RELATIVE。对于这种类型,先将r0中的地址加上偏移量r4,得到它在新位置上的地址,再把该地址处存放的值也加上r4并写回,从而完成重定位。
完成重定位之后,我们重新回到_main函数中,剩下的操作就很简单了,首先调用c_runtime_cpu_setup函数,然后清除重定位后的BSS段,最后调用arch/arm/lib/board.c文件中的board_init_r函数。该函数会进行很多初始化操作,在这里就不一一分析了,在函数最后调用main_loop函数。
board_init_r函数代码片段
697 /* main_loop() can return to retry autoboot, if so just run it again. */ 698 for (;;) { 699 main_loop(); 700 }
在main_loop函数中将会调用process_boot_delay函数,这个函数会有一个倒计时:
Hit any key to stop autoboot: 3
如果在定义时间内没有按键,那么就自动去引导系统,其思路和SPL类似,在此留给读者去分析;如果有按键按下,就进入U-Boot的命令行,在里面可以键入“help”查看所有可用的命令。
(难怪 uboot源码中还需要解析命令行参数)
到这里就走完了U-Boot的整个流程。对于这个部分,我后面打算做个流程图来梳理一下这个过程。期待。下一步该回到TEEOS了。
参考资料:
《深入理解BootLoader》