一. 前言
本文主要分析RISCV linux kernel的启动汇编部分代码。先结合链接脚本和汇编代码介绍镜像头和启动汇编代码部分的执行过程。后面文章再详细分析重定向,mmu设置等相关重点内容。
代码路径arch/riscv/kernel/head.S, 不同版本内核可能有点差异。
二. 根据链接脚本查找入口与镜像头介绍
2.1 链接脚本
内核根目录Makefile中
export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds
指定了链接脚本
实际来自于arch/riscv/kernel/vmlinux.lds.S
include/asm-generic/vmlinux.lds.h
scripts/link-vmlinux.sh中ld链接时指定-T参数,指定该链接脚本
local lds="${objtree}/${KBUILD_LDS}"
2.2 通过链接脚本对应汇编入口
链接脚本vmlinux.lds最开始处
/* Beginning of code and text segment */
. = LOAD_OFFSET;
_start = .;
HEAD_TEXT_SECTION
. = ALIGN(PAGE_SIZE);
即对应的入口
查找_start即可搜索到对应的代码位于
arch/riscv/kernel/head.S
来看该处汇编代码
首先宏
__HEAD
在include/linux/init.h中定义
#define __HEAD .section ".head.text","ax"
即后续代码放在段.head.text中,段标志"ax"中a表示可分配(allocatable,加载时占用内存),x表示可执行(executable)。
然后是
ENTRY(_start)
...
END(_start)
ENTRY,END的宏在include/linux/linkage.h中定义
/* deprecated, use SYM_FUNC_START */
SYM_FUNC_START(name)
/* deprecated, use SYM_FUNC_END, SYM_DATA_END, or SYM_END */
.size name, .-name
SYM_FUNC_START又定义如下
/* SYM_FUNC_START -- use for global functions */
/*
* The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two
* later.
*/
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
SYM_START又定义如下
/* SYM_START -- use only if you have to */
SYM_ENTRY(name, linkage, align)
SYM_ENTRY又定义如下
/* SYM_ENTRY -- use only if you have to for non-paired symbols */
linkage(name) ASM_NL \
align ASM_NL \
name:
SYM_L_GLOBAL定义如下
#define SYM_L_GLOBAL(name) .globl name
ASM_NL定义如下
最终展开如下,即定义了标签_start:
.globl _start;
align ;
_start:
......
.size _start, .-_start
其中.size指定符号的大小.-_start表示当前位置减去_start标签的位置,即_start到该处的大小。
arch/riscv/kernel/vmlinux.lds.S中
SECTIONS
{
/* Beginning of code and text segment */
. = LOAD_OFFSET;
_start = .;
HEAD_TEXT_SECTION
HEAD_TEXT_SECTION在include/asm-generic/vmlinux.lds.h
中定义
/* Section used for early init (in .S files) */
.head.text : AT(ADDR(.head.text) - LOAD_OFFSET) { \
HEAD_TEXT \
}
即head.S最开始的代码放在了
.head.text段,即LOAD_OFFSET开始处。
而LOAD_OFFSET是
#define LOAD_OFFSET PAGE_OFFSET
arch/riscv/Kconfig中可menuconfig配置
config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
default 0x80000000 if 64BIT && !MMU
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
最后根据menuconfig配置生成的.config中
CONFIG_PAGE_OFFSET=0xffffffe000000000
arch/riscv/Makefile中最终会使用该宏
KBUILD_CFLAGS += -DCONFIG_PAGE_OFFSET=$(CONFIG_PAGE_OFFSET)
arch/riscv/include/asm/page.h中使用该宏
/*
* PAGE_OFFSET -- the first address of the first page of memory.
* When not using MMU this corresponds to the first free page in
* physical memory (aligned on a page boundary).
*/
_AC宏定义在include/uapi/linux/const.h
汇编时不变(定义了__ASSEMBLY__时)
否则为__AC, 即X##Y即加后缀ul, 即0xffffffe000000000ul
使用以下命令查看vmlinux中的段对应
riscv64-unknown-linux-gnu-readelf -S -l output/vmlinux
Section Headers:
[Nr] Name Type Address Offset
Size EntSize Flags Link Info Align
[ 0] NULL 0000000000000000 00000000
0000000000000000 0000000000000000 0 0 0
[ 1] .head.text PROGBITS ffffffe000000000 00001000
0000000000001ea4 0000000000000000 AX 0 0 4096
可以看到 MAXPHYSMEM_128GB,64BIT时,链接地址是0xffffffe000000000(虚拟地址),实际会加载到0x80200000处执行,MMU配置之后重定向使用0xffffffe000000000(虚拟地址)。
2.3镜像头
output/vmlinux是ELF格式的输出文件,而arch/riscv/boot/Image是二进制镜像,完全和内核代码二进制对应。Image的开始处即head.S处的代码,最开始的一段字节即镜像头。
头格式见arch/riscv/include/asm/image.h
对于EFI是4K对齐的头,对于非EFI是64字节。
/**
* struct riscv_image_header - riscv kernel image header
* @code0: Executable code
* @code1: Executable code
* @text_offset: Image load offset (little endian)
* @image_size: Effective Image size (little endian)
* @flags: kernel flags (little endian)
* @version: version
* @res1: reserved
* @res2: reserved
* @magic: Magic number (RISC-V specific; deprecated)
* @magic2: Magic number 2 (to match the ARM64 'magic' field pos)
* @res3: reserved (will be used for PE COFF offset)
*
* The intention is for this header format to be shared between multiple
* architectures to avoid a proliferation of image header formats.
*/
struct riscv_image_header {
u32 code0;
u32 code1;
u64 text_offset;
u64 image_size;
u64 flags;
u32 version;
u32 res1;
u64 res2;
u64 magic;
u32 magic2;
u32 res3;
};
看汇编代码head.S
/*
* Image header expected by Linux boot-loaders. The image header data
* structure is described in asm/image.h.
* Do not modify it without modifying the structure and all bootloaders
* that expects this header format!!
*/
/*
* This instruction decodes to "MZ" ASCII required by UEFI.
*/
c.li s4,-13
j _start_kernel
/* jump to start kernel */
j _start_kernel
/* reserved */
.word 0
.balign 8
.balign 8表示后面从8字节开始对齐。前面8字节就对应code0和code1.
前面如果支持EFI(配置了CONFIG_EFI)则是
c.li s4,-13
j _start_kernel
否则是
j _start_kernel
.word 0
即配置支持EFI则最开始两字节必须是“MZ”,这里通过指令c.li s4,-13的指令码凑出来:该指令编码为0x5a4d,按小端存储即字节4d 5a,对应ASCII的“MZ”。
然后是跳转到_start_kernel处的指令。
如果是不支持EFI则直接是跳转指令,后面填充0.
riscv64-unknown-linux-gnu-objdump -l -S vmlinux > a.s可以查看到汇编指令
ffffffe000000000 <_start>:
_start():
arch/riscv/kernel/head.S:29
*/
#ifdef CONFIG_EFI
/*
* This instruction decodes to "MZ" ASCII required by UEFI.
*/
c.li s4,-13
ffffffe000000000: 5a4d li s4,-13
arch/riscv/kernel/head.S:30
j _start_kernel
ffffffe000000002: 7ff0106f j ffffffe000002000 <_start_kernel>
ffffffe000000006: 0001 nop
ffffffe000000008: 0000 unimp
ffffffe00000000a: 0020 addi s0,sp,8
ffffffe00000000c: 0000 unimp
ffffffe00000000e: 0000 unimp
ffffffe000000010: e000 sd s0,0(s0)
ffffffe000000012: 01bc addi a5,sp,200
也可以直接查看Image的二进制值
hexdump arch/riscv/boot/Image -C -n 64
00000000 4d 5a 6f 10 f0 7f 01 00 00 00 20 00 00 00 00 00 |MZo....... .....|
00000010 00 e0 1c 01 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000020 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000030 52 49 53 43 56 00 00 00 52 53 43 05 40 00 00 00 |RISCV...RSC.@...|
00000040
可以看到这里配置支持EFI,有“MZ”开头。
然后接下来是text_offset,如果配置了CONFIG_RISCV_M_MODE则偏移为0,否则32位偏移0x400000, 64位偏移0x200000.
/* Image load offset (0MB) from start of RAM for M-mode */
.dword 0
/* Image load offset(2MB) from start of RAM */
.dword 0x200000
/* Image load offset(4MB) from start of RAM */
.dword 0x400000
然后是镜像大小,image_size
/* Effective size of kernel image */
.dword _end - _start
这里通过两个符号_end - _start获取,即
arch/riscv/kernel/vmlinux.lds.S中的符号,分别代表镜像的开始结束位置。
_start = .;
_end = .;
这里看到的值是
00 e0 1c 01 00 00 00 00小端, 0x11CE000=18669568
查看镜像大小
ls -al arch/riscv/boot/Image
-rw-r--r-- 1 18314240 Oct 31 22:15 arch/riscv/boot/Image
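(??这里为什么实际大小和头中大小对应不上:原因应是image_size取的是_end - _start,即内核在内存中实际占用的大小,其中包含.bss这类不占用文件空间的NOBITS段;而Image是由objcopy -O binary生成的裸二进制,末尾的.bss不会写入文件,所以文件大小18314240比头中的18669568小。)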
然后是
.dword __HEAD_FLAGS
.word RISCV_HEADER_VERSION
.word 0
.dword 0
.ascii RISCV_IMAGE_MAGIC
.balign 4
.ascii RISCV_IMAGE_MAGIC2
u64 flags;
u32 version;
u32 res1;
u64 res2;
u64 magic;
u32 magic2;
00000000 4d 5a 6f 10 f0 7f 01 00 00 00 20 00 00 00 00 00 |MZo....... .....|
00000010 00 e0 1c 01 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000020 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000030 52 49 53 43 56 00 00 00 52 53 43 05 40 00 00 00 |RISCV...RSC.@...|
00000040
__HEAD_FLAGS在arch/riscv/include/asm/image.h中定义
这里是小端,展开后为0
RISCV_IMAGE_FLAG_#
Version为2
magic和magic2为RISCV_IMAGE_MAGIC和RISCV_IMAGE_MAGIC2
在arch/riscv/include/asm/image.h中定义
#define RISCV_IMAGE_MAGIC "RISCV\0\0\0"
#define RISCV_IMAGE_MAGIC2 "RSC\x05"
接下来
.word pe_head_start - _start
pe_head_start:
__EFI_PE_HEADER
.word 0
如果配置支持EFI则.word pe_head_start - _start,即后面pe_head_start位置到_start的偏移,
即包括这个.word本身在内、pe_head_start之前的全部字节数,这里是64字节(0x40),按小端存储为40 00 00 00。
如果不支持EFI则这里是0
00000000 4d 5a 6f 10 f0 7f 01 00 00 00 20 00 00 00 00 00 |MZo....... .....|
00000010 00 e0 1c 01 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000020 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000030 52 49 53 43 56 00 00 00 52 53 43 05 40 00 00 00 |RISCV...RSC.@...|
00000040
如果支持EFI则__EFI_PE_HEADER处放置EFI(PE/COFF)头,EFI头之后按照0x1000对齐。
.balign 0x1000
efi_header_end:
__EFI_PE_HEADER宏在arch/riscv/kernel/efi-header.S中定义,EFI头这里不再详述。
.macro __EFI_PE_HEADER
.long PE_MAGIC
coff_header:
.short IMAGE_FILE_MACHINE_RISCV64 // Machine
.short IMAGE_FILE_MACHINE_RISCV32 // Machine
.short section_count // NumberOfSections
.long 0 // TimeDateStamp
.long 0 // PointerToSymbolTable
.long 0 // NumberOfSymbols
.short section_table - optional_header // SizeOfOptionalHeader
.short IMAGE_FILE_DEBUG_STRIPPED | \
IMAGE_FILE_EXECUTABLE_IMAGE | \
IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics
optional_header:
.short PE_OPT_MAGIC_PE32PLUS // PE32+ format
.short PE_OPT_MAGIC_PE32 // PE32 format
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
.long __pecoff_text_end - efi_header_end // SizeOfCode
.long __pecoff_data_virt_size // SizeOfInitializedData
.long 0 // SizeOfUninitializedData
.long __efistub_efi_pe_entry - _start // AddressOfEntryPoint
.long efi_header_end - _start // BaseOfCode
.long __pecoff_text_end - _start // BaseOfData
extra_header_fields:
.quad 0 // ImageBase
.long PECOFF_SECTION_ALIGNMENT // SectionAlignment
.long PECOFF_FILE_ALIGNMENT // FileAlignment
.short 0 // MajorOperatingSystemVersion
.short 0 // MinorOperatingSystemVersion
.short LINUX_EFISTUB_MAJOR_VERSION // MajorImageVersion
.short LINUX_EFISTUB_MINOR_VERSION // MinorImageVersion
.short 0 // MajorSubsystemVersion
.short 0 // MinorSubsystemVersion
.long 0 // Win32VersionValue
.long _end - _start // SizeOfImage
// Everything before the kernel image is considered part of the header
.long efi_header_end - _start // SizeOfHeaders
.long 0 // CheckSum
.short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem
.short 0 // DllCharacteristics
.quad 0 // SizeOfStackReserve
.quad 0 // SizeOfStackCommit
.quad 0 // SizeOfHeapReserve
.quad 0 // SizeOfHeapCommit
.long 0 // LoaderFlags
.long (section_table - .) / 8 // NumberOfRvaAndSizes
.quad 0 // ExportTable
.quad 0 // ImportTable
.quad 0 // ResourceTable
.quad 0 // ExceptionTable
.quad 0 // CertificationTable
.quad 0 // BaseRelocationTable
// Section table
section_table:
.ascii ".text\0\0\0"
.long __pecoff_text_end - efi_header_end // VirtualSize
.long efi_header_end - _start // VirtualAddress
.long __pecoff_text_end - efi_header_end // SizeOfRawData
.long efi_header_end - _start // PointerToRawData
.long 0 // PointerToRelocations
.long 0 // PointerToLineNumbers
.short 0 // NumberOfRelocations
.short 0 // NumberOfLineNumbers
.long IMAGE_SCN_CNT_CODE | \
IMAGE_SCN_MEM_READ | \
IMAGE_SCN_MEM_EXECUTE // Characteristics
.ascii ".data\0\0\0"
.long __pecoff_data_virt_size // VirtualSize
.long __pecoff_text_end - _start // VirtualAddress
.long __pecoff_data_raw_size // SizeOfRawData
.long __pecoff_text_end - _start // PointerToRawData
.long 0 // PointerToRelocations
.long 0 // PointerToLineNumbers
.short 0 // NumberOfRelocations
.short 0 // NumberOfLineNumbers
.long IMAGE_SCN_CNT_INITIALIZED_DATA | \
IMAGE_SCN_MEM_READ | \
IMAGE_SCN_MEM_WRITE // Characteristics
.set section_count, (. - section_table) / 40
.balign 0x1000
efi_header_end:
.endm
三. 启动汇编代码介绍
入口调用j _start_kernel跳到_start_kernel执行,对应代码如下
__INIT
ENTRY(_start_kernel)
Mask all interrupts */
csrw CSR_IE, zero
csrw CSR_IP, zero
#ifdef CONFIG_RISCV_M_MODE
flush the instruction cache */
fence.i
Reset all registers except ra, a0, a1 */
call reset_regs
/*
Setup a PMP to permit access to all of memory. Some machines may
not implement PMPs, so we set up a quick trap handler to just skip
touching the PMPs on any trap.
*/
la a0, pmp_done
csrw CSR_TVEC, a0
li a0, -1
csrw CSR_PMPADDR0, a0
li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X)
csrw CSR_PMPCFG0, a0
.align 2
pmp_done:
/*
The hartid in a0 is expected later on, and we have no firmware
to hand it to us.
*/
csrr a0, CSR_MHARTID
#endif /* CONFIG_RISCV_M_MODE */
Load the global pointer */
.option push
.option norelax
la gp, __global_pointer$
.option pop
/*
Disable FPU to detect illegal usage of
floating point in kernel space
*/
li t0, SR_FS | SR_VS
csrc CSR_STATUS, t0
#ifdef CONFIG_SMP
li t0, CONFIG_NR_CPUS
blt a0, t0, .Lgood_cores
tail .Lsecondary_park
.Lgood_cores:
#endif
Pick one hart to run the main boot sequence */
la a3, hart_lottery
li a2, 1
amoadd.w a3, a2, (a3)
bnez a3, .Lsecondary_start
Clear BSS for flat non-ELF images */
la a3, __bss_start
la a4, __bss_stop
ble a4, a3, clear_bss_done
clear_bss:
REG_S zero, (a3)
add a3, a3, RISCV_SZPTR
blt a3, a4, clear_bss
clear_bss_done:
Save hart ID and DTB physical address */
mv s0, a0
mv s1, a1
la a2, boot_cpu_hartid
REG_S a0, (a2)
Initialize page tables and relocate to virtual addresses */
la sp, init_thread_union + THREAD_SIZE
mv a0, s1
call setup_vm
#ifdef CONFIG_MMU
la a0, early_pg_dir
call relocate
#endif /* CONFIG_MMU */
call setup_trap_vector
Restore C environment */
la tp, init_task
sw zero, TASK_TI_CPU(tp)
la sp, init_thread_union + THREAD_SIZE
#ifdef CONFIG_KASAN
call kasan_early_init
#endif
Start the kernel */
call soc_early_init
tail start_kernel
.Lsecondary_start:
#ifdef CONFIG_SMP
Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
slli a3, a0, LGREG
la a1, __cpu_up_stack_pointer
la a2, __cpu_up_task_pointer
add a1, a3, a1
add a2, a3, a2
/*
This hart didn't win the lottery, so we wait for the winning hart to
get far enough along the boot process that it should continue.
*/
.Lwait_for_cpu_up:
FIXME: We should WFI to save some energy here. */
REG_L sp, (a1)
REG_L tp, (a2)
beqz sp, .Lwait_for_cpu_up
beqz tp, .Lwait_for_cpu_up
fence
tail secondary_start_common
#endif
END(_start_kernel)
首先
__INIT在
include/linux/init.h中定义
即后面代码放在了.init.text段,可执行
#define __INIT .section ".init.text","ax"
ENTRY(_start_kernel)
END(_start_kernel)
类似前面的ENTRY(_start) END(_start)
定义了标签_start_kernel
3.1 关闭中断
接着关闭所有中断使能(CSR_IE写0)并清除所有中断挂起标志(CSR_IP写0)
/* Mask all interrupts */
csrw CSR_IE, zero
csrw CSR_IP, zero
CSR_IE,CSR_IP在arch/riscv/include/asm/csr.h中根据运行在M模式还是S模式分别定义为对应的CSR寄存器。
M模式
S模式
3.2 M模式配置
以下代码配置CONFIG_RISCV_M_MODE才执行
/* flush the instruction cache */
fence.i
/* Reset all registers except ra, a0, a1 */
call reset_regs
/*
* Setup a PMP to permit access to all of memory. Some machines may
* not implement PMPs, so we set up a quick trap handler to just skip
* touching the PMPs on any trap.
*/
la a0, pmp_done
csrw CSR_TVEC, a0
li a0, -1
csrw CSR_PMPADDR0, a0
li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X)
csrw CSR_PMPCFG0, a0
.align 2
pmp_done:
/*
* The hartid in a0 is expected later on, and we have no firmware
* to hand it to us.
*/
csrr a0, CSR_MHARTID
先fence.i flush指令cache
然后是call reset_regs清除所有寄存器,除了ra、a0、a1(ra是返回地址,a0记录了hartid,a1记录了设备树地址)
#ifdef CONFIG_RISCV_M_MODE
ENTRY(reset_regs)
li sp, 0
li gp, 0
li tp, 0
li t0, 0
li t1, 0
li t2, 0
li s0, 0
li s1, 0
li a2, 0
li a3, 0
li a4, 0
li a5, 0
li a6, 0
li a7, 0
li s2, 0
li s3, 0
li s4, 0
li s5, 0
li s6, 0
li s7, 0
li s8, 0
li s9, 0
li s10, 0
li s11, 0
li t3, 0
li t4, 0
li t5, 0
li t6, 0
csrw CSR_SCRATCH, 0
#ifdef CONFIG_FPU
csrr t0, CSR_MISA
andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)
beqz t0, .Lreset_regs_done
li t1, SR_FS
csrs CSR_STATUS, t1
f0, zero
f1, zero
f2, zero
f3, zero
f4, zero
f5, zero
f6, zero
f7, zero
f8, zero
f9, zero
f10, zero
f11, zero
f12, zero
f13, zero
f14, zero
f15, zero
f16, zero
f17, zero
f18, zero
f19, zero
f20, zero
f21, zero
f22, zero
f23, zero
f24, zero
f25, zero
f26, zero
f27, zero
f28, zero
f29, zero
f30, zero
f31, zero
csrw fcsr, 0
note that the caller must clear SR_FS */
#endif /* CONFIG_FPU */
.Lreset_regs_done:
ret
END(reset_regs)
#endif /* CONFIG_RISCV_M_MODE */
然后设置PMP,设置所有区域都可读可写可执行。
/*
Setup a PMP to permit access to all of memory. Some machines may
not implement PMPs, so we set up a quick trap handler to just skip
touching the PMPs on any trap.
*/
la a0, pmp_done
csrw CSR_TVEC, a0
li a0, -1
csrw CSR_PMPADDR0, a0
li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X)
csrw CSR_PMPCFG0, a0
.align 2
pmp_done:
这里有个特别处理,即先设置异常处理入口CSR_TVEC(根据运行在M模式还是S模式分别是CSR_MTVEC,CSR_STVEC)为pmp_done,这样如果硬件不支持PMP,则操作CSR_PMPADDR0时触发异常,直接跳到pmp_done后继续执行。pmp_done前的.align 2表示按2^2=4字节对齐(RISC-V汇编中.align的参数是2的幂次,而不是字节数),因为xTVEC寄存器的低2位是MODE字段,异常入口地址必须4字节对齐。
la a0, pmp_done
csrw CSR_TVEC, a0
li a0, -1
csrw CSR_PMPADDR0, a0
即设置PMPADDR0寄存器为全F,这里写-1,对于32位即0xFFFFFFFF,对于64位即0xFFFFFFFFFFFFFFFF。
然后设置PMPCFG0的属性,可读可写可执行
li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X)
csrw CSR_PMPCFG0, a0
PMP的设置参考规格书The RISC-V Instruction Set Manual: Volume II的《3.7. Physical Memory Protection》
3.3获取当前ID
/*
* The hartid in a0 is expected later on, and we have no firmware
* to hand it to us.
*/
csrr a0, CSR_MHARTID
读hartid到a0
3.4设置GP
/* Load the global pointer */
.option push
.option norelax
la gp, __global_pointer$
.option pop
先push保存当前汇编选项,然后修改为norelax。因为此时gp还未初始化,需要用norelax告诉汇编器/链接器不要对这条la做基于gp的链接松弛(relaxation)优化,否则加载gp的指令本身会被优化成依赖gp。最后pop恢复原来的选项。
3.5关闭FPU/VECTOR
/*
* Disable FPU to detect illegal usage of
* floating point in kernel space
*/
li t0, SR_FS | SR_VS
csrc CSR_STATUS, t0
arch/riscv/include/asm/csr.h中定义
同样CSR_STATUS根据M模式还是S模式对应
CSR_MSTATUS,CSR_SSTATUS
3.6同构多核启动参数检查
#ifdef CONFIG_SMP
li t0, CONFIG_NR_CPUS
blt a0, t0, .Lgood_cores
tail .Lsecondary_park
.Lgood_cores:
#endif
对于M模式,a0是前面读出来的当前hartid
csrr a0, CSR_MHARTID
对于非M模式,a0是OpenSBI传入进来的参数,表示启动的hartid。
检查下a0要小于CONFIG_NR_CPUS
CONFIG_NR_CPUS在Kconfig中配置。
.config中
CONFIG_NR_CPUS=8
如果a0大于等于CONFIG_NR_CPUS(即hartid超出所支持的核数)则进入以下代码,执行wfi死循环。
.Lsecondary_park:
/* We lack SMP support or have too many harts, so park this hart */
wfi
j .Lsecondary_park
3.7选择核启动
/* Pick one hart to run the main boot sequence */
la a3, hart_lottery
li a2, 1
amoadd.w a3, a2, (a3)
bnez a3, .Lsecondary_start
hart_lottery是全局变量, 将其地址加载到a3,
然后通过 amoadd.w a3, a2, (a3)
将hart_lottery的值读出到a3,并将hart_lottery原子地加1。
如果读出到a3的值不为0,说明别的核心先给其递增了,本核心没有抢占到,则跳到.Lsecondary_start等待启动核完成初始化。
读出a3为0则本核心抢占到,继续初始化。
3.8非启动核
.Lsecondary_start:
#ifdef CONFIG_SMP
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
slli a3, a0, LGREG
la a1, __cpu_up_stack_pointer
la a2, __cpu_up_task_pointer
add a1, a3, a1
add a2, a3, a2
/*
* This hart didn't win the lottery, so we wait for the winning hart to
* get far enough along the boot process that it should continue.
*/
.Lwait_for_cpu_up:
/* FIXME: We should WFI to save some energy here. */
REG_L sp, (a1)
REG_L tp, (a2)
beqz sp, .Lwait_for_cpu_up
beqz tp, .Lwait_for_cpu_up
fence
tail secondary_start_common
#endif
先初始化异常入口为Lsecondary_park
la a3, .Lsecondary_park
csrw CSR_TVEC, a3
然后
获取a1为__cpu_up_stack_pointer[hartid]的地址
a2为__cpu_up_task_pointer[hartid]的地址,
64位时LGREG为3,32位时为2,slli为逻辑左移,
所以64位时偏移地址为hartid×8,32位时为hartid×4
slli a3, a0, LGREG
la a1, __cpu_up_stack_pointer
la a2, __cpu_up_task_pointer
add a1, a3, a1
add a2, a3, a2
然后不断读__cpu_up_stack_pointer[hartid]和
__cpu_up_task_pointer[hartid]直到两者不为0
注意最后的fence是内存屏障:它保证对sp、tp的读取先于后续访存完成,使本核能看到启动核在写这两个指针之前所做的初始化。它并不刷新I/D cache(同步指令流用的是fence.i)。
/*
* This hart didn't win the lottery, so we wait for the winning hart to
* get far enough along the boot process that it should continue.
*/
.Lwait_for_cpu_up:
/* FIXME: We should WFI to save some energy here. */
REG_L sp, (a1)
REG_L tp, (a2)
beqz sp, .Lwait_for_cpu_up
beqz tp, .Lwait_for_cpu_up
fence
跳入
.global secondary_start_common
secondary_start_common:
/* Enable virtual memory and relocate to virtual address */
la a0, swapper_pg_dir
call relocate
call setup_trap_vector
tail smp_callin
支持MMU则先调用relocate开启虚拟内存;之后无论是否支持MMU,都会设置异常入口,最后调用smp_callin。
重定向和smp_callin后续再分析。
3.9启动核清除bss段
Clear BSS for flat non-ELF images */
la a3, __bss_start
la a4, __bss_stop
ble a4, a3, clear_bss_done
clear_bss:
REG_S zero, (a3)
add a3, a3, RISCV_SZPTR
blt a3, a4, clear_bss
clear_bss_done:
这里没什么特殊的,从链接脚本中获取bss段的开始和结束地址
__bss_start、__bss_stop,遍历将这段内存清零。
3.10保存hartid和设备树地址
Save hart ID and DTB physical address */
mv s0, a0
mv s1, a1
la a2, boot_cpu_hartid
REG_S a0, (a2)
a0 hartid
a1 设备树地址
保存到s0 s1
并将a0中的hartid值保存到全局变量boot_cpu_hartid.
这里保存a0 a1是因为后面要调用c函数要使用这两个寄存器所以要保存。
3.11初始化栈指针
/* Initialize page tables and relocate to virtual addresses */
la sp, init_thread_union + THREAD_SIZE
arch/riscv/include/asm/thread_info.h中
/* thread information allocation */
所以这里THREAD_SIZE为4k<<2=16k.
init_thread_union来源于
include/asm-generic/vmlinux.lds.h
预留的初始化栈大小空间
#define INIT_TASK_DATA(align) \
. = ALIGN(align); \
__start_init_task = .; \
init_thread_union = .; \
init_stack = .; \
\
\
. = __start_init_task + THREAD_SIZE; \
__end_init_task = .;
3.12 调用setup_vm
前面配置了sp栈准备好之后可以调用c。
mv a0, s1
call setup_vm
参数a0为设备树地址
后续再分析该部分mmu配置。
3.13 重定向
#ifdef CONFIG_MMU
la a0, early_pg_dir
call relocate
#endif /* CONFIG_MMU */
后面再分析重定向部分。
3.14 设置异常入口
call setup_trap_vector
设置异常入口为handle_exception
.align 2
setup_trap_vector:
/* Set trap vector to exception handler */
la a0, handle_exception
csrw CSR_TVEC, a0
/*
* Set sup0 scratch register to 0, indicating to exception vector that
* we are presently executing in kernel.
*/
csrw CSR_SCRATCH, zero
ret
3.15重新设置栈准备c环境
Restore C environment */
la tp, init_task
sw zero, TASK_TI_CPU(tp)
la sp, init_thread_union + THREAD_SIZE
全局变量init_task偏移TASK_TI_CPU处设置为0
#define TASK_TI_CPU 32 /* offsetof(struct task_struct, thread_info.cpu) */
即struct task_struct
init/init_task.c中的
struct task_struct init_task
重新设置sp指针到init_thread_union + THREAD_SIZE
前面已经使用了sp所以这里可再重新设置。
3.16 Kasan
#ifdef CONFIG_KASAN
call kasan_early_init
#endif
后续再分析。
3.17 早期初始化
call soc_early_init
后续再分析
3.18 启动内核
tail start_kernel
后续再分析
3.19 对齐
__PAGE_ALIGNED_BSS
/* Empty zero page */
.balign PAGE_SIZE
include/linux/linkage.h中,段
#define __PAGE_ALIGNED_BSS .section ".bss..page_aligned", "aw"
对齐大小为PAGE_SIZE
四. 总结
头信息对应如下
整个汇编代码的流程如下