一.前言
前面我们分析了setup_vm以及relocate,并详细手算了对应的页表,了解了此时页表将哪些虚拟地址映射到了哪些物理地址。现在继续来看setup_vm_final,该函数实现最终的映射。
二. 分析过程
该函数调用路径如下
head.S的relocate之后
tail start_kernel->
start_kernel(init/main.c)->
setup_arch(arch/riscv/kernel/setup.c)->
paging_init(arch/riscv/mm/init.c)->
setup_vm_final(arch/riscv/mm/init.c)
实现如下
static void __init setup_vm_final(void)
{
uintptr_t va, map_size;
phys_addr_t pa, start, end;
u64 i;
/**
* MMU is enabled at this point. But page table setup is not complete yet.
* fixmap page table alloc functions should be used at this point
*/
pt_ops.alloc_pte = alloc_pte_fixmap;
pt_ops.get_pte_virt = get_pte_virt_fixmap;
pt_ops.alloc_pmd = alloc_pmd_fixmap;
pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
/* Map all memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET);
map_size = best_map_size(start, end - start);
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
create_pgd_mapping(swapper_pg_dir, va, pa,
map_size, PAGE_KERNEL_EXEC);
}
}
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
/* Move to swapper page table */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
/* generic page allocation functions must be used to setup page table */
pt_ops.alloc_pte = alloc_pte_late;
pt_ops.get_pte_virt = get_pte_virt_late;
pt_ops.alloc_pmd = alloc_pmd_late;
pt_ops.get_pmd_virt = get_pmd_virt_late;
}
2.1 接口设置
开始设置接口
/**
* MMU is enabled at this point. But page table setup is not complete yet.
* fixmap page table alloc functions should be used at this point
*/
pt_ops.alloc_pte = alloc_pte_fixmap;
pt_ops.get_pte_virt = get_pte_virt_fixmap;
pt_ops.alloc_pmd = alloc_pmd_fixmap;
pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
最后设置接口
/* generic page allocation functions must be used to setup page table */
pt_ops.alloc_pte = alloc_pte_late;
pt_ops.get_pte_virt = get_pte_virt_late;
pt_ops.alloc_pmd = alloc_pmd_late;
pt_ops.get_pmd_virt = get_pmd_virt_late;
2.2配置根页表swapper_pg_dir
首先配置根页表swapper_pg_dir,稍后会从early_pg_dir切换到该页表。
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
此时参数为
和setup_vm时一样,swapper_pg_dir的315条目指向下一级fixmap_pmd
执行完后GDB查看如下,[315]位置的条目对应fixmap_pmd
(gdb) p /x swapper_pg_dir
$1 = {{pgd = 0x0} <repeats 315 times>, {pgd = 0x2075e801}, {pgd = 0x0} <repeats 196 times>}
(gdb)
2.3映射bank
/* Map all memory banks */
for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET);
map_size = best_map_size(start, end - start);
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
create_pgd_mapping(swapper_pg_dir, va, pa,
map_size, PAGE_KERNEL_EXEC);
}
}
for_each_mem_range
遍历所有块映射。
第一次,此时映射的范围是0x80200000~0x88000000
按照2MB单位进行映射
此时alloc_pgd_next
pt_ops.alloc_pmd(__va)
pt_ops.alloc_pmd = alloc_pmd_fixmap;
static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
{
return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
}
动态分配的pmd地址为0x87fff000
sz为2M
条目值为0x21fffc01
0x87fff000转为虚拟地址nextp=0xffffffcefeffe000
继续下一级pmd条目的配置,指向对应的2MB物理地址
然后继续2MB配置,直到0x88000000
动态分配的pmd物理地址为0x87fff000
虚拟地址计算接口如下
pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
{
clear_fixmap(FIX_PMD);
return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
}
__set_fixmap_offset(idx, phys, FIXMAP_PAGE_NORMAL)
/* Return a pointer with offset calculated */
({ \
unsigned long ________addr; \
__set_fixmap(idx, phys, flags); \
________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1)); \
________addr; \
})
fix_to_virt
static __always_inline unsigned long fix_to_virt(const unsigned int idx)
{
BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
return __fix_to_virt(idx);
}
其中
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
unsigned long addr = __fix_to_virt(idx);
pte_t *ptep;
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
ptep = &fixmap_pte[pte_index(addr)];
if (pgprot_val(prot))
set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
else
pte_clear(&init_mm, addr, ptep);
local_flush_tlb_page(addr);
}
通过fixmap_pte临时映射虚拟地址,对应如下地址,解决此时只能访问虚拟地址不能访问物理地址的问题
即将0x87fff000映射到了上述FIX_PMD页,然后通过该虚拟地址就可以访问该物理地址了。
#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
x为FIX_PMD=2
FIXADDR_TOP=0xffffffcefee00000+0x200000=0xffffffceff000000
0xffffffceff000000-(2<<12)=0xFFFF FFCE FEFF E000
get_pmd_virt_fixmap->set_fixmap_offset
此时查看该pmd的内容
p /x *(pmd_t (*)[512])(0xffffffcefeffe000)
$38 = {{pmd = 0x200800ef}, {pmd = 0x201000ef}, {pmd = 0x201800ef}, {pmd = 0x202000ef}, {pmd = 0x202800ef}, {pmd = 0x203000ef}, {
pmd = 0x203800ef}, {pmd = 0x204000ef}, {pmd = 0x204800ef}, {pmd = 0x205000ef}, {pmd = 0x205800ef}, {pmd = 0x206000ef}, {
pmd = 0x206800ef}, {pmd = 0x207000ef}, {pmd = 0x207800ef}, {pmd = 0x208000ef}, {pmd = 0x208800ef}, {pmd = 0x209000ef}, {
pmd = 0x209800ef}, {pmd = 0x20a000ef}, {pmd = 0x20a800ef}, {pmd = 0x20b000ef}, {pmd = 0x20b800ef}, {pmd = 0x20c000ef}, {
pmd = 0x20c800ef}, {pmd = 0x20d000ef}, {pmd = 0x20d800ef}, {pmd = 0x20e000ef}, {pmd = 0x20e800ef}, {pmd = 0x20f000ef}, {
pmd = 0x20f800ef}, {pmd = 0x210000ef}, {pmd = 0x210800ef}, {pmd = 0x211000ef}, {pmd = 0x211800ef}, {pmd = 0x212000ef}, {
pmd = 0x212800ef}, {pmd = 0x213000ef}, {pmd = 0x213800ef}, {pmd = 0x214000ef}, {pmd = 0x214800ef}, {pmd = 0x215000ef}, {
pmd = 0x215800ef}, {pmd = 0x216000ef}, {pmd = 0x216800ef}, {pmd = 0x217000ef}, {pmd = 0x217800ef}, {pmd = 0x218000ef}, {
pmd = 0x218800ef}, {pmd = 0x219000ef}, {pmd = 0x219800ef}, {pmd = 0x21a000ef}, {pmd = 0x21a800ef}, {pmd = 0x21b000ef}, {
pmd = 0x21b800ef}, {pmd = 0x21c000ef}, {pmd = 0x21c800ef}, {pmd = 0x21d000ef}, {pmd = 0x21d800ef}, {pmd = 0x21e000ef}, {
pmd = 0x21e800ef}, {pmd = 0x21f000ef}, {pmd = 0x21f800ef}, {pmd = 0x0} <repeats 449 times>}
(gdb)
从以上输出可以看到
共映射了63个2MB的块
一共126M
刚好是0x80200000~0x88000000的范围126MB。
2.4清除fixmap的pte级别条目(FIX_PTE和FIX_PMD对应的页)
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
其中include/asm-generic/fixmap.h
__set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR)
arch/riscv/mm/init.c中
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
unsigned long addr = __fix_to_virt(idx);
pte_t *ptep;
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
ptep = &fixmap_pte[pte_index(addr)];
if (pgprot_val(prot))
set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
else
pte_clear(&init_mm, addr, ptep);
local_flush_tlb_page(addr);
}
FIX_PTE=1
FIX_PMD=2
FIX_PTE和FIX_PMD对应如下的页表,PTE是PMD的后级,所以先清除PTE条目,再清除PMD条目
对应
__set_fixmap(1,0,0)
__set_fixmap(2,0,0)
clean前PMD下有一个条目
(gdb) p /x fixmap_pmd
$1 = {{pmd = 0x0} <repeats 503 times>, {pmd = 0x2075f001}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {
pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}}
(gdb)
Pte下有一个条目
(gdb) p /x fixmap_pte
$2 = {{pte = 0x0} <repeats 510 times>, {pte = 0x21fffce7}, {pte = 0x0}}
(gdb)
来看__set_fixmap实现
unsigned long addr = __fix_to_virt(idx);
include/asm-generic/fixmap.h中
#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
FIXADDR_TOP=0xffffffcefee00000+0x200000
所以addr在PTE和PMD时分别是
0xffffffcefee00000+0x200000-(1<<12)=FFFFFFCEFEFFF000
0xffffffcefee00000+0x200000-(2<<12)=FFFFFFCEFEFFE000
然后检查
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
页索引要在以下范围内
即arch/riscv/include/asm/fixmap.h中的枚举
enum fixed_addresses {
FIX_HOLE,
FIX_PTE,
FIX_PMD,
FIX_TEXT_POKE1,
FIX_TEXT_POKE0,
FIX_EARLYCON_MEM_BASE,
__end_of_permanent_fixed_addresses,
/*
* Temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*/
FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
__end_of_fixed_addresses
};
然后
ptep = &fixmap_pte[pte_index(addr)];
include/linux/pgtable.h中PAGE_SHIFT=12,PTRS_PER_PTE=512
static inline unsigned long pte_index(unsigned long address)
{
return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}
所以
pte_index(0xFFFFFFCEFEFFF000)=511
pte_index(0xFFFFFFCEFEFFE000)=510
继续prot为0,所以走
pte_clear(&init_mm, addr, ptep);
arch/riscv/include/asm/pgtable.h中
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
set_pte_at(mm, addr, ptep, __pte(0));
}
arch/riscv/include/asm/pgtable.h中
static inline void set_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pteval)
{
if (pte_present(pteval) && pte_exec(pteval))
flush_icache_pte(pteval);
set_pte(ptep, pteval);
}
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
*ptep = pteval;
}
即将*ptep(ptep指向的页表项)设置为0。
即
fixmap_pte[511]=0
fixmap_pte[510]=0
最后local_flush_tlb_page调用
sfence.vma刷新tlb
执行完这两句后,看到fixmap_pte[510]变为了0(fixmap_pte[511]原本就是0)。
(gdb) p /x fixmap_pmd
$4 = {{pmd = 0x0} <repeats 503 times>, {pmd = 0x2075f001}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {
pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}}
(gdb)
(gdb) p /x fixmap_pte
$5 = {{pte = 0x0} <repeats 512 times>}
(gdb)
2.5切换页表
然后切换satp到swapper_pg_dir
/* Move to swapper page table */
csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
arch/riscv/include/asm/page.h中
#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))
#define __phys_addr_symbol(x) __va_to_pa_nodebug(x)
#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
所以__pa_symbol(swapper_pg_dir)
计算物理地址就是&swapper_pg_dir-va_pa_offset
对应汇编代码如下
此时swapper_pg_dir地址为0xffffffe001b7e000对应寄存器a5
(gdb) p &swapper_pg_dir
$1 = (pgd_t (*)[512]) 0xffffffe001b7e000 <swapper_pg_dir>
(gdb)
变量va_pa_offset的值为0xffffffdf7fe00000,对应寄存器a4
实际就是(PAGE_OFFSET-load_pa=0xffffffe000000000-0x80200000).
(gdb) p /x va_pa_offset
$1 = 0xffffffdf7fe00000
(gdb)
计算完后值为0x81d7e000即
0xffffffe001b7e000 -0xffffffdf7fe00000
include/linux/pfn.h中
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
arch/riscv/include/asm/csr.h中
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
#define SATP_MODE SATP_MODE_39
所以写入satp寄存器的值是
(0x81d7e000>>12)|0x8000000000000000=0x8000000000081d7e
然后
local_flush_tlb_all();即调用sfence.vma刷新tlb。
arch/riscv/include/asm/tlbflush.h中
/* Flush one page from local TLB */
static inline void local_flush_tlb_page(unsigned long addr)
{
csr_write(CSR_SMCIR, 1 << 26);
__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
}
三. 设置之后页表
借助上一篇,我们实现了在mmu使能后继续使用GDB调试,我们可以直接跳到
setup_vm_final,一步步查看执行过程分析。
hb setup_vm_final 打断点到函数入口
c 全速运行到函数处
可以运行完后查看页表信息
(gdb) p &swapper_pg_dir
$1 = (pgd_t (*)[512]) 0xffffffe001b7e000 <swapper_pg_dir>
(gdb)
(gdb) p /x swapper_pg_dir
$3 = {{pgd = 0x0} <repeats 315 times>, {pgd = 0x2075e801}, {pgd = 0x0} <repeats 68 times>, {pgd = 0x21fffc01}, {
pgd = 0x0} <repeats 127 times>}
(gdb)
swapper_pg_dir[315]指向的正是fixmap_pmd
(gdb) p &fixmap_pmd
$4 = (pmd_t (*)[512]) 0xffffffe001b7a000 <fixmap_pmd>
(gdb)
(gdb) p /x fixmap_pmd
$5 = {{pmd = 0x0} <repeats 503 times>, {pmd = 0x2075f001}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {
pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}}
(gdb)
指向的 fixmap_pmd[503]正是fixmap_pte
(gdb) p & fixmap_pte
$6 = (pte_t (*)[512]) 0xffffffe001b7c000 <fixmap_pte>
(gdb)
(gdb) p /x fixmap_pte
$7 = {{pte = 0x0} <repeats 512 times>}
(gdb)
fixmap_pte后没有映射了,前面看到clean了。
swapper_pg_dir[384]条目值是
((x>>12 ) <<10)| 1=0x21fffc01
所以反推对应的pmd页表地址为0x87FFF000。
此处对应的虚拟地址为0xffffffcefeffe000
PA-VA的计算方式前面已经分析
通过fixmap_pte[FIX_PMD]映射对应物理地址0x87fff000,虚拟地址是
0xffffffcefeffe000。
p /x *(pmd_t (*)[512])(0xffffffcefeffe000)
$38 = {{pmd = 0x200800ef}, {pmd = 0x201000ef}, {pmd = 0x201800ef}, {pmd = 0x202000ef}, {pmd = 0x202800ef}, {pmd = 0x203000ef}, {
pmd = 0x203800ef}, {pmd = 0x204000ef}, {pmd = 0x204800ef}, {pmd = 0x205000ef}, {pmd = 0x205800ef}, {pmd = 0x206000ef}, {
pmd = 0x206800ef}, {pmd = 0x207000ef}, {pmd = 0x207800ef}, {pmd = 0x208000ef}, {pmd = 0x208800ef}, {pmd = 0x209000ef}, {
pmd = 0x209800ef}, {pmd = 0x20a000ef}, {pmd = 0x20a800ef}, {pmd = 0x20b000ef}, {pmd = 0x20b800ef}, {pmd = 0x20c000ef}, {
pmd = 0x20c800ef}, {pmd = 0x20d000ef}, {pmd = 0x20d800ef}, {pmd = 0x20e000ef}, {pmd = 0x20e800ef}, {pmd = 0x20f000ef}, {
pmd = 0x20f800ef}, {pmd = 0x210000ef}, {pmd = 0x210800ef}, {pmd = 0x211000ef}, {pmd = 0x211800ef}, {pmd = 0x212000ef}, {
pmd = 0x212800ef}, {pmd = 0x213000ef}, {pmd = 0x213800ef}, {pmd = 0x214000ef}, {pmd = 0x214800ef}, {pmd = 0x215000ef}, {
pmd = 0x215800ef}, {pmd = 0x216000ef}, {pmd = 0x216800ef}, {pmd = 0x217000ef}, {pmd = 0x217800ef}, {pmd = 0x218000ef}, {
pmd = 0x218800ef}, {pmd = 0x219000ef}, {pmd = 0x219800ef}, {pmd = 0x21a000ef}, {pmd = 0x21a800ef}, {pmd = 0x21b000ef}, {
pmd = 0x21b800ef}, {pmd = 0x21c000ef}, {pmd = 0x21c800ef}, {pmd = 0x21d000ef}, {pmd = 0x21d800ef}, {pmd = 0x21e000ef}, {
pmd = 0x21e800ef}, {pmd = 0x21f000ef}, {pmd = 0x21f800ef}, {pmd = 0x0} <repeats 449 times>}
(gdb)
可以看到最终页表如下
其中
pmd_t xxx_pmd[PTRS_PER_PMD]
0x87fff000是动态分配出来的PMD
该PMD要通过虚拟地址访问,则需要先对其进行映射,这是通过
pmd_t fixmap_pmd[PTRS_PER_PMD]
下映射pte_t fixmap_pte[PTRS_PER_PTE]
下映射一个4KB的页来实现的,这个映射是临时的,访问完xxx_pmd即可clean。
四. 总结
setup_vm_final最终切换到了swapper_pg_dir这个页表,映射了PAGE_OFFSET即0xffffffe000000000开始的126MB到0x80200000开始的126MB物理地址。
进行上述映射时动态分配了xxx_pmd,并且需要访问该地址;而现在已经使能了MMU,不能直接访问物理地址,所以需要先借助fixmap_pmd->fixmap_pte临时映射一个xxx_pmd物理地址对应的虚拟地址,以便设置xxx_pmd的内容。这就是fixmap_pmd/pte的作用。