RISCV Linux kernel 启动代码分析之六:setup_vm_final分析

文摘   2024-11-19 13:21   湖南  

一. 前言

前面我们分析了setup_vm以及relocate,并详细手算了对应的页表,了解了此时页表将哪些虚拟地址映射到了哪些物理地址。现在继续来看setup_vm_final,该函数实现最终的映射。

二. 分析过程

该函数调用路径如下

head.S中relocate之后

tail start_kernel->

start_kernel(init/main.c)->

setup_arch(arch/riscv/kernel/setup.c)->

paging_init(arch/riscv/mm/init.c)->

setup_vm_final(arch/riscv/mm/init.c)

实现如下

static void __init setup_vm_final(void){    uintptr_t va, map_size;    phys_addr_t pa, start, end;    u64 i;
    /**     * MMU is enabled at this point. But page table setup is not complete yet.     * fixmap page table alloc functions should be used at this point     */    pt_ops.alloc_pte = alloc_pte_fixmap;    pt_ops.get_pte_virt = get_pte_virt_fixmap;#ifndef __PAGETABLE_PMD_FOLDED    pt_ops.alloc_pmd = alloc_pmd_fixmap;    pt_ops.get_pmd_virt = get_pmd_virt_fixmap;#endif    /* Setup swapper PGD for fixmap */    create_pgd_mapping(swapper_pg_dir, FIXADDR_START,               __pa_symbol(fixmap_pgd_next),               PGDIR_SIZE, PAGE_TABLE);    /* Map all memory banks */    for_each_mem_range(i, &start, &end) {        if (start >= end)            break;        if (start <= __pa(PAGE_OFFSET) &&            __pa(PAGE_OFFSET) < end)            start = __pa(PAGE_OFFSET);        map_size = best_map_size(start, end - start);        for (pa = start; pa < end; pa += map_size) {            va = (uintptr_t)__va(pa);            create_pgd_mapping(swapper_pg_dir, va, pa,                       map_size, PAGE_KERNEL_EXEC);        }    }    /* Clear fixmap PTE and PMD mappings */    clear_fixmap(FIX_PTE);    clear_fixmap(FIX_PMD);    /* Move to swapper page table */    csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);    local_flush_tlb_all();    /* generic page allocation functions must be used to setup page table */    pt_ops.alloc_pte = alloc_pte_late;    pt_ops.get_pte_virt = get_pte_virt_late;#ifndef __PAGETABLE_PMD_FOLDED    pt_ops.alloc_pmd = alloc_pmd_late;    pt_ops.get_pmd_virt = get_pmd_virt_late;#endif}

2.1 接口设置

开始设置接口

 /**     * MMU is enabled at this point. But page table setup is not complete yet.     * fixmap page table alloc functions should be used at this point     */    pt_ops.alloc_pte = alloc_pte_fixmap;    pt_ops.get_pte_virt = get_pte_virt_fixmap;#ifndef __PAGETABLE_PMD_FOLDED    pt_ops.alloc_pmd = alloc_pmd_fixmap;    pt_ops.get_pmd_virt = get_pmd_virt_fixmap;#endif

最后设置接口

 /* generic page allocation functions must be used to setup page table */    pt_ops.alloc_pte = alloc_pte_late;    pt_ops.get_pte_virt = get_pte_virt_late;#ifndef __PAGETABLE_PMD_FOLDED    pt_ops.alloc_pmd = alloc_pmd_late;    pt_ops.get_pmd_virt = get_pmd_virt_late;#endif

2.2配置根页表swapper_pg_dir

首先配置根页表swapper_pg_dir,等下会从early_pg_dir切换到该页表

    /* Setup swapper PGD for fixmap */    create_pgd_mapping(swapper_pg_dir, FIXADDR_START,               __pa_symbol(fixmap_pgd_next),               PGDIR_SIZE, PAGE_TABLE);

此时参数为

和setup_vm时一样,swapper_pg_dir的第315个条目指向下一级fixmap_pmd

执行完后GDB查看如下,[315]位置的条目对应fixmap_pmd

(gdb) p /x swapper_pg_dir$1 = {{pgd = 0x0} <repeats 315 times>, {pgd = 0x2075e801}, {pgd = 0x0} <repeats 196 times>}(gdb)

2.3映射bank

   /* Map all memory banks */    for_each_mem_range(i, &start, &end) {        if (start >= end)            break;        if (start <= __pa(PAGE_OFFSET) &&            __pa(PAGE_OFFSET) < end)            start = __pa(PAGE_OFFSET);
        map_size = best_map_size(start, end - start);        for (pa = start; pa < end; pa += map_size) {            va = (uintptr_t)__va(pa);            create_pgd_mapping(swapper_pg_dir, va, pa,                       map_size, PAGE_KERNEL_EXEC);        }    }

for_each_mem_range

遍历所有块映射。

第一次,此时映射的范围是0x80200000~0x88000000

按照2MB单位进行映射

此时alloc_pgd_next展开为

pt_ops.alloc_pmd(__va)

pt_ops.alloc_pmd = alloc_pmd_fixmap;

static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)

{

return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);

}

动态分配的pmd地址为0x87fff000

sz为2MB

条目值为0x21fffc01

0x87fff000转为虚拟地址nextp=0xffffffcefeffe000

继续下一级pmd条目的配置,指向对应的2MB物理地址

然后继续按2MB配置,直到0x88000000

动态分配的pmd物理地址为0x87fff000

虚拟地址计算接口如下

   pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa){    clear_fixmap(FIX_PMD);    return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);}


#define set_fixmap_offset(idx, phys) \    __set_fixmap_offset(idx, phys, FIXMAP_PAGE_NORMAL)

/* Return a pointer with offset calculated */#define __set_fixmap_offset(idx, phys, flags)               \({                                  \    unsigned long ________addr;                 \    __set_fixmap(idx, phys, flags);                 \    ________addr = fix_to_virt(idx) + ((phys) & (PAGE_SIZE - 1));   \    ________addr;                           \})

fix_to_virt

static __always_inline unsigned long fix_to_virt(const unsigned int idx){    BUILD_BUG_ON(idx >= __end_of_fixed_addresses);    return __fix_to_virt(idx);}

其中

void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot){    unsigned long addr = __fix_to_virt(idx);    pte_t *ptep;
    BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);    ptep = &fixmap_pte[pte_index(addr)];    if (pgprot_val(prot))        set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));    else        pte_clear(&init_mm, addr, ptep);    local_flush_tlb_page(addr);}

通过fixmap_pte临时映射虚拟地址,对应如下地址,解决此时只能访问虚拟地址不能访问物理地址的问题

即将0x87fff000映射到了上述FIX_PMD页,然后通过该虚拟地址就可以访问该物理地址了。

#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))

x为FIX_PMD=2

FIXADDR_TOP=0xffffffcefee00000+0x200000=0xffffffceff000000

0xffffffceff000000-(2<<12)=0xFFFF FFCE FEFF E000

get_pmd_virt_fixmap->set_fixmap_offset

此时查看该pmd的内容

p /x *(pmd_t (*)[512])(0xffffffcefeffe000)$38 = {{pmd = 0x200800ef}, {pmd = 0x201000ef}, {pmd = 0x201800ef}, {pmd = 0x202000ef}, {pmd = 0x202800ef}, {pmd = 0x203000ef}, {    pmd = 0x203800ef}, {pmd = 0x204000ef}, {pmd = 0x204800ef}, {pmd = 0x205000ef}, {pmd = 0x205800ef}, {pmd = 0x206000ef}, {    pmd = 0x206800ef}, {pmd = 0x207000ef}, {pmd = 0x207800ef}, {pmd = 0x208000ef}, {pmd = 0x208800ef}, {pmd = 0x209000ef}, {    pmd = 0x209800ef}, {pmd = 0x20a000ef}, {pmd = 0x20a800ef}, {pmd = 0x20b000ef}, {pmd = 0x20b800ef}, {pmd = 0x20c000ef}, {    pmd = 0x20c800ef}, {pmd = 0x20d000ef}, {pmd = 0x20d800ef}, {pmd = 0x20e000ef}, {pmd = 0x20e800ef}, {pmd = 0x20f000ef}, {    pmd = 0x20f800ef}, {pmd = 0x210000ef}, {pmd = 0x210800ef}, {pmd = 0x211000ef}, {pmd = 0x211800ef}, {pmd = 0x212000ef}, {    pmd = 0x212800ef}, {pmd = 0x213000ef}, {pmd = 0x213800ef}, {pmd = 0x214000ef}, {pmd = 0x214800ef}, {pmd = 0x215000ef}, {    pmd = 0x215800ef}, {pmd = 0x216000ef}, {pmd = 0x216800ef}, {pmd = 0x217000ef}, {pmd = 0x217800ef}, {pmd = 0x218000ef}, {    pmd = 0x218800ef}, {pmd = 0x219000ef}, {pmd = 0x219800ef}, {pmd = 0x21a000ef}, {pmd = 0x21a800ef}, {pmd = 0x21b000ef}, {    pmd = 0x21b800ef}, {pmd = 0x21c000ef}, {pmd = 0x21c800ef}, {pmd = 0x21d000ef}, {pmd = 0x21d800ef}, {pmd = 0x21e000ef}, {    pmd = 0x21e800ef}, {pmd = 0x21f000ef}, {pmd = 0x21f800ef}, {pmd = 0x0} <repeats 449 times>}(gdb)

以上看到

看到映射了63个2MB的块

一共63×2MB=126MB

刚好是0x80200000~0x88000000的范围126MB

2.4 清除fixmap的pte级别条目(FIX_PTE和FIX_PMD对应的页)

    /* Clear fixmap PTE and PMD mappings */    clear_fixmap(FIX_PTE);    clear_fixmap(FIX_PMD);

其中include/asm-generic/fixmap.h

#ifndef clear_fixmap#define clear_fixmap(idx)           \    __set_fixmap(idx, 0, FIXMAP_PAGE_CLEAR)#endif

arch/riscv/mm/init.c

void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot){    unsigned long addr = __fix_to_virt(idx);    pte_t *ptep;
    BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);    ptep = &fixmap_pte[pte_index(addr)];    if (pgprot_val(prot))        set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));    else        pte_clear(&init_mm, addr, ptep);    local_flush_tlb_page(addr);}

FIX_PTE=1

FIX_PMD=2

FIX_PTE和FIX_PMD对应如下的页表,PTE是PMD的后级,所以先清除PTE条目,再清除PMD条目

对应

__set_fixmap(1,0,0)

__set_fixmap(2,0,0)

clean前PMD下有一个条目

(gdb) p /x fixmap_pmd$1 = {{pmd = 0x0} <repeats 503 times>, {pmd = 0x2075f001}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {    pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}}(gdb)

Pte下有一个条目

(gdb) p /x fixmap_pte$2 = {{pte = 0x0} <repeats 510 times>, {pte = 0x21fffce7}, {pte = 0x0}}(gdb)

来看__set_fixmap实现

   unsigned long addr = __fix_to_virt(idx);

include/asm-generic/fixmap.h

#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))

FIXADDR_TOP=0xffffffcefee00000+0x200000

所以addr在idx为FIX_PTE和FIX_PMD时分别是

0xffffffcefee00000+0x200000-(1<<12)=FFFFFFCEFEFFF000

0xffffffcefee00000+0x200000-(2<<12)=FFFFFFCEFEFFE000

然后检查

BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);

页索引要在以下范围内

arch/riscv/include/asm/fixmap.h中的枚举

enum fixed_addresses {    FIX_HOLE,    FIX_PTE,    FIX_PMD,    FIX_TEXT_POKE1,    FIX_TEXT_POKE0,    FIX_EARLYCON_MEM_BASE,
    __end_of_permanent_fixed_addresses,    /*     * Temporary boot-time mappings, used by early_ioremap(),     * before ioremap() is functional.     */#define NR_FIX_BTMAPS       (SZ_256K / PAGE_SIZE)#define FIX_BTMAPS_SLOTS    7#define TOTAL_FIX_BTMAPS    (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)    FIX_BTMAP_END = __end_of_permanent_fixed_addresses,    FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,    __end_of_fixed_addresses};

然后

ptep = &fixmap_pte[pte_index(addr)];

include/linux/pgtable.h,其中PAGE_SHIFT=12,PTRS_PER_PTE=512

static inline unsigned long pte_index(unsigned long address){    return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);}

所以

pte_index(0xFFFFFFCEFEFFF000)=511

pte_index(0xFFFFFFCEFEFFE000)=510

继续,prot为0,所以走

pte_clear(&init_mm, addr, ptep);

arch/riscv/include/asm/pgtable.h

static inline void pte_clear(struct mm_struct *mm,    unsigned long addr, pte_t *ptep){    set_pte_at(mm, addr, ptep, __pte(0));}

arch/riscv/include/asm/pgtable.h

static inline void set_pte_at(struct mm_struct *mm,    unsigned long addr, pte_t *ptep, pte_t pteval){    if (pte_present(pteval) && pte_exec(pteval))        flush_icache_pte(pteval);
    set_pte(ptep, pteval);}
static inline void set_pte(pte_t *ptep, pte_t pteval){    *ptep = pteval;}

即将ptep设置为0.

fixmap_pte[511]=0

fixmap_pte[510]=0

最后local_flush_tlb_page调用

sfence.vma刷新tlb

执行完这两句后,看到fixmap_pte[510](即FIX_PMD对应的条目,原值0x21fffce7)变为了0.

(gdb) p /x fixmap_pmd$4 = {{pmd = 0x0} <repeats 503 times>, {pmd = 0x2075f001}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {    pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}}(gdb)
(gdb) p /x fixmap_pte$5 = {{pte = 0x0} <repeats 512 times>}(gdb)

2.5切换页表

然后切换satp到swapper_pg_dir

    /* Move to swapper page table */    csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);    local_flush_tlb_all();

arch/riscv/include/asm/page.h

#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned long)(x), 0))

#define __phys_addr_symbol(x) __va_to_pa_nodebug(x)

#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)

所以__pa_symbol(swapper_pg_dir)

计算物理地址就是&swapper_pg_dir-va_pa_offset

对应汇编代码如下

此时swapper_pg_dir地址为0xffffffe001b7e000对应寄存器a5

(gdb) p &swapper_pg_dir$1 = (pgd_t (*)[512]) 0xffffffe001b7e000 <swapper_pg_dir>(gdb)

变量va_pa_offset的值为0xffffffdf7fe00000,对应寄存器a4

实际就是(PAGE_OFFSET-load_pa=0xffffffe000000000-0x80200000).

(gdb) p /x va_pa_offset$1 = 0xffffffdf7fe00000(gdb)

计算完后值为0x81d7e000

0xffffffe001b7e000 -0xffffffdf7fe00000

include/linux/pfn.h

#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

arch/riscv/include/asm/csr.h

#define SATP_MODE_39 _AC(0x8000000000000000, UL)

#define SATP_MODE SATP_MODE_39

所以写入satp寄存器的值是

(0x81d7e000>>12)|0x8000000000000000=0x8000000000081d7e 

然后

local_flush_tlb_all();即调用sfence.vma刷新tlb

arch/riscv/include/asm/tlbflush.h

/* Flush one page from local TLB */static inline void local_flush_tlb_page(unsigned long addr){#ifdef CONFIG_NO_SFENCE_VMA    csr_write(CSR_SMCIR, 1 << 26);#else    __asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");#endif}

三. 设置之后页表

借助上一篇,我们实现了在mmu使能后继续使用GDB调试,我们可以直接跳到

setup_vm_final,一步步查看执行过程分析。

hb setup_vm_final 打断点到函数入口

C 全速运行到函数处

可以运行完后查看页表信息

(gdb) p &swapper_pg_dir$1 = (pgd_t (*)[512]) 0xffffffe001b7e000 <swapper_pg_dir>(gdb)
(gdb) p /x swapper_pg_dir$3 = {{pgd = 0x0} <repeats 315 times>, {pgd = 0x2075e801}, {pgd = 0x0} <repeats 68 times>, {pgd = 0x21fffc01}, { pgd = 0x0} <repeats 127 times>}(gdb)

swapper_pg_dir[315]指向的正是fixmap_pmd

(gdb) p &fixmap_pmd$4 = (pmd_t (*)[512]) 0xffffffe001b7a000 <fixmap_pmd>(gdb)
(gdb) p /x fixmap_pmd$5 = {{pmd = 0x0} <repeats 503 times>, {pmd = 0x2075f001}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}, { pmd = 0x0}, {pmd = 0x0}, {pmd = 0x0}}(gdb)

指向的 fixmap_pmd[503]正是fixmap_pte

(gdb) p & fixmap_pte$6 = (pte_t (*)[512]) 0xffffffe001b7c000 <fixmap_pte>(gdb)
(gdb) p /x fixmap_pte$7 = {{pte = 0x0} <repeats 512 times>}(gdb)

fixmap_pte后没有映射了,前面看到clean了。

swapper_pg_dir[384]条目值是

((x>>12 ) <<10)| 1=0x21fffc01

所以反推对应的pmd页表物理地址为0x87FFF000

此处对应的虚拟地址为0xffffffcefeffe000

PA-VA的计算方式前面已经分析

通过fixmap_pte[FIX_PMD]映射对应物理地址0x87fff000,虚拟地址是

0xffffffcefeffe000

p /x *(pmd_t (*)[512])(0xffffffcefeffe000)$38 = {{pmd = 0x200800ef}, {pmd = 0x201000ef}, {pmd = 0x201800ef}, {pmd = 0x202000ef}, {pmd = 0x202800ef}, {pmd = 0x203000ef}, {    pmd = 0x203800ef}, {pmd = 0x204000ef}, {pmd = 0x204800ef}, {pmd = 0x205000ef}, {pmd = 0x205800ef}, {pmd = 0x206000ef}, {    pmd = 0x206800ef}, {pmd = 0x207000ef}, {pmd = 0x207800ef}, {pmd = 0x208000ef}, {pmd = 0x208800ef}, {pmd = 0x209000ef}, {    pmd = 0x209800ef}, {pmd = 0x20a000ef}, {pmd = 0x20a800ef}, {pmd = 0x20b000ef}, {pmd = 0x20b800ef}, {pmd = 0x20c000ef}, {    pmd = 0x20c800ef}, {pmd = 0x20d000ef}, {pmd = 0x20d800ef}, {pmd = 0x20e000ef}, {pmd = 0x20e800ef}, {pmd = 0x20f000ef}, {    pmd = 0x20f800ef}, {pmd = 0x210000ef}, {pmd = 0x210800ef}, {pmd = 0x211000ef}, {pmd = 0x211800ef}, {pmd = 0x212000ef}, {    pmd = 0x212800ef}, {pmd = 0x213000ef}, {pmd = 0x213800ef}, {pmd = 0x214000ef}, {pmd = 0x214800ef}, {pmd = 0x215000ef}, {    pmd = 0x215800ef}, {pmd = 0x216000ef}, {pmd = 0x216800ef}, {pmd = 0x217000ef}, {pmd = 0x217800ef}, {pmd = 0x218000ef}, {    pmd = 0x218800ef}, {pmd = 0x219000ef}, {pmd = 0x219800ef}, {pmd = 0x21a000ef}, {pmd = 0x21a800ef}, {pmd = 0x21b000ef}, {    pmd = 0x21b800ef}, {pmd = 0x21c000ef}, {pmd = 0x21c800ef}, {pmd = 0x21d000ef}, {pmd = 0x21d800ef}, {pmd = 0x21e000ef}, {    pmd = 0x21e800ef}, {pmd = 0x21f000ef}, {pmd = 0x21f800ef}, {pmd = 0x0} <repeats 449 times>}(gdb)

可以看到最终页表如下

其中

pmd_t xxx_pmd[PTRS_PER_PMD]

0x87fff000是动态分配出来的PMD

该PMD要通过虚拟地址访问,则需要先对其进行映射,这是通过

pmd_t fixmap_pmd[PTRS_PER_PMD]

下映射pte_t fixmap_pte[PTRS_PER_PTE]

下映射一个4KB的页来实现的,这个映射是临时的,访问完xxx_pmd即可clean

四. 总结

setup_vm_final最终切换到了swapper_pg_dir这个页表,将PAGE_OFFSET(即0xffffffe000000000)开始的126MB虚拟地址映射到0x80200000开始的126MB物理地址。

进行上述映射时动态分配了xxx_pmd,需要访问该页表所在的地址;而现在已经使能了MMU,不能直接访问物理地址,所以需要先借助fixmap_pmd->fixmap_pte临时映射一个xxx_pmd物理地址对应的虚拟地址,以便设置xxx_pmd的内容。这就是fixmap_pmd/pte的作用。




























嵌入式Lee
嵌入式软硬件技术:RTOS,GUI,FS,协议栈,ARM,总线,嵌入式C,开发环境 and blablaba....多年经验分享,非硬货不发,带你扒开每一个技术背后的根本原理。
 最新文章