本文对arm linux页表创建函数进行说明。在http://blog.csdn.net/flaoter/article/details/73381695中对MMU使能之前的临时页表进行了说明,此文是对kernel中正式页表创建过程进行说明。文中使用的kernel版本为4.4。
arm linux使用两级页表,L1是pgd,L2是pte。其中L1页表共2048项,每项占用8 bytes,每项对应2M内存,共占用2048*8=16K bytes。每个L1项指向的page中放有2个硬件L2页表,每个页表256项,每项占用4 bytes,对应4K内存,共2*256*4=2K bytes,即下图中的h/w pt。图中的Linux pt是linux管理用的页表。
arch/arm/include/asm/pgtable-2level.h
   pgd             pte
|        |
+--------+
|        |       +------------+ +0
+- - - - +       | Linux pt 0 |
|        |       +------------+ +1024
+--------+ +0    | Linux pt 1 |
|        |-----> +------------+ +2048
+- - - - + +4    |  h/w pt 0  |
|        |-----> +------------+ +3072
+--------+ +8    |  h/w pt 1  |
|        |       +------------+ +4096
页表创建通过函数create_mapping实现,在kernel初始化时的paging_init函数中会对memblock进行页表创建。
/* Describes one region to be mapped; consumed by create_mapping(). */
struct map_desc {
unsigned long virtual; /* virtual address of the region */
unsigned long pfn; /* page frame number; turned into a physical address with __pfn_to_phys() */
unsigned long length; /* length of the region; create_mapping() rounds it up to whole pages */
unsigned int type; /* index into mem_types[] */
};
/* Per-memory-type protection values; create_mapping() looks one up in mem_types[]. */
struct mem_type {
pteval_t prot_pte; /* protection bits combined with the pfn when L2 entries are written (alloc_init_pte) */
pteval_t prot_pte_s2; /* not referenced by the code shown in this article */
pmdval_t prot_l1; /* value handed to __pmd_populate() when an L1 entry is linked to an L2 table */
pmdval_t prot_sect; /* OR'ed with the physical address to form a section entry; 0 disables section mapping */
unsigned int domain; /* not referenced by the code shown in this article */
};
/*
 * Create the page-table entries for the region described by @md.
 *
 * The request is dropped with a warning when the virtual address lies in
 * the user region, or when the memory type has prot_l1 == 0 and the region
 * is not section-aligned. Otherwise the range is walked one pgd entry at a
 * time and each piece is passed to alloc_init_pud().
 */
static void __init create_mapping(struct map_desc *md)
{
unsigned long addr, length, end;
phys_addr_t phys;
const struct mem_type *type;
pgd_t *pgd;
/* Addresses below TASK_SIZE (user region) are refused, except the vectors base. */
if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
pr_warn("BUG: not creating mapping for 0x%08llx at 0x%08lx in user region\n",
(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
return;
}
/*
 * A device/ROM mapping placed in [PAGE_OFFSET, FIXADDR_START) but outside
 * [VMALLOC_START, VMALLOC_END) only triggers a warning; there is no
 * return here, so the mapping is still created.
 */
if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START &&
(md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
(long long)__pfn_to_phys((u64)md->pfn), md->virtual);
}
type = &mem_types[md->type];
/* Round the start down to a page boundary and grow the length to whole pages. */
addr = md->virtual & PAGE_MASK;
phys = __pfn_to_phys(md->pfn);
length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
/*
 * With prot_l1 == 0 the region cannot be mapped using pages (see the
 * warning text), so addr, phys and length must all be section-aligned;
 * if any of them is not, warn and give up.
 */
if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
pr_warn("BUG: map for 0x%08llx at 0x%08lx can not be mapped using pages, ignoring.\n",
(long long)__pfn_to_phys(md->pfn), addr);
return;
}
pgd = pgd_offset_k(addr); /* (1) locate the first-level entry covering addr */
end = addr + length;
/* Handle the range one pgd entry at a time, advancing phys in step with addr. */
do {
unsigned long next = pgd_addr_end(addr, end);
alloc_init_pud(pgd, addr, next, phys, type); /* (2) */
phys += next - addr;
addr = next;
} while (pgd++, addr != end);
}
(1) 查找一级页表项
/* Each pgd entry covers 1 << 21 bytes (2MB) of virtual address space. */
#define PGDIR_SHIFT 21
/* Index of the pgd entry that covers addr. */
#define pgd_index(addr) ((addr) >> PGDIR_SHIFT)
/* Pointer to the pgd entry covering addr in mm's page directory. */
#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
/* Same lookup, using init_mm's page directory. */
#define pgd_offset_k(addr) pgd_offset(&init_mm, addr)
在kernel初始化汇编阶段的head.S中可知,一级页表的地址范围是0xC0004000~0xC0008000,即pgd的起始地址是0xC0004000, map_lowmem中addr=0xC0000000, pgd_offset_k(addr)解释为
(pgd_t *)0xC0004000 + (0xC0000000 >> 21)
pgtable-2level-types.h
/* Raw 32-bit value of a single first-level entry. */
typedef u32 pmdval_t;
/* A pgd entry is a pair of pmdval_t, so pgd_t pointer arithmetic steps 8 bytes. */
typedef pmdval_t pgd_t[2];
pgd_t类型是u32[2]的数组类型,每项8字节,指针加1地址前进8字节。0xC0000000 >> 21 = 0x600,所以(pgd_t *)0xC0004000 + (0xC0000000 >> 21) = 0xC0004000 + 0x600 * 8 = 0xC0007000
(2) 调用alloc_init_pud。在经典两级页表格式下pud和pmd都折叠进pgd,alloc_init_pud最终调用下面的alloc_init_pmd
/*
 * Fill the pmd entries that cover [addr, end) under @pud, mapping physical
 * memory starting at @phys with the attributes in @type.
 *
 * Each pmd-sized step is mapped as sections when the type allows it and the
 * step is section-aligned; otherwise a second-level page table is used.
 */
static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
unsigned long end, phys_addr_t phys,
const struct mem_type *type)
{
pmd_t *pmd = pmd_offset(pud, addr);
unsigned long next;
do {
/*
* With LPAE, we must loop over to map
* all the pmds for the given range.
*/
next = pmd_addr_end(addr, end);
/*
* Try a section mapping - addr, next and phys must all be
* aligned to a section boundary.
*/
if (type->prot_sect &&
((addr | next | phys) & ~SECTION_MASK) == 0) {
__map_init_section(pmd, addr, next, phys, type); /* (a) section mapping */
} else {
alloc_init_pte(pmd, addr, next,
__phys_to_pfn(phys), type); /* (b) second-level page table mapping */
}
/* Keep phys in step with addr, which is advanced in the loop condition. */
phys += next - addr;
} while (pmd++, addr = next, addr != end);
}
(a) 段映射
/*
 * Write section entries for [addr, end), one entry per SECTION_SIZE,
 * starting at @pmd and mapping physical memory from @phys upward.
 * Each entry is the physical address OR'ed with type->prot_sect.
 */
static void __init __map_init_section(pmd_t *pmd, unsigned long addr,
unsigned long end, phys_addr_t phys,
const struct mem_type *type)
{
/* Remember the incoming pointer; it is what gets flushed at the end. */
pmd_t *p = pmd;
#ifndef CONFIG_ARM_LPAE
/*
* In classic MMU format, puds and pmds are folded in to
* the pgds. pmd_offset gives the PGD entry. PGDs refer to a
* group of L1 entries making up one logical pointer to
* an L2 table (2MB), where as PMDs refer to the individual
* L1 entries (1MB). Hence increment to get the correct
* offset for odd 1MB sections.
* (See arch/arm/include/asm/pgtable-2level.h)
*/
if (addr & SECTION_SIZE)
pmd++;
#endif
do {
*pmd = __pmd(phys | type->prot_sect); /* entry value is (phys | type->prot_sect) */
phys += SECTION_SIZE;
} while (pmd++, addr += SECTION_SIZE, addr != end);
flush_pmd_entry(p); /* flush the written entries out to memory */
}
typedef u32 pmdval_t;
/* pmd_t is a single 32-bit entry, so pmd_t pointer arithmetic steps 4 bytes. */
typedef pmdval_t pmd_t;
pmd_t是u32类型,指针值就是传进来的pgd_t,*pmd填充的内容即页表项就是(phys | type->prot_sect)。此处地址是按1M累加的,传进来的addr是按2M累加的,所以一般情况此函数的do{}while()循环都是执行两次。
下图是我的平台执行map_lowmem后的结果,一共占用了0x74项内容,建立了虚拟地址0xC0000000~0xC7300000到物理地址0x80000000~0x87300000的映射(这里给出的是首、末两个1M段的起始地址,0x74项每项1M,共覆盖0x7400000字节)。
0xC0000000 --> 页表项地址0xC0007000 --> 页表项内容0x8001141E --> 物理地址0x80000000
(b) 二级页表映射
/*
 * Return a pointer to the L2 entry for @addr under @pmd, first allocating
 * an L2 table and linking it into the L1 entry (with @prot) if the L1
 * entry is still empty.
 */
pte_t * __init early_pte_alloc(pmd_t *pmd, unsigned long addr, unsigned long prot)
{
if (pmd_none(*pmd)) { /* the L1 entry has not been populated yet */
/* Per the article: PTE_HWTABLE_OFF = 512*4 bytes and PTE_HWTABLE_SIZE = 512*4 bytes, so 4K is allocated. */
pte_t *pte = early_alloc(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE);
__pmd_populate(pmd, __pa(pte), prot); /* point the L1 entry at the new L2 table */
}
BUG_ON(pmd_bad(*pmd));
return pte_offset_kernel(pmd, addr);
}
/*
 * Map [addr, end) page by page under @pmd: one L2 entry per PAGE_SIZE,
 * starting at page frame @pfn and using type->prot_pte as protection.
 * type->prot_l1 is used if the L2 table has to be created first.
 */
void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
unsigned long end, unsigned long pfn,
const struct mem_type *type)
{
pte_t *pte = early_pte_alloc(pmd, addr, type->prot_l1); /* pte points at the L2 entry for addr */
do {
set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0); /* fill one L2 entry */
pfn++; /* each L2 entry maps one page (4K here) */
} while (pte++, addr += PAGE_SIZE, addr != end);
}
本例中virtual_addr = 0xFFFF0000, virtual_end=0xFFFF1000, pfn=0x000873FC,只进行4K地址的映射,因此只需要填充二级页表的1项内容即可。
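二级页表指针pte = 0xC73FF7C0,即二级页表基址0xC73FF000加上表内偏移((0xFFFF0000 >> 12) & 0x1FF) * 4 = 0x1F0 * 4 = 0x7C0。[0xC73FF7C0] = 0x873FC5DF,其中高20位0x873FC是pfn,低12位0x5DF是属性位。建立了虚拟地址0xFFFF0000到物理地址0x873FC000的映射关系。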