作者:朱涵俊
链接:https://zhuanlan.zhihu.com/p/140274586
来源:知乎
著作权归作者所有。商业转载请联系作者获得授权,非商业转载请注明出处。
内存管理分为虚拟地址空间管理,物理内存管理,内核堆栈管理。
内核在启动的过程中需要不断申请内存,这些内存申请之后一般就不需要释放。频繁的申请释放会导致内存碎片,性能下降。因此内核堆kheap管理的原则是申请一次,终身使用。堆内存分为2类,一是小内存,二是页对齐的内存。因为有的内核对象需要页对齐,比如页表。
kernel/kheap.c
//由于只申请不释放,不会有很多内存块。主要是BIOS,内核数据等引起的。
#define MAX_HEAP_HOLE 10
#define MAX_SMALL_HOLE 100
#if 1
#define DEBUG_PRINT printk
#else
#define DEBUG_PRINT inline_printk
#endifstruct kheap_t;
struct kheap_t {ulong addr;ulong size;ulong orgin_size;struct kheap_t *pnext;
};
static struct kheap_t *pfree4k;
static struct kheap_t *phead4k;
static struct kheap_t heap_holes[MAX_HEAP_HOLE];
static struct kheap_t heap_small_holes[MAX_SMALL_HOLE];
__thread static spinlock_t spin_4k;
__thread static spinlock_t spin_small;
static struct kheap_t *pfree_small;
static struct kheap_t *phead_small;
void init_kheap()
{for (int i = 0; i < MAX_HEAP_HOLE - 1; i++) {heap_holes[i].pnext = &heap_holes[i + 1];}heap_holes[MAX_HEAP_HOLE - 1].pnext = 0;pfree4k = heap_holes;phead4k = 0;init_spinlock(&spin_4k);for (int i = 0; i < MAX_SMALL_HOLE - 1; i++) {heap_small_holes[i].pnext = &heap_small_holes[i + 1];}heap_small_holes[MAX_SMALL_HOLE - 1].pnext = 0;pfree_small = heap_small_holes;phead_small = 0;init_spinlock(&spin_small);}
void free_kheap_4k(ulong addr, ulong size)
{struct kheap_t *p;struct kheap_t *ph;ASSERT((addr & 0xfff) == 0 && (size & 0xfff) == 0); //4k alignedp = pfree4k;if (!p) {panic("MAX_HEAP_HOLE too small!\n");}spin_lock(&spin_4k);pfree4k = pfree4k->pnext;p->addr = addr;p->orgin_size = size;p->size = size;//sort by addr asc,early init can only use low kheapif (!phead4k || phead4k->addr > p->addr) {p->pnext = phead4k;phead4k = p;spin_unlock(&spin_4k);return;}ph = phead4k;while (ph->pnext && ph->pnext->addr < p->addr) {ph = ph->pnext;}p->pnext = ph->pnext;ph->pnext = p;spin_unlock(&spin_4k);DEBUG_PRINT("KHeap: Free:%lx,addr,size:%lx\n", addr, size);
}void *alloc_kheap_4k(ulong size)
{struct kheap_t *p;void *pret;DEBUG_PRINT("alloc 4k:%x\n", size);if (size & 0xfff)size = (size & ~0xfff) + 0x1000; //align 4kspin_lock(&spin_4k);p = phead4k;while (p && p->size < size)p = p->pnext;while (!p) {ulong newaddr;spin_unlock(&spin_4k);newaddr = alloc_phy_page();if (!newaddr)return (void *)0;free_kheap_4k(newaddr, PAGE_SIZE);spin_lock(&spin_4k);p = phead4k;while (p && p->size < size)p = p->pnext;}p->size -= size;if (p->size == 0) {//add to free link?struct kheap_t *ptr = phead4k;if (ptr == p) {phead4k = phead4k->pnext;}else if (ptr->pnext == p) {ptr->pnext = p->pnext;}else {while (ptr->pnext && ptr->pnext != p)ptr = ptr->pnext;if (!ptr->pnext)panic("corrupted link,FILE:%s,LINE:%d", __FILE__, __LINE__);ptr->pnext = p->pnext;}p->pnext = pfree4k;pfree4k = p;}pret = (void *)p->addr;p->addr += size;spin_unlock(&spin_4k);return pret;
}void free_kheap_small(ulong addr, ulong size)
{struct kheap_t *p;ASSERT((addr & 0xfff) == 0 && (size & 0xfff) == 0); //4k alignedspin_lock(&spin_small);p = pfree_small;if (!p) {panic("MAX_HEAP_HOLE too small!\n");}pfree_small = pfree_small->pnext;p->addr = addr;p->orgin_size = size;p->size = size;p->pnext = phead_small;phead_small = p;spin_unlock(&spin_small);DEBUG_PRINT("KHeap: Small Free:%lx,addr,size:%lx\n", addr, size);
}void *alloc_kheap_small(ulong size)
{struct kheap_t *p;void *pret;DEBUG_PRINT("alloc small:%lx\n", size);ASSERT(size < 0x1000); //smallspin_lock(&spin_small);p = phead_small;while (p && p->size < size)p = p->pnext;while (!p) {ulong newaddr;spin_unlock(&spin_small);newaddr = (ulong) alloc_kheap_4k(0x1000);if (!newaddr)return (void *)0;free_kheap_small(newaddr, 0x1000);spin_lock(&spin_small);p = phead_small;while (p && p->size < size)p = p->pnext;}p->size -= size;if (p->size <= 16) { //drop small than 16//add to free link?struct kheap_t *ptr = phead_small;if (ptr == p) {phead_small = phead_small->pnext;}else if (ptr->pnext == p) {ptr->pnext = p->pnext;}else {while (ptr->pnext && ptr->pnext != p)ptr = ptr->pnext;if (!ptr->pnext)panic("corrupted link,FILE:%s,LINE:%d", __FILE__, __LINE__);ptr->pnext = p->pnext;}p->pnext = pfree_small;pfree_small = p;}pret = (void *)p->addr;p->addr += size;spin_unlock(&spin_small);return pret;
}
/*
 * Dump the page-aligned heap: one line per span with its current address
 * and remaining size. The list is walked without taking spin_4k.
 */
void kheap_debug()
{
	struct kheap_t *hole;

	printk("kheap 4k:\n");
	for (hole = phead4k; hole; hole = hole->pnext)
		printk("addr:%lx,size:%lx\n", hole->addr, hole->size);
}
初始化之后kheap是没有内存可以申请的,需要进行释放。
内核加载的位置是0x100000,即1M,那0-1M位置去掉bios部分,都可以释放。1M以上去掉内核使用的部分可以释放。
multiboot协议在加载内核之前保存了内存相关信息:寄存器eax中是magic,ebx中是multiboot信息结构的地址。如果自己写加载器,也可以调用bios中断来获取:eax=0xe820,int 0x15
arch/x86_64/multiboot.c对内存信息进行读取,然后释放kheap内存块,释放虚拟地址空间块,后续才能申请这些资源。
在entry64.S那里,在跳转到arch/x86_64/main.c之前,
pop %esi #restore magic
pop %edi #restore multiboot_info_type
根据传参约定,edi就是第一个参数,esi就是第二个参数
因此bp_main的原型应该是:void bp_main(u32 info_addr, u32 magic)
今天遇到一个坑:
unsigned int *p=(unsigned int *)0LL;printk("%x",*p);
上面代码会导致不停重启,编译选项-O0就不会,-O2就会。后来发现是GCC优化的时候看到引用地址0的内容,直接插入了一个异常指令。
学习地址: Dpdk/网络协议栈/vpp/OvS/DDos/NFV/虚拟化/高性能专家-学习视频教程-腾讯课堂
更多DPDK相关学习资料有需要的可以自行报名学习,免费订阅,久学习,或点击这里加qun免费
领取,关注我持续更新哦! !
汇编结果:
bp_main:
.LFB84:
	.loc 1 42 0
	.cfi_startproc
.LVL2:
	subq	$8, %rsp
.LCFI0:
	.cfi_def_cfa_offset 16
	.loc 1 45 0
	xorl	%eax, %eax
.LBB12:
.LBB13:
	.loc 1 37 0
	movb	$0, g_yaos(%rip)
.LVL3:
.LBE13:
.LBE12:
	.loc 1 45 0
	call	uart_early_init
.LVL4:
	.loc 1 46 0
	movl	0, %eax
	ud2	# GCC emitted a trap instruction here and the function simply ends
	.cfi_endproc
.LFE84:
	.size	bp_main, .-bp_main
	.section	.text.unlikely
后来在Makefile里面CFLAGS添加-fno-delete-null-pointer-checks解决。
long mode分页大小有4K,2M,1G可以选择。虽然是64位的,但是虚拟地址空间只用了48位(物理地址最多52位)。如果按4K一页来算,每级页表512项(4K/8字节,即每级索引占9位),48位-4K对齐的12位=36位,36/9=4,需要4级页表。如果用2M一页,则需要3级页表,如果是1G大小,则是2级就行。
pgtable.c
/* First-level page table. */
struct pml4_t {
	u64 pml4e[PML4_PER_PDP];
};

/* Second-level page table. */
struct pdp_t {
	u64 pdpte[PDP_PER_PD];
};

/*
 * Third-level page table. Its entries point directly at 2M pages; using
 * 4K pages would require a fourth level below this one.
 */
struct pd_t {
	u64 pde[PD_PER_PTE];
};

/*
 * Statically allocated, 4K-aligned page tables covering the 0-4G range:
 * 4 page directories * 512 entries * 2M = 4G.
 */
static struct pml4_t pml4 __attribute__ ((aligned(4096)));
static struct pdp_t first_pdp __attribute__ ((aligned(4096)));
static struct pd_t first_pd __attribute__ ((aligned(4096)));
static struct pd_t second_pd __attribute__ ((aligned(4096)));
static struct pd_t third_pd __attribute__ ((aligned(4096)));
static struct pd_t fourth_pd __attribute__ ((aligned(4096)));
static void init_pml4()
{extern ulong __max_phy_mem_addr;int map_page_p2v(ulong paddr, ulong vaddr, ulong flag);memset(&pml4, 0, sizeof(pml4));memset(&first_pdp, 0, sizeof(first_pdp));ASSERT((((ulong) & pml4) & 0xfff) == 0);ASSERT((((ulong) & first_pdp) & 0xfff) == 0);ASSERT((((ulong) & first_pd) & 0xfff) == 0);//init first 4G 4*512*2Mpml4.pml4e[0] = ((ulong) & first_pdp) | PTE_P | PTE_W;first_pdp.pdpte[0] = ((ulong) & first_pd) | PTE_P | PTE_W;first_pdp.pdpte[1] = ((ulong) & second_pd) | PTE_P | PTE_W;first_pdp.pdpte[2] = ((ulong) & third_pd) | PTE_P | PTE_W;first_pdp.pdpte[3] = ((ulong) & fourth_pd) | PTE_P | PTE_W;for (int i = 0; i < PD_PER_PTE; i++) {first_pd.pde[i] = i * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;second_pd.pde[i] =(i + PD_PER_PTE) * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;third_pd.pde[i] =(i + 2 * PD_PER_PTE) * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;fourth_pd.pde[i] =(i + 3 * PD_PER_PTE) * PAGE_SIZE | PTE_PS | PTE_W | PTE_P;}DEBUG_PRINT("__max_phy_mem_addr:%lx\n", __max_phy_mem_addr);if (__max_phy_mem_addr > NUMBER_4G) {ulong addr = NUMBER_4G;for (; addr + PAGE_SIZE < __max_phy_mem_addr; addr += PAGE_SIZE) {if (OK != map_page_p2v(addr, addr, PTE_PS | PTE_P | PTE_W)) {panic("Not enough init heap memory!");}}}write_cr3((ulong) & pml4);
}u64 get_pte_with_addr(u64 addr)
{ulong i, j, k, pml4base;struct pml4_t *p_pml4;struct pdp_t *p_pdp;struct pd_t *p_pd;pml4base = read_cr3();pml4base &= ~0xfff;printf(" %lx ", pml4base);i = (addr >> PML4_SHIFT) & (PML4_NR - 1);j = (addr >> PDP_SHIFT) & (PML4_PER_PDP - 1);k = (addr >> PD_SHIFT) & (PD_PER_PTE - 1);p_pml4 = (struct pml4_t *)pml4base;p_pdp = (struct pdp_t *)(p_pml4->pml4e[i] & ~0xfff);printf(" %lx ", p_pdp);if (!p_pdp)return 0;p_pd = (struct pd_t *)(p_pdp->pdpte[j] & ~0xfff);printf(" %lx ", p_pd);printf("%d %d %d ", i, j, k);if (!p_pd)return 0;return (u64) & p_pd->pde[k];
}int map_page_p2v(ulong paddr, ulong vaddr, ulong flag)
{ulong i, j, k;struct pdp_t *p_pdp;struct pd_t *pd;ASSERT((paddr & 0xfff) == 0); //align 4kASSERT((flag & ~0xfff) == 0);i = (vaddr >> PML4_SHIFT) & (PML4_NR - 1);j = (vaddr >> PDP_SHIFT) & (PML4_PER_PDP - 1);k = (vaddr >> PD_SHIFT) & (PD_PER_PTE - 1);p_pdp = (struct pdp_t *)(pml4.pml4e[i] & ~0xfff);if (!p_pdp) {p_pdp = (struct pdp_t *)alloc_kheap_4k(4096);DEBUG_PRINT("new pdp:%lx\n", p_pdp);ASSERT(((ulong) p_pdp & 0xfff) == 0);if (!p_pdp)return E_NOMEM;memset(p_pdp, 0, 4096);pml4.pml4e[i] = (ulong) p_pdp | PTE_P | PTE_W | PTE_U;}
//DEBUG_PRINT("p_pdp:%lx\n",p_pdp);pd = (struct pd_t *)(~0xfff & (p_pdp->pdpte[j]));if (!pd) {pd = (struct pd_t *)alloc_kheap_4k(4096);ASSERT(((ulong) pd & 0xfff) == 0);if (!pd)return E_NOMEM;memset(pd, 0, 4096);p_pdp->pdpte[j] = (ulong) pd | PTE_P | PTE_W | PTE_U;DEBUG_PRINT("pd:%lx\n", pd);}
//DEBUG_PRINT("pd:%lx\n",pd);pd->pde[k] = paddr | flag;
// DEBUG_PRINT("map phy:%lx to vaddr:%lx,pte:%lx,%d,%d,%d\n",paddr,vaddr,&pd[k],i,j,k);
}
/*
 * I/O space mapping: sets PTE_PCD to disable caching. The mappings start
 * at IO_MEM_BASE (0xfffffff800000000 according to the original note).
 *
 * Page-table addresses are 48 bits wide while values span 64 bits; the
 * unusable hole is the middle of the range. With 48 address lines the low
 * half is 0 - 00007FFF FFFFFFFF and the high half is
 * FFFF8000 00000000 - FFFFFFFF FFFFFFFF.
 *
 * Maps the pages covering [addr, addr + size] at physical + IO_MEM_BASE
 * and returns the virtual address corresponding to `addr`. The loop bound
 * is inclusive, so one page is mapped even for size 0, and one extra page
 * is mapped when the span ends exactly on a page boundary. The result of
 * map_page_p2v() is not checked.
 */
void *ioremap_nocache(ulong addr, ulong size)
{
	ulong page = addr & ~(PAGE_SIZE - 1);
	ulong span = size + (addr - page);	/* include the offset into the first page */
	ulong done = 0;

	while (done <= span) {
		map_page_p2v(page, page + IO_MEM_BASE,
			     PTE_P | PTE_W | PTE_PWT | PTE_PCD | PTE_PS);
		page += PAGE_SIZE;
		done += PAGE_SIZE;
	}
	return (void *)(addr + IO_MEM_BASE);
}
/* Page-table setup entry point: builds the tables and loads CR3 via init_pml4(). */
void init_pgtable()
{
	init_pml4();
}
void init_pgtable_ap()
{write_cr3((ulong) & pml4);}
运行本例:
git clone https://github.com/saneee/x86_64_kernel.git
cd 0005
make qemu
运行结果:
原文链接:https://zhuanlan.zhihu.com/p/140274586