Commit cf249131, authored by Silas Boyd-Wickizer

Rejigger vm.c.

Parent 75e5229e
@@ -12,6 +12,8 @@
 #include "proc.h"
 #include "vm.h"
 
+static void vmap_free(void *p);
+
 static struct vma *
 vma_alloc(void)
 {
@@ -41,49 +43,53 @@ vma_free(void *p)
   kmfree(e);
 }
 
-#ifdef TREE
-struct state {
-  int share;
-  void *pml4;
-  struct node *root;
-};
-
-static int
-vmap_free_vma(struct kv *kv, void *p)
-{
-  struct state *st = (struct state *) p;
-  vma_free(kv->val);
-  st->root = tree_remove(st->root, kv->key);
-  return 1;
-}
-
-static void
-vmap_free(void *p)
-{
-  struct vmap *m = (struct vmap *) p;
-  struct state *st = kmalloc(sizeof(struct state));
-  st->root = m->root;
-  tree_foreach(m->root, vmap_free_vma, st);
-  m->root = st->root;
-  freevm(m->pml4);
-  kmfree(st);
-  m->pml4 = 0;
-  m->alloc = 0;
-}
-#else
-static void
-vmap_free(void *p)
-{
-  struct vmap *m = (struct vmap *) p;
-  for(u64 i = 0; i < NELEM(m->e); i++) {
-    if (m->e[i])
-      vma_free(m->e[i]);
-  }
-  freevm(m->pml4);
-  m->pml4 = 0;
-  m->alloc = 0;
-}
-#endif
+static int
+vmn_doallocpg(struct vmnode *n)
+{
+  for(u64 i = 0; i < n->npages; i++) {
+    if((n->page[i] = kalloc()) == 0)
+      return -1;
+    memset((char *) n->page[i], 0, PGSIZE);
+  }
+  return 0;
+}
+
+static struct vmnode *
+vmn_copy(struct vmnode *n)
+{
+  struct vmnode *c = vmn_alloc(n->npages, n->type);
+  if(c != 0) {
+    c->type = n->type;
+    if (n->type == ONDEMAND) {
+      c->ip = idup(n->ip);
+      c->offset = n->offset;
+      c->sz = c->sz;
+    }
+    if (n->page[0]) {   // If the first page is present, all of them are present
+      if (vmn_doallocpg(c) < 0) {
+        cprintf("vmn_copy: out of memory\n");
+        vmn_free(c);
+        return 0;
+      }
+      for(u64 i = 0; i < n->npages; i++) {
+        memmove(c->page[i], n->page[i], PGSIZE);
+      }
+    }
+  }
+  return c;
+}
+
+struct vmnode *
+vmn_allocpg(u64 npg)
+{
+  struct vmnode *n = vmn_alloc(npg, EAGER);
+  if (n == 0) return 0;
+  if (vmn_doallocpg(n) < 0) {
+    vmn_free(n);
+    return 0;
+  }
+  return n;
+}
 
 void
 vmap_decref(struct vmap *m)
@@ -92,42 +98,6 @@ vmap_decref(struct vmap *m)
     vmap_free(m);
 }
 
-// Does any vma overlap start..start+len?
-// If yes, return the vma pointer.
-// If no, return 0.
-// This code can't handle regions at the very end
-// of the address space, e.g. 0xffffffff..0x0
-// We key vma's by their end address.
-struct vma *
-vmap_lookup(struct vmap *m, uptr start, uptr len)
-{
-  if(start + len < start)
-    panic("vmap_lookup bad len");
-
-#ifdef TREE
-  struct kv *kv = tree_find_gt(m->root, start);   // find vma with va_end > start
-  if (kv != 0) {
-    struct vma *e = (struct vma *) (kv->val);
-    if (e->va_end <= e->va_start)
-      panic("malformed va");
-    if (e->va_start < start+len && e->va_end > start) {
-      return e;
-    }
-  }
-#else
-  for(u64 i = 0; i < NELEM(m->e); i++){
-    struct vma *e = m->e[i];
-    if(e) {
-      if(e->va_end <= e->va_start)  // XXX shouldn't this involve start and len?
-        panic("vmap_lookup bad vma");
-      if(e->va_start < start+len && e->va_end > start)
-        return e;
-    }
-  }
-#endif
-
-  return 0;
-}
-
 struct vmap *
 vmap_alloc(void)
 {
@@ -148,93 +118,119 @@ vmap_alloc(void)
   return m;
 }
 
-int
-vmap_insert(struct vmap *m, struct vmnode *n, uptr va_start)
-{
-  acquire(&m->lock);
-  u64 len = n->npages * PGSIZE;
-
-  if(vmap_lookup(m, va_start, len)){
-    cprintf("vmap_insert: overlap\n");
-    release(&m->lock);
-    return -1;
-  }
-
-#ifdef TREE
-  struct vma *e = vma_alloc();
-  struct kv kv;
-  if (e == 0) {
-    release(&m->lock);
-    return -1;
-  }
-  e->va_start = va_start;
-  e->va_end = va_start + len;
-  e->n = n;
-  __sync_fetch_and_add(&n->ref, 1);
-  kv.key = e->va_end;
-  kv.val = e;
-  m->root = tree_insert(m->root, &kv);
-  release(&m->lock);
-  return 0;
-#else
-  for(u64 i = 0; i < NELEM(m->e); i++) {
-    if(m->e[i])
-      continue;
-    m->e[i] = vma_alloc();
-    if (m->e[i] == 0)
-      return -1;
-    m->e[i]->va_start = va_start;
-    m->e[i]->va_end = va_start + len;
-    m->e[i]->n = n;
-    __sync_fetch_and_add(&n->ref, 1);
-    release(&m->lock);
-    return 0;
-  }
-  release(&m->lock);
-  cprintf("vmap_insert: out of vma slots\n");
-  return -1;
-#endif
-}
-
-struct vmnode *
-vmn_alloc(u64 npg, enum vmntype type)
-{
-  struct vmnode *n = kmalloc(sizeof(struct vmnode));
-  if (n == 0) {
-    cprintf("out of vmnodes");
-    return 0;
-  }
-  if(npg > NELEM(n->page)) {
-    panic("vmnode too big\n");
-  }
-  memset(n, 0, sizeof(struct vmnode));
-  n->npages = npg;
-  n->type = type;
-  return n;
-}
-
-static int
-vmn_doallocpg(struct vmnode *n)
-{
-  for(u64 i = 0; i < n->npages; i++) {
-    if((n->page[i] = kalloc()) == 0)
-      return -1;
-    memset((char *) n->page[i], 0, PGSIZE);
-  }
-  return 0;
-}
-
-struct vmnode *
-vmn_allocpg(u64 npg)
-{
-  struct vmnode *n = vmn_alloc(npg, EAGER);
-  if (n == 0) return 0;
-  if (vmn_doallocpg(n) < 0) {
-    vmn_free(n);
-    return 0;
-  }
-  return n;
+static int
+vmn_doload(struct vmnode *vmn, struct inode *ip, u64 offset, u64 sz)
+{
+  for(u64 i = 0; i < sz; i += PGSIZE){
+    char *p = vmn->page[i / PGSIZE];
+    u64 n;
+    if(sz - i < PGSIZE)
+      n = sz - i;
+    else
+      n = PGSIZE;
+    if(readi(ip, p, offset+i, n) != n)
+      return -1;
+  }
+  return 0;
+}
+
+// Load a program segment into a vmnode.
+int
+vmn_load(struct vmnode *vmn, struct inode *ip, u64 offset, u64 sz)
+{
+  if (vmn->type == ONDEMAND) {
+    vmn->ip = ip;
+    vmn->offset = offset;
+    vmn->sz = sz;
+    return 0;
+  } else {
+    return vmn_doload(vmn, ip, offset, sz);
+  }
+}
+
+static struct vma *
+pagefault_ondemand(struct vmap *vmap, uptr va, u32 err, struct vma *m)
+{
+  if (vmn_doallocpg(m->n) < 0) {
+    panic("pagefault: couldn't allocate pages");
+  }
+  release(&m->lock);
+  if (vmn_doload(m->n, m->n->ip, m->n->offset, m->n->sz) < 0) {
+    panic("pagefault: couldn't load");
+  }
+  m = vmap_lookup(vmap, va, 1);
+  if (!m)
+    panic("pagefault_ondemand");
+  acquire(&m->lock); // re-acquire lock on m
+  return m;
+}
+
+static int
+pagefault_wcow(struct vmap *vmap, uptr va, pme_t *pte, struct vma *m, u64 npg)
+{
+  // Always make a copy of n, even if this process has the only ref,
+  // because other processes may change ref count while this process
+  // is handling wcow.
+  struct vmnode *n = m->n;
+  struct vmnode *c = vmn_copy(m->n);
+  if (c == 0) {
+    cprintf("pagefault_wcow: out of mem\n");
+    return -1;
+  }
+  c->ref = 1;
+  m->va_type = PRIVATE;
+  m->n = c;
+  // Update the hardware page tables to reflect the change to the vma
+  updatepages(vmap->pml4, (void *) m->va_start, (void *) m->va_end, 0);
+  pte = walkpgdir(vmap->pml4, (const void *)va, 0);
+  *pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
+  // drop my ref to vmnode
+  vmn_decref(n);
+  return 0;
+}
+
+int
+pagefault(struct vmap *vmap, uptr va, u32 err)
+{
+  pme_t *pte = walkpgdir(vmap->pml4, (const void *)va, 1);
+
+  // optimize checks of args to syscals
+  if((*pte & (PTE_P|PTE_U|PTE_W)) == (PTE_P|PTE_U|PTE_W))
+    return 0;
+
+  rcu_begin_read();
+  struct vma *m = vmap_lookup(vmap, va, 1);
+  if (m == 0) {
+    rcu_end_read();
+    return -1;
+  }
+
+  acquire(&m->lock);
+  u64 npg = (PGROUNDDOWN(va) - m->va_start) / PGSIZE;
+
+  if (m->n && m->n->type == ONDEMAND && m->n->page[npg] == 0)
+    m = pagefault_ondemand(vmap, va, err, m);
+
+  if (m->va_type == COW && (err & FEC_WR)) {
+    if (pagefault_wcow(vmap, va, pte, m, npg) < 0) {
+      release(&m->lock);
+      rcu_end_read();
+      return -1;
+    }
+  } else if (m->va_type == COW) {
+    *pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_COW;
+  } else {
+    if (m->n->ref != 1)
+      panic("pagefault");
+    *pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
+  }
+
+  // XXX(sbw) Why reload hardware page tables?
+  lcr3(v2p(vmap->pml4));  // Reload hardware page tables
+  release(&m->lock);
+  rcu_end_read();
+  return 1;
 }
 
 void
@@ -286,28 +282,104 @@ copyout(struct vmap *vmap, uptr va, void *p, u64 len)
 }
 
-struct vmnode *
-vmn_copy(struct vmnode *n)
-{
-  struct vmnode *c = vmn_alloc(n->npages, n->type);
-  if(c != 0) {
-    c->type = n->type;
-    if (n->type == ONDEMAND) {
-      c->ip = idup(n->ip);
-      c->offset = n->offset;
-      c->sz = c->sz;
-    }
-    if (n->page[0]) {   // If the first page is present, all of them are present
-      if (vmn_doallocpg(c) < 0) {
-        cprintf("vmn_copy: out of memory\n");
-        vmn_free(c);
-        return 0;
-      }
-      for(u64 i = 0; i < n->npages; i++) {
-        memmove(c->page[i], n->page[i], PGSIZE);
-      }
-    }
-  }
-  return c;
-}
+struct vmnode *
+vmn_alloc(u64 npg, enum vmntype type)
+{
+  struct vmnode *n = kmalloc(sizeof(struct vmnode));
+  if (n == 0) {
+    cprintf("out of vmnodes");
+    return 0;
+  }
+  if(npg > NELEM(n->page)) {
+    panic("vmnode too big\n");
+  }
+  memset(n, 0, sizeof(struct vmnode));
+  n->npages = npg;
+  n->type = type;
+  return n;
+}
+
+#ifdef TREE
+struct state {
+  int share;
+  void *pml4;
+  struct node *root;
+};
+
+static int
+vmap_free_vma(struct kv *kv, void *p)
+{
+  struct state *st = (struct state *) p;
+  vma_free(kv->val);
+  st->root = tree_remove(st->root, kv->key);
+  return 1;
+}
+
+static void
+vmap_free(void *p)
+{
+  struct vmap *m = (struct vmap *) p;
+  struct state *st = kmalloc(sizeof(struct state));
+  st->root = m->root;
+  tree_foreach(m->root, vmap_free_vma, st);
+  m->root = st->root;
+  freevm(m->pml4);
+  kmfree(st);
+  m->pml4 = 0;
+  m->alloc = 0;
+}
+
+// Does any vma overlap start..start+len?
+// If yes, return the vma pointer.
+// If no, return 0.
+// This code can't handle regions at the very end
+// of the address space, e.g. 0xffffffff..0x0
+// We key vma's by their end address.
+struct vma *
+vmap_lookup(struct vmap *m, uptr start, uptr len)
+{
+  if(start + len < start)
+    panic("vmap_lookup bad len");
+
+  struct kv *kv = tree_find_gt(m->root, start);   // find vma with va_end > start
+  if (kv != 0) {
+    struct vma *e = (struct vma *) (kv->val);
+    if (e->va_end <= e->va_start)
+      panic("malformed va");
+    if (e->va_start < start+len && e->va_end > start) {
+      return e;
+    }
+  }
+  return 0;
+}
+
+int
+vmap_insert(struct vmap *m, struct vmnode *n, uptr va_start)
+{
+  acquire(&m->lock);
+  u64 len = n->npages * PGSIZE;
+
+  if(vmap_lookup(m, va_start, len)){
+    cprintf("vmap_insert: overlap\n");
+    release(&m->lock);
+    return -1;
+  }
+
+  struct vma *e = vma_alloc();
+  struct kv kv;
+  if (e == 0) {
+    release(&m->lock);
+    return -1;
+  }
+  e->va_start = va_start;
+  e->va_end = va_start + len;
+  e->n = n;
+  __sync_fetch_and_add(&n->ref, 1);
+  kv.key = e->va_end;
+  kv.val = e;
+  m->root = tree_insert(m->root, &kv);
+  release(&m->lock);
+  return 0;
+}
 
 static int
@@ -343,22 +415,6 @@ vmap_copy_vma(struct kv *kv, void *_st)
   return 1;
 }
 
-static int
-vmn_doload(struct vmnode *vmn, struct inode *ip, u64 offset, u64 sz)
-{
-  for(u64 i = 0; i < sz; i += PGSIZE){
-    char *p = vmn->page[i / PGSIZE];
-    u64 n;
-    if(sz - i < PGSIZE)
-      n = sz - i;
-    else
-      n = PGSIZE;
-    if(readi(ip, p, offset+i, n) != n)
-      return -1;
-  }
-  return 0;
-}
-
 struct vmap *
 vmap_copy(struct vmap *m, int share)
 {
@@ -367,7 +423,6 @@ vmap_copy(struct vmap *m, int share)
     return 0;
 
   acquire(&m->lock);
-#ifdef TREE
   struct state *st = kmalloc(sizeof(struct state));
   st->share = share;
   st->pml4 = m->pml4;
@@ -380,7 +435,112 @@ vmap_copy(struct vmap *m, int share)
   }
   c->root = st->root;
   kmfree(st);
-#else
+
+  if (share)
+    lcr3(v2p(m->pml4));  // Reload hardware page table
+
+  release(&m->lock);
+  return c;
+}
+
+int
+vmap_remove(struct vmap *m, uptr va_start, u64 len)
+{
+  acquire(&m->lock);
+  uptr va_end = va_start + len;
+
+  struct kv *kv = tree_find_gt(m->root, va_start);
+  if (kv == 0)
+    panic("no vma?");
+  struct vma *e = (struct vma *) kv->val;
+  if(e->va_start != va_start || e->va_end != va_end) {
+    cprintf("vmap_remove: partial unmap unsupported\n");
+    release(&m->lock);
+    return -1;
+  }
+  m->root = tree_remove(m->root, va_start+len);
+  rcu_delayed(e, vma_free);
+  release(&m->lock);
+  return 0;
+}
+
+#else // !TREE
+
+static void
+vmap_free(void *p)
+{
+  struct vmap *m = (struct vmap *) p;
+  for(u64 i = 0; i < NELEM(m->e); i++) {
+    if (m->e[i])
+      vma_free(m->e[i]);
+  }
+  freevm(m->pml4);
+  m->pml4 = 0;
+  m->alloc = 0;
+}
+
+// Does any vma overlap start..start+len?
+// If yes, return the vma pointer.
+// If no, return 0.
+// This code can't handle regions at the very end
+// of the address space, e.g. 0xffffffff..0x0
+// We key vma's by their end address.
+struct vma *
+vmap_lookup(struct vmap *m, uptr start, uptr len)
+{
+  if(start + len < start)
+    panic("vmap_lookup bad len");
+
+  for(u64 i = 0; i < NELEM(m->e); i++){
+    struct vma *e = m->e[i];
+    if(e) {
+      if(e->va_end <= e->va_start)  // XXX shouldn't this involve start and len?
+        panic("vmap_lookup bad vma");
+      if(e->va_start < start+len && e->va_end > start)
+        return e;
+    }
+  }
+  return 0;
+}
+
+int
+vmap_insert(struct vmap *m, struct vmnode *n, uptr va_start)
+{
+  acquire(&m->lock);
+  u64 len = n->npages * PGSIZE;
+
+  if(vmap_lookup(m, va_start, len)){
+    cprintf("vmap_insert: overlap\n");
+    release(&m->lock);
+    return -1;
+  }
+
+  for(u64 i = 0; i < NELEM(m->e); i++) {
+    if(m->e[i])
+      continue;
+    m->e[i] = vma_alloc();
+    if (m->e[i] == 0)
+      return -1;
+    m->e[i]->va_start = va_start;
+    m->e[i]->va_end = va_start + len;
+    m->e[i]->n = n;
+    __sync_fetch_and_add(&n->ref, 1);
+    release(&m->lock);
+    return 0;
+  }
+  release(&m->lock);
+  cprintf("vmap_insert: out of vma slots\n");
+  return -1;
+}
+
+struct vmap *
+vmap_copy(struct vmap *m, int share)
+{
+  struct vmap *c = vmap_alloc();
+  if(c == 0)
+    return 0;
+
+  acquire(&m->lock);
   for(int i = 0; i < NELEM(m->e); i++) {
     if(m->e[i] == 0)
       continue;
@@ -411,7 +571,6 @@ vmap_copy(struct vmap *m, int share)
     }
     __sync_fetch_and_add(&c->e[i]->n->ref, 1);
   }
-#endif
 
   if (share)
     lcr3(v2p(m->pml4));  // Reload hardware page table
@@ -419,123 +578,11 @@ vmap_copy(struct vmap *m, int share)
   return c;
 }
 
-static struct vma *
-pagefault_ondemand(struct vmap *vmap, uptr va, u32 err, struct vma *m)
-{
-  if (vmn_doallocpg(m->n) < 0) {
-    panic("pagefault: couldn't allocate pages");
-  }
-  release(&m->lock);
-  if (vmn_doload(m->n, m->n->ip, m->n->offset, m->n->sz) < 0) {
-    panic("pagefault: couldn't load");
-  }
-  m = vmap_lookup(vmap, va, 1);
-  if (!m)
-    panic("pagefault_ondemand");
-  acquire(&m->lock); // re-acquire lock on m
-  return m;
-}
-
-static int
-pagefault_wcow(struct vmap *vmap, uptr va, pme_t *pte, struct vma *m, u64 npg)
-{
-  // Always make a copy of n, even if this process has the only ref,
-  // because other processes may change ref count while this process
-  // is handling wcow.
-  struct vmnode *n = m->n;
-  struct vmnode *c = vmn_copy(m->n);
-  if (c == 0) {
-    cprintf("pagefault_wcow: out of mem\n");
-    return -1;
-  }
-  c->ref = 1;
-  m->va_type = PRIVATE;
-  m->n = c;
-  // Update the hardware page tables to reflect the change to the vma
-  updatepages(vmap->pml4, (void *) m->va_start, (void *) m->va_end, 0);
-  pte = walkpgdir(vmap->pml4, (const void *)va, 0);
-  *pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
-  // drop my ref to vmnode
-  vmn_decref(n);
-  return 0;
-}
-
-int
-pagefault(struct vmap *vmap, uptr va, u32 err)
-{
-  pme_t *pte = walkpgdir(vmap->pml4, (const void *)va, 1);
-
-  // optimize checks of args to syscals
-  if((*pte & (PTE_P|PTE_U|PTE_W)) == (PTE_P|PTE_U|PTE_W))
-    return 0;
-
-  rcu_begin_read();
-  struct vma *m = vmap_lookup(vmap, va, 1);
-  if (m == 0) {
-    rcu_end_read();
-    return -1;
-  }
-
-  acquire(&m->lock);
-  u64 npg = (PGROUNDDOWN(va) - m->va_start) / PGSIZE;
-
-  if (m->n && m->n->type == ONDEMAND && m->n->page[npg] == 0)
-    m = pagefault_ondemand(vmap, va, err, m);
-
-  if (m->va_type == COW && (err & FEC_WR)) {
-    if (pagefault_wcow(vmap, va, pte, m, npg) < 0) {
-      release(&m->lock);
-      rcu_end_read();
-      return -1;
-    }
-  } else if (m->va_type == COW) {
-    *pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_COW;
-  } else {
-    if (m->n->ref != 1)
-      panic("pagefault");
-    *pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
-  }
-
-  // XXX(sbw) Why reload hardware page tables?
-  lcr3(v2p(vmap->pml4));  // Reload hardware page tables
-  release(&m->lock);
-  rcu_end_read();
-  return 1;
-}
-
-// Load a program segment into a vmnode.
-int
-vmn_load(struct vmnode *vmn, struct inode *ip, u64 offset, u64 sz)
-{
-  if (vmn->type == ONDEMAND) {
-    vmn->ip = ip;
-    vmn->offset = offset;
-    vmn->sz = sz;
-    return 0;
-  } else {
-    return vmn_doload(vmn, ip, offset, sz);
-  }
-}
-
 int
 vmap_remove(struct vmap *m, uptr va_start, u64 len)
 {
   acquire(&m->lock);
   uptr va_end = va_start + len;
-#ifdef TREE
-  struct kv *kv = tree_find_gt(m->root, va_start);
-  if (kv == 0)
-    panic("no vma?");
-  struct vma *e = (struct vma *) kv->val;
-  if(e->va_start != va_start || e->va_end != va_end) {
-    cprintf("vmap_remove: partial unmap unsupported\n");
-    release(&m->lock);
-    return -1;
-  }
-  m->root = tree_remove(m->root, va_start+len);
-  rcu_delayed(e, vma_free);
-#else
   for(int i = 0; i < NELEM(m->e); i++) {
     if(m->e[i] && (m->e[i]->va_start < va_end && m->e[i]->va_end > va_start)) {
       if(m->e[i]->va_start != va_start || m->e[i]->va_end != va_end) {
@@ -547,7 +594,7 @@ vmap_remove(struct vmap *m, uptr va_start, u64 len)
       m->e[i] = 0;
     }
   }
-#endif
   release(&m->lock);
   return 0;
 }
+#endif
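
The one non-obvious invariant in this file is the one stated in vmap_lookup's comment: VMAs are keyed by their end address, so the entry with the smallest va_end strictly greater than start is the only candidate that can overlap [start, start+len). The standalone sketch below is not part of the commit; it models that check with a sorted array and a hypothetical find_gt() helper standing in for the kernel's tree_find_gt().

// Minimal model of the vmap_lookup invariant: vma's are keyed by their
// end address, and the vma with the smallest va_end > start is the only
// possible overlap with [start, start+len).  A sorted array and the
// hypothetical find_gt() stand in for the kernel's balanced tree.
#include <stdio.h>
#include <stdint.h>

struct vma_model { uint64_t va_start, va_end; };   // covers [va_start, va_end)

// Return the vma with the smallest va_end strictly greater than start,
// or NULL if none.  Entries must be sorted by va_end (the tree's key).
static struct vma_model *
find_gt(struct vma_model *v, int n, uint64_t start)
{
  for (int i = 0; i < n; i++)
    if (v[i].va_end > start)
      return &v[i];
  return 0;
}

// The same overlap test vmap_lookup applies to the candidate it finds.
static struct vma_model *
lookup(struct vma_model *v, int n, uint64_t start, uint64_t len)
{
  struct vma_model *e = find_gt(v, n, start);
  if (e && e->va_start < start + len && e->va_end > start)
    return e;
  return 0;
}

int
main(void)
{
  struct vma_model v[] = { { 0x1000, 0x3000 }, { 0x5000, 0x8000 } };
  printf("%d\n", lookup(v, 2, 0x2000, 0x100)  != 0);  // 1: inside the first vma
  printf("%d\n", lookup(v, 2, 0x3000, 0x1000) != 0);  // 0: falls in the gap
  printf("%d\n", lookup(v, 2, 0x4f00, 0x200)  != 0);  // 1: straddles the second vma
  return 0;
}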