Commit cf249131 authored by Silas Boyd-Wickizer

Rejigger vm.c.

Parent 75e5229e
@@ -12,6 +12,8 @@
#include "proc.h"
#include "vm.h"
static void vmap_free(void *p);
static struct vma *
vma_alloc(void)
{
@@ -41,49 +43,53 @@ vma_free(void *p)
kmfree(e);
}
#ifdef TREE
struct state {
int share;
void *pml4;
struct node *root;
};
static int
vmap_free_vma(struct kv *kv, void *p)
{
struct state *st = (struct state *) p;
vma_free(kv->val);
st->root = tree_remove(st->root, kv->key);
return 1;
vmn_doallocpg(struct vmnode *n)
{
for(u64 i = 0; i < n->npages; i++) {
if((n->page[i] = kalloc()) == 0)
return -1;
memset((char *) n->page[i], 0, PGSIZE);
}
return 0;
}
static void
vmap_free(void *p)
static struct vmnode *
vmn_copy(struct vmnode *n)
{
struct vmap *m = (struct vmap *) p;
struct state *st = kmalloc(sizeof(struct state));
st->root = m->root;
tree_foreach(m->root, vmap_free_vma, st);
m->root = st->root;
freevm(m->pml4);
kmfree(st);
m->pml4 = 0;
m->alloc = 0;
struct vmnode *c = vmn_alloc(n->npages, n->type);
if(c != 0) {
c->type = n->type;
if (n->type == ONDEMAND) {
c->ip = idup(n->ip);
c->offset = n->offset;
c->sz = n->sz;
}
if (n->page[0]) { // If the first page is present, all of them are present
if (vmn_doallocpg(c) < 0) {
cprintf("vmn_copy: out of memory\n");
vmn_free(c);
return 0;
}
for(u64 i = 0; i < n->npages; i++) {
memmove(c->page[i], n->page[i], PGSIZE);
}
}
}
return c;
}
#else
static void
vmap_free(void *p)
struct vmnode *
vmn_allocpg(u64 npg)
{
struct vmap *m = (struct vmap *) p;
for(u64 i = 0; i < NELEM(m->e); i++) {
if (m->e[i])
vma_free(m->e[i]);
struct vmnode *n = vmn_alloc(npg, EAGER);
if (n == 0) return 0;
if (vmn_doallocpg(n) < 0) {
vmn_free(n);
return 0;
}
freevm(m->pml4);
m->pml4 = 0;
m->alloc = 0;
return n;
}
#endif
void
vmap_decref(struct vmap *m)
@@ -92,42 +98,6 @@ vmap_decref(struct vmap *m)
vmap_free(m);
}
// Does any vma overlap start..start+len?
// If yes, return the vma pointer.
// If no, return 0.
// This code can't handle regions at the very end
// of the address space, e.g. 0xffffffff..0x0
// We key vma's by their end address.
struct vma *
vmap_lookup(struct vmap *m, uptr start, uptr len)
{
if(start + len < start)
panic("vmap_lookup bad len");
#ifdef TREE
struct kv *kv = tree_find_gt(m->root, start); // find vma with va_end > start
if (kv != 0) {
struct vma *e = (struct vma *) (kv->val);
if (e->va_end <= e->va_start)
panic("malformed va");
if (e->va_start < start+len && e->va_end > start) {
return e;
}
}
#else
for(u64 i = 0; i < NELEM(m->e); i++){
struct vma *e = m->e[i];
if(e) {
if(e->va_end <= e->va_start) // XXX shouldn't this involve start and len?
panic("vmap_lookup bad vma");
if(e->va_start < start+len && e->va_end > start)
return e;
}
}
#endif
return 0;
}
struct vmap *
vmap_alloc(void)
{
@@ -148,93 +118,119 @@ vmap_alloc(void)
return m;
}
int
vmap_insert(struct vmap *m, struct vmnode *n, uptr va_start)
static int
vmn_doload(struct vmnode *vmn, struct inode *ip, u64 offset, u64 sz)
{
acquire(&m->lock);
u64 len = n->npages * PGSIZE;
if(vmap_lookup(m, va_start, len)){
cprintf("vmap_insert: overlap\n");
release(&m->lock);
return -1;
}
#ifdef TREE
struct vma *e = vma_alloc();
struct kv kv;
if (e == 0) {
release(&m->lock);
return -1;
for(u64 i = 0; i < sz; i += PGSIZE){
char *p = vmn->page[i / PGSIZE];
u64 n;
if(sz - i < PGSIZE)
n = sz - i;
else
n = PGSIZE;
if(readi(ip, p, offset+i, n) != n)
return -1;
}
e->va_start = va_start;
e->va_end = va_start + len;
e->n = n;
__sync_fetch_and_add(&n->ref, 1);
kv.key = e->va_end;
kv.val = e;
m->root = tree_insert(m->root, &kv);
release(&m->lock);
return 0;
#else
for(u64 i = 0; i < NELEM(m->e); i++) {
if(m->e[i])
continue;
m->e[i] = vma_alloc();
if (m->e[i] == 0)
return -1;
m->e[i]->va_start = va_start;
m->e[i]->va_end = va_start + len;
m->e[i]->n = n;
__sync_fetch_and_add(&n->ref, 1);
release(&m->lock);
}
// Load a program segment into a vmnode.
int
vmn_load(struct vmnode *vmn, struct inode *ip, u64 offset, u64 sz)
{
if (vmn->type == ONDEMAND) {
vmn->ip = ip;
vmn->offset = offset;
vmn->sz = sz;
return 0;
} else {
return vmn_doload(vmn, ip, offset, sz);
}
release(&m->lock);
cprintf("vmap_insert: out of vma slots\n");
return -1;
#endif
}
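// Fill an ONDEMAND vmnode on first touch: allocate its pages, drop the vma
// lock while reading the backing inode, then re-look-up and re-lock the vma.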
struct vmnode *
vmn_alloc(u64 npg, enum vmntype type)
static struct vma *
pagefault_ondemand(struct vmap *vmap, uptr va, u32 err, struct vma *m)
{
struct vmnode *n = kmalloc(sizeof(struct vmnode));
if (n == 0) {
cprintf("out of vmnodes");
return 0;
if (vmn_doallocpg(m->n) < 0) {
panic("pagefault: couldn't allocate pages");
}
if(npg > NELEM(n->page)) {
panic("vmnode too big\n");
release(&m->lock);
if (vmn_doload(m->n, m->n->ip, m->n->offset, m->n->sz) < 0) {
panic("pagefault: couldn't load");
}
memset(n, 0, sizeof(struct vmnode));
n->npages = npg;
n->type = type;
return n;
m = vmap_lookup(vmap, va, 1);
if (!m)
panic("pagefault_ondemand");
acquire(&m->lock); // re-acquire lock on m
return m;
}
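// Break copy-on-write: replace the vma's vmnode with a private copy and map
// the faulting page writable.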
static int
vmn_doallocpg(struct vmnode *n)
pagefault_wcow(struct vmap *vmap, uptr va, pme_t *pte, struct vma *m, u64 npg)
{
for(u64 i = 0; i < n->npages; i++) {
if((n->page[i] = kalloc()) == 0)
return -1;
memset((char *) n->page[i], 0, PGSIZE);
// Always make a copy of n, even if this process has the only ref,
// because other processes may change ref count while this process
// is handling wcow.
struct vmnode *n = m->n;
struct vmnode *c = vmn_copy(m->n);
if (c == 0) {
cprintf("pagefault_wcow: out of mem\n");
return -1;
}
c->ref = 1;
m->va_type = PRIVATE;
m->n = c;
// Update the hardware page tables to reflect the change to the vma
updatepages(vmap->pml4, (void *) m->va_start, (void *) m->va_end, 0);
pte = walkpgdir(vmap->pml4, (const void *)va, 0);
*pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
// drop my ref to vmnode
vmn_decref(n);
return 0;
}
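// Handle a page fault at va: fill ONDEMAND pages, break COW on write faults,
// or install the missing PTE. Returns 1 on success, 0 if the page is already
// mapped writable, and -1 on error.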
struct vmnode *
vmn_allocpg(u64 npg)
int
pagefault(struct vmap *vmap, uptr va, u32 err)
{
struct vmnode *n = vmn_alloc(npg, EAGER);
if (n == 0) return 0;
if (vmn_doallocpg(n) < 0) {
vmn_free(n);
pme_t *pte = walkpgdir(vmap->pml4, (const void *)va, 1);
// optimize checks of args to syscalls
if((*pte & (PTE_P|PTE_U|PTE_W)) == (PTE_P|PTE_U|PTE_W))
return 0;
rcu_begin_read();
struct vma *m = vmap_lookup(vmap, va, 1);
if (m == 0) {
rcu_end_read();
return -1;
}
return n;
acquire(&m->lock);
u64 npg = (PGROUNDDOWN(va) - m->va_start) / PGSIZE;
if (m->n && m->n->type == ONDEMAND && m->n->page[npg] == 0)
m = pagefault_ondemand(vmap, va, err, m);
if (m->va_type == COW && (err & FEC_WR)) {
if (pagefault_wcow(vmap, va, pte, m, npg) < 0) {
release(&m->lock);
rcu_end_read();
return -1;
}
} else if (m->va_type == COW) {
*pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_COW;
} else {
if (m->n->ref != 1)
panic("pagefault");
*pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
}
// XXX(sbw) Why reload hardware page tables?
lcr3(v2p(vmap->pml4)); // Reload hardware page tables
release(&m->lock);
rcu_end_read();
return 1;
}
void
@@ -286,28 +282,104 @@ copyout(struct vmap *vmap, uptr va, void *p, u64 len)
}
struct vmnode *
vmn_copy(struct vmnode *n)
vmn_alloc(u64 npg, enum vmntype type)
{
struct vmnode *c = vmn_alloc(n->npages, n->type);
if(c != 0) {
c->type = n->type;
if (n->type == ONDEMAND) {
c->ip = idup(n->ip);
c->offset = n->offset;
c->sz = n->sz;
}
if (n->page[0]) { // If the first page is present, all of them are present
if (vmn_doallocpg(c) < 0) {
cprintf("vmn_copy: out of memory\n");
vmn_free(c);
return 0;
}
for(u64 i = 0; i < n->npages; i++) {
memmove(c->page[i], n->page[i], PGSIZE);
}
struct vmnode *n = kmalloc(sizeof(struct vmnode));
if (n == 0) {
cprintf("out of vmnodes");
return 0;
}
if(npg > NELEM(n->page)) {
panic("vmnode too big\n");
}
memset(n, 0, sizeof(struct vmnode));
n->npages = npg;
n->type = type;
return n;
}
#ifdef TREE
struct state {
int share;
void *pml4;
struct node *root;
};
static int
vmap_free_vma(struct kv *kv, void *p)
{
struct state *st = (struct state *) p;
vma_free(kv->val);
st->root = tree_remove(st->root, kv->key);
return 1;
}
static void
vmap_free(void *p)
{
struct vmap *m = (struct vmap *) p;
struct state *st = kmalloc(sizeof(struct state));
st->root = m->root;
tree_foreach(m->root, vmap_free_vma, st);
m->root = st->root;
freevm(m->pml4);
kmfree(st);
m->pml4 = 0;
m->alloc = 0;
}
// Does any vma overlap start..start+len?
// If yes, return the vma pointer.
// If no, return 0.
// This code can't handle regions at the very end
// of the address space, e.g. 0xffffffff..0x0
// We key vma's by their end address.
struct vma *
vmap_lookup(struct vmap *m, uptr start, uptr len)
{
if(start + len < start)
panic("vmap_lookup bad len");
struct kv *kv = tree_find_gt(m->root, start); // find vma with va_end > start
if (kv != 0) {
struct vma *e = (struct vma *) (kv->val);
if (e->va_end <= e->va_start)
panic("malformed va");
if (e->va_start < start+len && e->va_end > start) {
return e;
}
}
return c;
return 0;
}
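// Insert vmnode n into m at va_start, keyed in the tree by va_end.
// Fails if the new range overlaps an existing vma.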
int
vmap_insert(struct vmap *m, struct vmnode *n, uptr va_start)
{
acquire(&m->lock);
u64 len = n->npages * PGSIZE;
if(vmap_lookup(m, va_start, len)){
cprintf("vmap_insert: overlap\n");
release(&m->lock);
return -1;
}
struct vma *e = vma_alloc();
struct kv kv;
if (e == 0) {
release(&m->lock);
return -1;
}
e->va_start = va_start;
e->va_end = va_start + len;
e->n = n;
__sync_fetch_and_add(&n->ref, 1);
kv.key = e->va_end;
kv.val = e;
m->root = tree_insert(m->root, &kv);
release(&m->lock);
return 0;
}
static int
@@ -343,22 +415,6 @@ vmap_copy_vma(struct kv *kv, void *_st)
return 1;
}
static int
vmn_doload(struct vmnode *vmn, struct inode *ip, u64 offset, u64 sz)
{
for(u64 i = 0; i < sz; i += PGSIZE){
char *p = vmn->page[i / PGSIZE];
u64 n;
if(sz - i < PGSIZE)
n = sz - i;
else
n = PGSIZE;
if(readi(ip, p, offset+i, n) != n)
return -1;
}
return 0;
}
struct vmap *
vmap_copy(struct vmap *m, int share)
{
@@ -367,7 +423,6 @@ vmap_copy(struct vmap *m, int share)
return 0;
acquire(&m->lock);
#ifdef TREE
struct state *st = kmalloc(sizeof(struct state));
st->share = share;
st->pml4 = m->pml4;
@@ -380,7 +435,112 @@ vmap_copy(struct vmap *m, int share)
}
c->root = st->root;
kmfree(st);
#else
if (share)
lcr3(v2p(m->pml4)); // Reload hardware page table
release(&m->lock);
return c;
}
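// Remove the vma that exactly covers [va_start, va_start+len);
// partial unmaps are not supported.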
int
vmap_remove(struct vmap *m, uptr va_start, u64 len)
{
acquire(&m->lock);
uptr va_end = va_start + len;
struct kv *kv = tree_find_gt(m->root, va_start);
if (kv == 0)
panic("no vma?");
struct vma *e = (struct vma *) kv->val;
if(e->va_start != va_start || e->va_end != va_end) {
cprintf("vmap_remove: partial unmap unsupported\n");
release(&m->lock);
return -1;
}
m->root = tree_remove(m->root, va_start+len);
rcu_delayed(e, vma_free);
release(&m->lock);
return 0;
}
#else // !TREE
static void
vmap_free(void *p)
{
struct vmap *m = (struct vmap *) p;
for(u64 i = 0; i < NELEM(m->e); i++) {
if (m->e[i])
vma_free(m->e[i]);
}
freevm(m->pml4);
m->pml4 = 0;
m->alloc = 0;
}
// Does any vma overlap start..start+len?
// If yes, return the vma pointer.
// If no, return 0.
// This code can't handle regions at the very end
// of the address space, e.g. 0xffffffff..0x0
// We key vma's by their end address.
struct vma *
vmap_lookup(struct vmap *m, uptr start, uptr len)
{
if(start + len < start)
panic("vmap_lookup bad len");
for(u64 i = 0; i < NELEM(m->e); i++){
struct vma *e = m->e[i];
if(e) {
if(e->va_end <= e->va_start) // XXX shouldn't this involve start and len?
panic("vmap_lookup bad vma");
if(e->va_start < start+len && e->va_end > start)
return e;
}
}
return 0;
}
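// Array-based variant: place n in the first free slot of m->e[],
// failing on overlap or when no slot is free.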
int
vmap_insert(struct vmap *m, struct vmnode *n, uptr va_start)
{
acquire(&m->lock);
u64 len = n->npages * PGSIZE;
if(vmap_lookup(m, va_start, len)){
cprintf("vmap_insert: overlap\n");
release(&m->lock);
return -1;
}
for(u64 i = 0; i < NELEM(m->e); i++) {
if(m->e[i])
continue;
m->e[i] = vma_alloc();
if (m->e[i] == 0)
return -1;
m->e[i]->va_start = va_start;
m->e[i]->va_end = va_start + len;
m->e[i]->n = n;
__sync_fetch_and_add(&n->ref, 1);
release(&m->lock);
return 0;
}
release(&m->lock);
cprintf("vmap_insert: out of vma slots\n");
return -1;
}
struct vmap *
vmap_copy(struct vmap *m, int share)
{
struct vmap *c = vmap_alloc();
if(c == 0)
return 0;
acquire(&m->lock);
for(int i = 0; i < NELEM(m->e); i++) {
if(m->e[i] == 0)
continue;
@@ -411,7 +571,6 @@ vmap_copy(struct vmap *m, int share)
}
__sync_fetch_and_add(&c->e[i]->n->ref, 1);
}
#endif
if (share)
lcr3(v2p(m->pml4)); // Reload hardware page table
@@ -419,123 +578,11 @@ vmap_copy(struct vmap *m, int share)
return c;
}
static struct vma *
pagefault_ondemand(struct vmap *vmap, uptr va, u32 err, struct vma *m)
{
if (vmn_doallocpg(m->n) < 0) {
panic("pagefault: couldn't allocate pages");
}
release(&m->lock);
if (vmn_doload(m->n, m->n->ip, m->n->offset, m->n->sz) < 0) {
panic("pagefault: couldn't load");
}
m = vmap_lookup(vmap, va, 1);
if (!m)
panic("pagefault_ondemand");
acquire(&m->lock); // re-acquire lock on m
return m;
}
static int
pagefault_wcow(struct vmap *vmap, uptr va, pme_t *pte, struct vma *m, u64 npg)
{
// Always make a copy of n, even if this process has the only ref,
// because other processes may change ref count while this process
// is handling wcow.
struct vmnode *n = m->n;
struct vmnode *c = vmn_copy(m->n);
if (c == 0) {
cprintf("pagefault_wcow: out of mem\n");
return -1;
}
c->ref = 1;
m->va_type = PRIVATE;
m->n = c;
// Update the hardware page tables to reflect the change to the vma
updatepages(vmap->pml4, (void *) m->va_start, (void *) m->va_end, 0);
pte = walkpgdir(vmap->pml4, (const void *)va, 0);
*pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
// drop my ref to vmnode
vmn_decref(n);
return 0;
}
int
pagefault(struct vmap *vmap, uptr va, u32 err)
{
pme_t *pte = walkpgdir(vmap->pml4, (const void *)va, 1);
// optimize checks of args to syscalls
if((*pte & (PTE_P|PTE_U|PTE_W)) == (PTE_P|PTE_U|PTE_W))
return 0;
rcu_begin_read();
struct vma *m = vmap_lookup(vmap, va, 1);
if (m == 0) {
rcu_end_read();
return -1;
}
acquire(&m->lock);
u64 npg = (PGROUNDDOWN(va) - m->va_start) / PGSIZE;
if (m->n && m->n->type == ONDEMAND && m->n->page[npg] == 0)
m = pagefault_ondemand(vmap, va, err, m);
if (m->va_type == COW && (err & FEC_WR)) {
if (pagefault_wcow(vmap, va, pte, m, npg) < 0) {
release(&m->lock);
rcu_end_read();
return -1;
}
} else if (m->va_type == COW) {
*pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_COW;
} else {
if (m->n->ref != 1)
panic("pagefault");
*pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
}
// XXX(sbw) Why reload hardware page tables?
lcr3(v2p(vmap->pml4)); // Reload hardware page tables
release(&m->lock);
rcu_end_read();
return 1;
}
// Load a program segment into a vmnode.
int
vmn_load(struct vmnode *vmn, struct inode *ip, u64 offset, u64 sz)
{
if (vmn->type == ONDEMAND) {
vmn->ip = ip;
vmn->offset = offset;
vmn->sz = sz;
return 0;
} else {
return vmn_doload(vmn, ip, offset, sz);
}
}
int
vmap_remove(struct vmap *m, uptr va_start, u64 len)
{
acquire(&m->lock);
uptr va_end = va_start + len;
#ifdef TREE
struct kv *kv = tree_find_gt(m->root, va_start);
if (kv == 0)
panic("no vma?");
struct vma *e = (struct vma *) kv->val;
if(e->va_start != va_start || e->va_end != va_end) {
cprintf("vmap_remove: partial unmap unsupported\n");
release(&m->lock);
return -1;
}
m->root = tree_remove(m->root, va_start+len);
rcu_delayed(e, vma_free);
#else
for(int i = 0; i < NELEM(m->e); i++) {
if(m->e[i] && (m->e[i]->va_start < va_end && m->e[i]->va_end > va_start)) {
if(m->e[i]->va_start != va_start || m->e[i]->va_end != va_end) {
@@ -547,7 +594,7 @@ vmap_remove(struct vmap *m, uptr va_start, u64 len)
m->e[i] = 0;
}
}
#endif
release(&m->lock);
return 0;
}
#endif