提交 be083757 创建 作者: Nickolai Zeldovich's avatar Nickolai Zeldovich

try to handle COW correctly by replacing the vma in crange, rather

than modifying the vma's type and vmnode inplace.
上级 ea416994
...@@ -33,14 +33,14 @@ struct vmnode { ...@@ -33,14 +33,14 @@ struct vmnode {
enum vmatype { PRIVATE, COW }; enum vmatype { PRIVATE, COW };
struct vma : public range { struct vma : public range {
uptr vma_start; // start of mapping const uptr vma_start; // start of mapping
uptr vma_end; // one past the last byte const uptr vma_end; // one past the last byte
enum vmatype va_type; const enum vmatype va_type;
struct vmnode *n; struct vmnode *n;
struct spinlock lock; // serialize fault/unmap struct spinlock lock; // serialize fault/unmap
char lockname[16]; char lockname[16];
vma(vmap *vmap, uptr start, uptr end); vma(vmap *vmap, uptr start, uptr end, enum vmatype vtype);
~vma(); ~vma();
virtual void do_gc() { delete this; } virtual void do_gc() { delete this; }
...@@ -53,13 +53,15 @@ struct vmap { ...@@ -53,13 +53,15 @@ struct vmap {
struct spinlock lock; // serialize map/lookup/unmap struct spinlock lock; // serialize map/lookup/unmap
atomic<u64> ref; atomic<u64> ref;
u64 alloc; u64 alloc;
pgmap *pml4; // Page table pgmap *const pml4; // Page table
char *kshared; char *const kshared;
char lockname[16]; char lockname[16];
vmap(); vmap();
~vmap(); ~vmap();
bool replace_vma(vma *a, vma *b);
void decref(); void decref();
vmap* copy(int share); vmap* copy(int share);
vma* lookup(uptr start, uptr len); vma* lookup(uptr start, uptr len);
......
...@@ -119,9 +119,9 @@ vmnode::load(inode *iparg, u64 offarg, u64 szarg) ...@@ -119,9 +119,9 @@ vmnode::load(inode *iparg, u64 offarg, u64 szarg)
* vma * vma
*/ */
vma::vma(vmap *vmap, uptr start, uptr end) vma::vma(vmap *vmap, uptr start, uptr end, enum vmatype vtype)
: range(&vmap->cr, start, end-start), : range(&vmap->cr, start, end-start),
vma_start(start), vma_end(end), va_type(PRIVATE), n(0) vma_start(start), vma_end(end), va_type(vtype), n(0)
{ {
snprintf(lockname, sizeof(lockname), "vma:%p", this); snprintf(lockname, sizeof(lockname), "vma:%p", this);
initlock(&lock, lockname, LOCKSTAT_VM); initlock(&lock, lockname, LOCKSTAT_VM);
...@@ -139,39 +139,36 @@ vma::~vma() ...@@ -139,39 +139,36 @@ vma::~vma()
*/ */
vmap::vmap() vmap::vmap()
: cr(10) : cr(10), pml4(setupkvm()), kshared((char*) ksalloc(slab_kshared))
{ {
snprintf(lockname, sizeof(lockname), "vmap:%p", this); snprintf(lockname, sizeof(lockname), "vmap:%p", this);
initlock(&lock, lockname, LOCKSTAT_VM); initlock(&lock, lockname, LOCKSTAT_VM);
ref = 1; ref = 1;
alloc = 0; alloc = 0;
kshared = 0;
pml4 = setupkvm();
if (pml4 == 0) { if (pml4 == 0) {
cprintf("vmap_alloc: setupkvm out of memory\n"); cprintf("vmap_alloc: setupkvm out of memory\n");
goto err0; goto err;
} }
kshared = (char*) ksalloc(slab_kshared);
if (kshared == NULL) { if (kshared == NULL) {
cprintf("vmap::vmap: kshared out of memory\n"); cprintf("vmap::vmap: kshared out of memory\n");
goto err1; goto err;
} }
if (setupkshared(pml4, kshared)) { if (setupkshared(pml4, kshared)) {
cprintf("vmap::vmap: setupkshared out of memory\n"); cprintf("vmap::vmap: setupkshared out of memory\n");
goto err2; goto err;
} }
return; return;
err2: err:
ksfree(slab_kshared, kshared); if (kshared)
err1: ksfree(slab_kshared, kshared);
freevm(pml4); if (pml4)
err0: freevm(pml4);
destroylock(&lock); destroylock(&lock);
} }
...@@ -192,6 +189,18 @@ vmap::decref() ...@@ -192,6 +189,18 @@ vmap::decref()
delete this; delete this;
} }
bool
vmap::replace_vma(vma *a, vma *b)
{
auto span = cr.search_lock(a->vma_start, a->vma_end - a->vma_start);
if (a->deleted())
return false;
for (auto e: span)
assert(a == e);
span.replace(b);
return true;
}
vmap* vmap*
vmap::copy(int share) vmap::copy(int share)
{ {
...@@ -206,25 +215,33 @@ vmap::copy(int share) ...@@ -206,25 +215,33 @@ vmap::copy(int share)
vma *e = (vma *) r; vma *e = (vma *) r;
scoped_acquire sae(&e->lock); scoped_acquire sae(&e->lock);
struct vma *ne = new vma(nm, e->vma_start, e->vma_end); struct vma *ne = new vma(nm, e->vma_start, e->vma_end, share ? COW : PRIVATE);
if (ne == 0) if (ne == 0)
goto err; goto err;
if (share) { if (share) {
ne->n = e->n; ne->n = e->n;
ne->va_type = COW;
e->va_type = COW; // if the original vma wasn't COW, replace it with a COW vma
updatepages(pml4, e->vma_start, e->vma_end, [](atomic<pme_t>* p) { if (e->va_type != COW) {
for (;;) { vma *repl = new vma(this, e->vma_start, e->vma_end, COW);
pme_t v = p->load(); repl->n = e->n;
if (!(v & PTE_P) || !(v & PTE_U) || !(v & PTE_W) || repl->n->ref++;
cmpxch(p, v, PTE_ADDR(v) | PTE_P | PTE_U | PTE_COW))
break; replace_vma(e, repl);
} updatepages(pml4, e->vma_start, e->vma_end, [](atomic<pme_t>* p) {
}); for (;;) {
pme_t v = p->load();
if (v & PTE_LOCK)
continue;
if (!(v & PTE_P) || !(v & PTE_U) || !(v & PTE_W) ||
cmpxch(p, v, PTE_ADDR(v) | PTE_P | PTE_U | PTE_COW))
break;
}
});
}
} else { } else {
ne->n = e->n->copy(); ne->n = e->n->copy();
ne->va_type = e->va_type;
} }
if (ne->n == 0) if (ne->n == 0)
...@@ -290,7 +307,7 @@ vmap::insert(vmnode *n, uptr vma_start) ...@@ -290,7 +307,7 @@ vmap::insert(vmnode *n, uptr vma_start)
// XXX handle overlaps // XXX handle overlaps
e = new vma(this, vma_start, vma_start+len); e = new vma(this, vma_start, vma_start+len, PRIVATE);
if (e == 0) { if (e == 0) {
cprintf("vmap::insert: out of vmas\n"); cprintf("vmap::insert: out of vmas\n");
return -1; return -1;
...@@ -373,34 +390,33 @@ vmap::pagefault_wcow(vma *m) ...@@ -373,34 +390,33 @@ vmap::pagefault_wcow(vma *m)
// Always make a copy of n, even if this process has the only ref, // Always make a copy of n, even if this process has the only ref,
// because other processes may change ref count while this process // because other processes may change ref count while this process
// is handling wcow. // is handling wcow.
struct vmnode *n = m->n; struct vmnode *nodecopy = m->n->copy();
struct vmnode *c = m->n->copy(); if (nodecopy == 0) {
if (c == 0) {
cprintf("pagefault_wcow: out of mem\n"); cprintf("pagefault_wcow: out of mem\n");
return -1; return -1;
} }
c->ref = 1;
m->va_type = PRIVATE;
m->n = c;
// Update the hardware page tables to reflect the change to the vma vma *repl = new vma(this, m->vma_start, m->vma_end, PRIVATE);
repl->n = nodecopy;
nodecopy->ref = 1;
replace_vma(m, repl);
updatepages(pml4, m->vma_start, m->vma_end, [](atomic<pme_t> *p) { updatepages(pml4, m->vma_start, m->vma_end, [](atomic<pme_t> *p) {
for (;;) { for (;;) {
pme_t v = p->load(); pme_t v = p->load();
if (!(v & PTE_P) || cmpxch(p, v, v & ~PTE_P)) if (v & PTE_LOCK)
continue;
if (cmpxch(p, v, (pme_t) 0))
break; break;
} }
}); });
// drop my ref to vmnode
n->decref();
return 0; return 0;
} }
int int
vmap::pagefault(uptr va, u32 err) vmap::pagefault(uptr va, u32 err)
{ {
bool needflush = false;
atomic<pme_t> *pte = walkpgdir(pml4, va, 1); atomic<pme_t> *pte = walkpgdir(pml4, va, 1);
retry: retry:
...@@ -435,7 +451,10 @@ vmap::pagefault(uptr va, u32 err) ...@@ -435,7 +451,10 @@ vmap::pagefault(uptr va, u32 err)
if (m->va_type == COW && (err & FEC_WR)) { if (m->va_type == COW && (err & FEC_WR)) {
if (pagefault_wcow(m) < 0) if (pagefault_wcow(m) < 0)
return -1; return -1;
needflush = true;
mlock.release();
tlbflush();
goto retry;
} }
if (!cmpxch(pte, ptev, ptev | PTE_LOCK)) if (!cmpxch(pte, ptev, ptev | PTE_LOCK))
...@@ -453,9 +472,6 @@ vmap::pagefault(uptr va, u32 err) ...@@ -453,9 +472,6 @@ vmap::pagefault(uptr va, u32 err)
*pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W; *pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
} }
mlock.release();
if (needflush)
tlbflush();
return 1; return 1;
} }
......
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论