First pass at some per-thread pgtable plumbing

Parent commit: cbe04cd9
......@@ -6,6 +6,6 @@
#define AT_FDCWD -100
#define FORK_SHARE_VMAP (1<<0)
#define FORK_SHARE_FD (1<<1)
#define FORK_SHARE_VMAP (1<<0)
#define FORK_SHARE_FD (1<<1)
#define FORK_SEPARATE_PGMAP (1<<2)
......@@ -28,6 +28,7 @@ static inline void *p2v(uptr a) {
return (u8 *) a + KBASE;
}
struct proc_pgmap;
struct trapframe;
struct spinlock;
struct condvar;
......@@ -248,7 +249,7 @@ void uartintr(void);
// vm.c
void switchvm(struct proc*);
int pagefault(struct vmap*, uptr, u32);
int pagefault(struct vmap*, uptr, u32, proc_pgmap* pgmap);
void* pagelookup(struct vmap*, uptr);
// zalloc.cc
......
......@@ -10,6 +10,28 @@
class uwq;
class uwq_worker;
struct pgmap;
#if 0
// This should be per-address space
if (mapkva(pml4, kshared, KSHARED, KSHAREDSIZE)) {
cprintf("vmap::vmap: mapkva out of memory\n");
goto err;
}
#endif
// Per-process hardware page table, split out of vmap so that multiple
// threads (FORK_SHARE_VMAP) can share one address-space description
// while the page-table object itself is reference-counted separately.
struct proc_pgmap : public referenced {
  // Root of the page table (PML4).  Set once in the constructor and
  // never reseated (const pointer); freed in the destructor.
  pgmap* const pml4;

  // Factory: heap-allocates a fresh proc_pgmap.  The constructor
  // throws bad_alloc if the page table cannot be built.
  static proc_pgmap* alloc();

  // referenced callback: last reference dropped -> self-delete.
  virtual void onzero() const { delete this; }

  // Non-copyable: exclusively owns the underlying page table.
  proc_pgmap& operator=(const proc_pgmap&) = delete;
  proc_pgmap(const proc_pgmap& x) = delete;

private:
  proc_pgmap();
  ~proc_pgmap();

  NEW_DELETE_OPS(proc_pgmap)
};
// Saved registers for kernel context switches.
// (also implicitly defined in swtch.S)
......@@ -93,6 +115,7 @@ struct proc : public rcu_freed, public sched_link {
std::atomic<int> exception_inuse;
u8 exception_buf[256];
u64 magic;
proc_pgmap* pgmap;
static proc* alloc();
void set_state(procstate_t s);
......
......@@ -35,7 +35,8 @@ struct uwq_worker {
struct uwq : public referenced, public rcu_freed {
friend struct uwq_worker;
static uwq* alloc(vmap* vmap, filetable *ftable, uptr uentry);
static uwq* alloc(proc_pgmap* pgmap, vmap* vmap,
filetable *ftable, uptr uentry);
bool haswork() const;
bool tryworker();
......@@ -45,7 +46,8 @@ protected:
virtual void onzero() const;
private:
uwq(vmap* vmap, filetable* ftable, uwq_ipcbuf *ipc, uptr uentry);
uwq(proc_pgmap* pgmap, vmap* vmap,
filetable* ftable, uwq_ipcbuf *ipc, uptr uentry);
~uwq();
uwq& operator=(const uwq&);
uwq(const uwq& x);
......@@ -54,6 +56,7 @@ private:
NEW_DELETE_OPS(uwq);
struct spinlock lock_;
proc_pgmap *pgmap_;
vmap* vmap_;
filetable* ftable_;
uwq_ipcbuf* ipc_;
......
......@@ -105,7 +105,6 @@ struct vmap {
static vmap* alloc();
atomic<u64> ref;
pgmap *const pml4; // Page table
char *const kshared;
bool replace_vma(vma *a, vma *b);
......@@ -113,12 +112,12 @@ struct vmap {
void decref();
void incref();
vmap* copy(int share);
vmap* copy(int share, proc_pgmap* pgmap);
vma* lookup(uptr start, uptr len);
long insert(vmnode *n, uptr va_start, int dotlb);
int remove(uptr start, uptr len);
long insert(vmnode *n, uptr va_start, int dotlb, proc_pgmap* pgmap);
int remove(uptr start, uptr len, proc_pgmap* pgmap);
int pagefault(uptr va, u32 err);
int pagefault(uptr va, u32 err, proc_pgmap* pgmap);
void* pagelookup(uptr va);
int copyout(uptr va, void *p, u64 len);
int sbrk(ssize_t n, uptr *addr);
......@@ -131,7 +130,7 @@ private:
vmap& operator=(const vmap&);
~vmap();
NEW_DELETE_OPS(vmap)
int pagefault_wcow(vma *m);
int pagefault_wcow(vma *m, proc_pgmap* pgmap);
uptr unmapped_area(size_t n);
struct spinlock brklock_;
......
......@@ -201,7 +201,9 @@ panic(const char *fmt, ...)
cli();
acquire(&cons.lock);
__cprintf("cpu%d: panic: ", mycpu()->id);
__cprintf("cpu%d-%s: panic: ",
mycpu()->id,
myproc() ? myproc()->name : "(unknown)");
va_start(ap, fmt);
vprintfmt(writecons, 0, fmt, ap);
va_end(ap);
......
......@@ -45,7 +45,7 @@ dosegment(inode* ip, vmap* vmp, u64 off)
if (node == nullptr)
return -1;
if (vmp->insert(node, va_start, 1) < 0) {
if (vmp->insert(node, va_start, 1, nullptr) < 0) {
delete node;
return -1;
}
......@@ -62,7 +62,7 @@ dosegment(inode* ip, vmap* vmp, u64 off)
if (node == nullptr)
return -1;
if (vmp->insert(node, backed_end, 1) < 0) {
if (vmp->insert(node, backed_end, 1, nullptr) < 0) {
delete node;
return -1;
}
......@@ -90,7 +90,7 @@ dostack(vmap* vmp, char** argv, const char* path)
// Allocate a one-page stack at the top of the (user) address space
if((vmn = new vmnode(USTACKPAGES)) == 0)
return -1;
if(vmp->insert(vmn, USERTOP-(USTACKPAGES*PGSIZE), 1) < 0)
if(vmp->insert(vmn, USERTOP-(USTACKPAGES*PGSIZE), 1, nullptr) < 0)
return -1;
for (argc = 0; argv[argc]; argc++)
......@@ -126,7 +126,7 @@ doheap(vmap* vmp)
if((vmn = new vmnode(32)) == nullptr)
return -1;
if(vmp->insert(vmn, BRK, 1) < 0)
if(vmp->insert(vmn, BRK, 1, nullptr) < 0)
return -1;
vmp->brk_ = BRK + 8; // XXX so that brk-1 points within heap vma..
......@@ -134,11 +134,12 @@ doheap(vmap* vmp)
}
static void
exec_cleanup(vmap *oldvmap, uwq *olduwq)
exec_cleanup(vmap *oldvmap, uwq *olduwq, proc_pgmap* oldpgmap)
{
if (olduwq != nullptr)
olduwq->dec();
oldvmap->decref();
oldpgmap->dec();
}
int
......@@ -147,11 +148,13 @@ exec(const char *path, char **argv, void *ascopev)
ANON_REGION(__func__, &perfgroup);
struct inode *ip = nullptr;
struct vmap *vmp = nullptr;
proc_pgmap *pgmap = nullptr;
const char *s, *last;
struct elfhdr elf;
struct proghdr ph;
u64 off;
int i;
proc_pgmap* oldpgmap;
vmap* oldvmap;
uwq* olduwq;
cwork* w;
......@@ -178,6 +181,8 @@ exec(const char *path, char **argv, void *ascopev)
if((vmp = vmap::alloc()) == 0)
goto bad;
if ((pgmap = proc_pgmap::alloc()) == 0)
goto bad;
for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
Elf64_Word type;
......@@ -207,6 +212,9 @@ exec(const char *path, char **argv, void *ascopev)
// Commit to the user image.
oldvmap = myproc()->vmap;
olduwq = myproc()->uwq;
oldpgmap = myproc()->pgmap;
myproc()->pgmap = pgmap;
myproc()->vmap = vmp;
myproc()->tf->rip = elf.entry;
myproc()->tf->rsp = sp;
......@@ -224,6 +232,7 @@ exec(const char *path, char **argv, void *ascopev)
w->rip = (void*) exec_cleanup;
w->arg0 = oldvmap;
w->arg1 = olduwq;
w->arg2 = oldpgmap;
assert(wqcrit_push(w, myproc()->data_cpuid) >= 0);
myproc()->data_cpuid = myid();
......
......@@ -158,8 +158,8 @@ switchvm(struct proc *p)
ltr(TSSSEG);
u64 nreq = tlbflush_req.load();
if (p->vmap != 0 && p->vmap->pml4 != 0)
lcr3(v2p(p->vmap->pml4)); // switch to new address space
if (p->pgmap != 0 && p->pgmap->pml4 != 0)
lcr3(v2p(p->pgmap->pml4)); // switch to new address space
else
switchkvm();
mycpu()->tlbflush_done = nreq;
......
......@@ -30,6 +30,26 @@ struct kstack_tag kstack_tag[NCPU];
enum { sched_debug = 0 };
// Build a fresh kernel-mapped page table for this process.  Failure to
// allocate the PML4 is surfaced as bad_alloc so callers (proc_pgmap::alloc)
// see a normal allocation failure.
proc_pgmap::proc_pgmap(void)
  : pml4(setupkvm())
{
  if (pml4 != nullptr)
    return;
  cprintf("proc_pgmap::proc_pgmap: setupkvm out of memory\n");
  throw_bad_alloc();
}
// Factory for proc_pgmap.  May throw bad_alloc (propagated from the
// constructor) if the page table cannot be built.
proc_pgmap*
proc_pgmap::alloc(void)
{
  proc_pgmap* pg = new proc_pgmap();
  return pg;
}
// Runs when the last reference is dropped (via onzero -> delete).
// Frees the entire hardware page table rooted at pml4.
proc_pgmap::~proc_pgmap(void)
{
  freevm(pml4);
}
proc::proc(int npid) :
rcu_freed("proc"), vmap(0), uwq(0), worker(0), kstack(0),
pid(npid), parent(0), tf(0), context(0), killed(0),
......@@ -37,7 +57,7 @@ proc::proc(int npid) :
cpu_pin(0), oncv(0), cv_wakeup(0),
user_fs_(0), unmap_tlbreq_(0), data_cpuid(-1), in_exec_(0),
uaccess_(0), upath(0), uargv(userptr<const char>(nullptr)),
exception_inuse(0), magic(PROC_MAGIC), state_(EMBRYO)
exception_inuse(0), magic(PROC_MAGIC), pgmap(0), state_(EMBRYO)
{
snprintf(lockname, sizeof(lockname), "cv:proc:%d", pid);
initlock(&lock, lockname+3, LOCKSTAT_PROC);
......@@ -420,7 +440,7 @@ fork(int flags)
struct proc *np;
int cow = 1;
// cprintf("%d: fork\n", myproc()->pid);
//cprintf("%d: fork\n", myproc()->pid);
// Allocate process.
if((np = proc::alloc()) == 0)
......@@ -435,9 +455,16 @@ fork(int flags)
if(flags & FORK_SHARE_VMAP) {
np->vmap = myproc()->vmap;
np->vmap->ref++;
if (flags & FORK_SEPARATE_PGMAP) {
panic("fork: FORK_SEPARATE_PGMAP");
} else {
np->pgmap = myproc()->pgmap;
myproc()->pgmap->inc();
}
} else {
// Copy process state from p.
np->vmap = myproc()->vmap->copy(cow);
np->vmap = myproc()->vmap->copy(cow, myproc()->pgmap);
np->pgmap = proc_pgmap::alloc();
}
np->parent = myproc();
......@@ -479,6 +506,8 @@ finishproc(struct proc *p, bool removepid)
panic("finishproc: ns_remove");
if (p->vmap != nullptr)
p->vmap->decref();
if (p->pgmap != nullptr)
p->pgmap->dec();
if (p->uwq != nullptr)
p->uwq->dec();
p->pid = 0;
......
......@@ -22,7 +22,7 @@ script_mmap_work(void *a0, void *a1, void *a2, void *a3)
if(vmn == 0)
panic("sys_script: new vmnode");
if(p->vmap->insert(vmn, PGROUNDDOWN((u64)addr), 0) < 0)
if(p->vmap->insert(vmn, PGROUNDDOWN((u64)addr), 0, p->pgmap) < 0)
panic("sys_script: insert");
*donep += 1;
......
......@@ -57,7 +57,7 @@ argcheckstr(const char *addr)
const char *s = addr;
while(1){
if(pagefault(myproc()->vmap, (uptr) s, 0) < 0)
if(pagefault(myproc()->vmap, (uptr) s, 0, myproc()->pgmap) < 0)
return -1;
if(*s == 0)
return s - addr;
......@@ -74,7 +74,7 @@ argcheckptr(const void *p, int size)
{
u64 i = (u64) p;
for(uptr va = PGROUNDDOWN(i); va < i+size; va = va + PGSIZE)
if(pagefault(myproc()->vmap, va, 0) < 0)
if(pagefault(myproc()->vmap, va, 0, myproc()->pgmap) < 0)
return -1;
return 0;
}
......
......@@ -79,7 +79,7 @@ sys_pread(int fd, void *ubuf, size_t count, off_t offset)
// while pread executes
uptr i = (uptr)ubuf;
for(uptr va = PGROUNDDOWN(i); va < i+count; va = va + PGSIZE)
if(pagefault(myproc()->vmap, va, 0) < 0)
if(pagefault(myproc()->vmap, va, 0, myproc()->pgmap) < 0)
return -1;
return f->pread((char*)ubuf, count, offset);
}
......@@ -117,7 +117,7 @@ sys_pwrite(int fd, const void *ubuf, size_t count, off_t offset)
// while pread executes
uptr i = (uptr)ubuf;
for(uptr va = PGROUNDDOWN(i); va < i+count; va = va + PGSIZE)
if(pagefault(myproc()->vmap, va, 0) < 0)
if(pagefault(myproc()->vmap, va, 0, myproc()->pgmap) < 0)
return -1;
return f->pwrite((char*)ubuf, count, offset);
}
......
......@@ -139,7 +139,7 @@ sys_mmap(userptr<void> addr, size_t len, int prot, int flags, int fd,
if (vmn == 0)
return MAP_FAILED;
uptr r = myproc()->vmap->insert(vmn, start, 1);
uptr r = myproc()->vmap->insert(vmn, start, 1, myproc()->pgmap);
if (r < 0) {
delete vmn;
return MAP_FAILED;
......@@ -162,7 +162,7 @@ sys_munmap(userptr<void> addr, size_t len)
uptr align_addr = PGROUNDDOWN(addr);
uptr align_len = PGROUNDUP(addr + len) - align_addr;
if (myproc()->vmap->remove(align_addr, align_len) < 0)
if (myproc()->vmap->remove(align_addr, align_len, myproc()->pgmap) < 0)
return -1;
return 0;
......
......@@ -52,7 +52,7 @@ do_pagefault(struct trapframe *tf)
panic("do_pagefault: %lx", addr);
sti();
if(pagefault(myproc()->vmap, addr, tf->err) >= 0){
if(pagefault(myproc()->vmap, addr, tf->err, myproc()->pgmap) >= 0){
#if MTRACE
mtstop(myproc());
if (myproc()->mtrace_stacks.curr >= 0)
......@@ -66,7 +66,7 @@ do_pagefault(struct trapframe *tf)
return 0;
} else if (tf->err & FEC_U) {
sti();
if(pagefault(myproc()->vmap, addr, tf->err) >= 0){
if(pagefault(myproc()->vmap, addr, tf->err, myproc()->pgmap) >= 0){
#if MTRACE
mtstop(myproc());
if (myproc()->mtrace_stacks.curr >= 0)
......
......@@ -29,10 +29,12 @@ inituser(void)
bootproc = p;
if((p->vmap = vmap::alloc()) == 0)
panic("userinit: out of vmaps?");
if ((p->pgmap = proc_pgmap::alloc()) == 0)
panic("inituser: alloc proc_pgmap");
vmnode *vmn = new vmnode(PGROUNDUP(_initcode_size) / PGSIZE);
if(vmn == 0)
panic("userinit: vmn_allocpg");
if(p->vmap->insert(vmn, INIT_START, 1) < 0)
if(p->vmap->insert(vmn, INIT_START, 1, nullptr) < 0)
panic("userinit: vmap_insert");
if(p->vmap->copyout(INIT_START, _initcode_start, _initcode_size) < 0)
panic("userinit: copyout");
......
......@@ -59,7 +59,7 @@ sys_wqinit(uptr uentry)
if (myproc()->uwq != nullptr)
return -1;
uwq = uwq::alloc(myproc()->vmap, myproc()->ftable, uentry);
uwq = uwq::alloc(myproc()->pgmap, myproc()->vmap, myproc()->ftable, uentry);
if (uwq == nullptr)
return -1;
......@@ -118,7 +118,7 @@ uwq_worker::wait(void)
// uwq
//
uwq*
uwq::alloc(vmap* vmap, filetable *ftable, uptr uentry)
uwq::alloc(proc_pgmap* pgmap, vmap* vmap, filetable *ftable, uptr uentry)
{
uwq_ipcbuf* ipc;
uwq* u;
......@@ -129,16 +129,18 @@ uwq::alloc(vmap* vmap, filetable *ftable, uptr uentry)
ftable->incref();
vmap->incref();
pgmap->inc();
u = new uwq(vmap, ftable, ipc, uentry);
u = new uwq(pgmap, vmap, ftable, ipc, uentry);
if (u == nullptr) {
ftable->decref();
vmap->decref();
pgmap->dec();
ksfree(slab_userwq, ipc);
return nullptr;
}
if (mapkva(vmap->pml4, (char*)ipc, USERWQ, USERWQSIZE)) {
if (mapkva(pgmap->pml4, (char*)ipc, USERWQ, USERWQSIZE)) {
u->dec();
return nullptr;
}
......@@ -146,9 +148,10 @@ uwq::alloc(vmap* vmap, filetable *ftable, uptr uentry)
return u;
}
uwq::uwq(vmap* vmap, filetable* ftable, uwq_ipcbuf* ipc, uptr uentry)
uwq::uwq(proc_pgmap* pgmap, vmap* vmap,
filetable* ftable, uwq_ipcbuf* ipc, uptr uentry)
: rcu_freed("uwq"),
vmap_(vmap), ftable_(ftable), ipc_(ipc),
pgmap_(pgmap), vmap_(vmap), ftable_(ftable), ipc_(ipc),
uentry_(uentry), ustack_(UWQSTACK), uref_(0)
{
for (int i = 0; i < NELEM(ipc_->len); i++)
......@@ -163,6 +166,7 @@ uwq::uwq(vmap* vmap, filetable* ftable, uwq_ipcbuf* ipc, uptr uentry)
uwq::~uwq(void)
{
ksfree(slab_userwq, ipc_);
pgmap_->dec();
vmap_->decref();
ftable_->decref();
}
......@@ -298,7 +302,7 @@ uwq::allocworker(void)
// Include a bumper page
uptr ustack = ustack_.fetch_add((USTACKPAGES*PGSIZE)+PGSIZE);
uptr stacktop = ustack + (USTACKPAGES*PGSIZE);
if (vmap_->insert(vmn, ustack, 1) < 0) {
if (vmap_->insert(vmn, ustack, 1, pgmap_) < 0) {
delete vmn;
finishproc(p);
return nullptr;
......
......@@ -210,32 +210,20 @@ vmap::vmap() :
#if VM_RADIX
vmas(PGSHIFT),
#endif
ref(1), pml4(setupkvm()), kshared((char*) ksalloc(slab_kshared)),
brk_(0)
ref(1), kshared((char*) ksalloc(slab_kshared)), brk_(0)
{
initlock(&brklock_, "brk_lock", LOCKSTAT_VM);
if (pml4 == 0) {
cprintf("vmap_alloc: setupkvm out of memory\n");
goto err;
}
if (kshared == nullptr) {
cprintf("vmap::vmap: kshared out of memory\n");
goto err;
}
if (mapkva(pml4, kshared, KSHARED, KSHAREDSIZE)) {
cprintf("vmap::vmap: mapkva out of memory\n");
goto err;
}
return;
err:
if (kshared)
ksfree(slab_kshared, kshared);
if (pml4)
freevm(pml4);
throw_bad_alloc();
}
......@@ -243,8 +231,6 @@ vmap::~vmap()
{
if (kshared)
ksfree(slab_kshared, kshared);
if (pml4)
freevm(pml4);
destroylock(&brklock_);
}
......@@ -286,7 +272,7 @@ vmap::replace_vma(vma *a, vma *b)
}
vmap*
vmap::copy(int share)
vmap::copy(int share, proc_pgmap* pgmap)
{
vmap *nm = new vmap();
......@@ -324,7 +310,7 @@ vmap::copy(int share)
#elif VM_CRANGE
replace_vma(e, repl);
#endif
updatepages(pml4, range_start, range_end, [](atomic<pme_t>* p) {
updatepages(pgmap->pml4, range_start, range_end, [](atomic<pme_t>* p) {
for (;;) {
pme_t v = p->load();
if (v & PTE_LOCK)
......@@ -402,7 +388,7 @@ vmap::lookup(uptr start, uptr len)
}
long
vmap::insert(vmnode *n, uptr vma_start, int dotlb)
vmap::insert(vmnode *n, uptr vma_start, int dotlb, proc_pgmap* pgmap)
{
ANON_REGION("vmap::insert", &perfgroup);
......@@ -471,7 +457,9 @@ again:
bool needtlb = false;
if (replaced)
updatepages(pml4, e->vma_start, e->vma_end, [&needtlb](atomic<pme_t> *p) {
updatepages(pgmap->pml4, e->vma_start, e->vma_end,
[&needtlb](atomic<pme_t> *p)
{
for (;;) {
pme_t v = p->load();
if (v & PTE_LOCK)
......@@ -496,7 +484,7 @@ again:
}
int
vmap::remove(uptr vma_start, uptr len)
vmap::remove(uptr vma_start, uptr len, proc_pgmap* pgmap)
{
{
// new scope to release the search lock before tlbflush
......@@ -527,7 +515,7 @@ vmap::remove(uptr vma_start, uptr len)
}
bool needtlb = false;
updatepages(pml4, vma_start, vma_start + len, [&needtlb](atomic<pme_t> *p) {
updatepages(pgmap->pml4, vma_start, vma_start + len, [&needtlb](atomic<pme_t> *p) {
for (;;) {
pme_t v = p->load();
if (v & PTE_LOCK)
......@@ -555,7 +543,7 @@ vmap::remove(uptr vma_start, uptr len)
*/
int
vmap::pagefault_wcow(vma *m)
vmap::pagefault_wcow(vma *m, proc_pgmap* pgmap)
{
// Always make a copy of n, even if this process has the only ref,
// because other processes may change ref count while this process
......@@ -572,7 +560,7 @@ vmap::pagefault_wcow(vma *m)
// and space copying pages that are no longer mapped, but will only
// do that once. Fixing this requires getting rid of the vmnode.
replace_vma(m, repl);
updatepages(pml4, m->vma_start, m->vma_end, [](atomic<pme_t> *p) {
updatepages(pgmap->pml4, m->vma_start, m->vma_end, [](atomic<pme_t> *p) {
// XXX(austin) In radix, this may clear PTEs belonging to other
// VMAs that have replaced sub-ranges of the faulting VMA.
// That's unfortunate but okay because we'll just bring them
......@@ -591,12 +579,15 @@ vmap::pagefault_wcow(vma *m)
}
int
vmap::pagefault(uptr va, u32 err)
vmap::pagefault(uptr va, u32 err, proc_pgmap* pgmap)
{
if (pgmap == nullptr)
panic("vmap::pagefault no pgmap");
if (va >= USERTOP)
return -1;
atomic<pme_t> *pte = walkpgdir(pml4, va, 1);
atomic<pme_t> *pte = walkpgdir(pgmap->pml4, va, 1);
if (pte == nullptr)
throw_bad_alloc();
......@@ -630,7 +621,7 @@ vmap::pagefault(uptr va, u32 err)
err, va, m->va_type, m->n->ref(), myproc()->pid);
if (m->va_type == COW && (err & FEC_WR)) {
if (pagefault_wcow(m) < 0)
if (pagefault_wcow(m, pgmap) < 0)
return -1;
if (tlb_shootdown) {
......@@ -686,7 +677,7 @@ vmap::pagefault(uptr va, u32 err)
}
int
pagefault(vmap *vmap, uptr va, u32 err)
pagefault(vmap *vmap, uptr va, u32 err, proc_pgmap* pgmap)
{
#if MTRACE
mt_ascope ascope("%s(%#lx)", __func__, va);
......@@ -697,7 +688,7 @@ pagefault(vmap *vmap, uptr va, u32 err)
#if EXCEPTIONS
try {
#endif
return vmap->pagefault(va, err);
return vmap->pagefault(va, err, pgmap);
#if EXCEPTIONS
} catch (retryable& e) {
cprintf("%d: pagefault retry\n", myproc()->pid);
......
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.