Commit 5c24b9a8 authored by Frans Kaashoek

Copy-on-write fork

Parent d9b8b154
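
The diff below makes fork() share the parent's memory copy-on-write instead of eagerly duplicating every page: shared vmas are marked COW, their page-table entries lose the write bit (PTE_COW is set instead), and the page-fault handler performs the actual copy on the first write. For orientation, here is a small user-level sketch of the behavior the kernel must preserve. It uses the generic POSIX API rather than this repository's user library (whose fork takes a flags argument), so everything in it is illustrative only and is not part of this commit.

// Not part of this commit: a minimal POSIX-style sketch of the semantics
// copy-on-write fork must preserve -- after fork(), parent and child see
// the same data, but a write in one is invisible to the other.
#include <stdio.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

int
main(void)
{
  static char buf[4096];          // one page of data, shared at fork time
  memset(buf, 'A', sizeof(buf));

  pid_t pid = fork();
  if(pid == 0){                   // child: the write triggers the COW fault
    buf[0] = 'B';
    _exit(0);
  }
  waitpid(pid, 0, 0);
  printf("parent still sees: %c\n", buf[0]);   // prints 'A'
  return 0;
}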
@@ -174,13 +174,13 @@ struct vmap * vmap_alloc(void);
 void vmap_decref(struct vmap *);
 int vmap_insert(struct vmap *, struct vmnode *n, uint);
 struct vma * vmap_lookup(struct vmap *, uint);
-struct vmap * vmap_copy(struct vmap *);
+struct vmap * vmap_copy(struct vmap *, pde_t*, int);
 void freevm(pde_t*);
 void switchuvm(struct proc*);
 void switchkvm(void);
 int copyout(struct vmap *, uint, void*, uint);
 int copyin(struct vmap *, uint, void*, uint);
-int pagefault(pde_t*, struct vmap *, uint);
+int pagefault(pde_t*, struct vmap *, uint, uint);

 // number of elements in fixed-size array
 #define NELEM(x) (sizeof(x)/sizeof((x)[0]))
...
@@ -24,6 +24,11 @@
 #define FL_VIP 0x00100000 // Virtual Interrupt Pending
 #define FL_ID 0x00200000 // ID flag
+
+// Page fault error codes
+#define FEC_PR 0x1 // Page fault caused by protection violation
+#define FEC_WR 0x2 // Page fault caused by a write
+#define FEC_U 0x4 // Page fault occurred while in user mode

 // Control Register flags
 #define CR0_PE 0x00000001 // Protection Enable
 #define CR0_MP 0x00000002 // Monitor coProcessor
@@ -134,6 +139,7 @@ struct segdesc {
 #define PTE_D 0x040 // Dirty
 #define PTE_PS 0x080 // Page Size
 #define PTE_MBZ 0x180 // Bits must be zero
+#define PTE_COW 0x800 // copy-on-write

 // Address in page table or page directory entry
 #define PTE_ADDR(pte) ((uint)(pte) & ~0xFFF)
...
@@ -203,6 +203,7 @@ fork(int flags)
 {
   int i, pid;
   struct proc *np;
+  uint cow = 1;

   // Allocate process.
   if((np = allocproc()) == 0)
@@ -217,7 +218,7 @@ fork(int flags)
   if(flags == 0) {
     // Copy process state from p.
-    if((np->vmap = vmap_copy(proc->vmap)) == 0){
+    if((np->vmap = vmap_copy(proc->vmap, proc->pgdir, cow)) == 0){
       freevm(np->pgdir);
       kfree(np->kstack);
       np->kstack = 0;
@@ -248,7 +249,6 @@ fork(int flags)
   acquire(&proc->lock);
   SLIST_INSERT_HEAD(&proc->childq, np, child_next);
   release(&proc->lock);
   return pid;
 }
@@ -366,9 +366,7 @@ steal(void)
   if (p->state != RUNNABLE)
     panic("non-runnable proc on runq");
   if (p->curcycles > MINCYCTHRESH) {
-    // cprintf("%d: steal %d (%d) from %d\n", cpunum(), p->pid, p->curcycles, c);
-    cprintf("%d: steal %d (%d) from %d\n", cpunum(), p->pid, p->curcycles, c);
     delrun1(&runqs[c], p);
     release(&runqs[c].lock);
     p->curcycles = 0;
...
@@ -30,18 +30,20 @@ struct context {
 };

 enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
+enum vmatype { PRIVATE, COW };

 // Virtual memory
 struct vmnode {
   uint npages;
   char *page[32];
   uint ref;
-  uint alloc;
+  uint alloc;    // in use?
 };

 struct vma {
   uint va_start;        // start of mapping
   uint va_end;          // one past the last byte
+  enum vmatype va_type;
   struct vmnode *n;
   struct spinlock lock; // serialize fault/unmap
 };
...
@@ -20,9 +20,9 @@
 int
 fetchint(uint addr, int *ip)
 {
-  if(pagefault(proc->pgdir, proc->vmap, addr) < 0)
+  if(pagefault(proc->pgdir, proc->vmap, addr, 0) < 0)
     return -1;
-  if(pagefault(proc->pgdir, proc->vmap, addr+3) < 0)
+  if(pagefault(proc->pgdir, proc->vmap, addr+3, 0) < 0)
     return -1;
   *ip = *(int*)(addr);
   return 0;
@@ -37,7 +37,7 @@ fetchstr(uint addr, char **pp)
   char *s = (char *) addr;

   while(1){
-    if(pagefault(proc->pgdir, proc->vmap, (uint) s) < 0)
+    if(pagefault(proc->pgdir, proc->vmap, (uint) s, 0) < 0)
       return -1;
     if(*s == 0){
       *pp = (char*)addr;
@@ -66,7 +66,7 @@ argptr(int n, char **pp, int size)
   if(argint(n, &i) < 0)
     return -1;
   for(uint va = PGROUNDDOWN(i); va < i+size; va = va + PGSIZE)
-    if(pagefault(proc->pgdir, proc->vmap, va) < 0)
+    if(pagefault(proc->pgdir, proc->vmap, va, 0) < 0)
       return -1;
   *pp = (char*)i;
   return 0;
...
@@ -91,8 +91,7 @@ trap(struct trapframe *tf)
   }

   if(tf->trapno == T_PGFLT){
-    if(pagefault(proc->pgdir, proc->vmap, rcr2()) >= 0){
-      switchuvm(proc);
+    if(pagefault(proc->pgdir, proc->vmap, rcr2(), tf->err) >= 0){
       return;
     }
   }
...
@@ -104,6 +104,44 @@ mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm)
   return 0;
 }
+
+static int
+updatepages(pde_t *pgdir, void *begin, void *end, int perm)
+{
+  char *a, *last;
+  pte_t *pte;
+
+  a = PGROUNDDOWN(begin);
+  last = PGROUNDDOWN(end);
+  for (;;) {
+    pte = walkpgdir(pgdir, a, 1);
+    if(pte != 0)
+      *pte = PTE_ADDR(*pte) | perm | PTE_P;
+    if (a == last)
+      break;
+    a += PGSIZE;
+  }
+  return 0;
+}
+
+static int
+clearpages(pde_t *pgdir, void *begin, void *end)
+{
+  char *a, *last;
+  pte_t *pte;
+
+  a = PGROUNDDOWN(begin);
+  last = PGROUNDDOWN(end);
+  for (;;) {
+    pte = walkpgdir(pgdir, a, 1);
+    if(pte != 0)
+      *pte = 0;
+    if (a == last)
+      break;
+    a += PGSIZE;
+  }
+  return 0;
+}
+
 // The mappings from logical to linear are one to one (i.e.,
 // segmentation doesn't do anything).
 // There is one page table per process, plus one that's used
@@ -253,9 +291,11 @@ struct vmnode *
 vmn_copy(struct vmnode *n)
 {
   struct vmnode *c = vmn_allocpg(n->npages);
-  if(c != 0)
-    for(uint i = 0; i < n->npages; i++)
+  if(c != 0) {
+    for(uint i = 0; i < n->npages; i++) {
       memmove(c->page[i], n->page[i], PGSIZE);
+    }
+  }
   return c;
 }
@@ -267,6 +307,7 @@ vmap_alloc(void)
     if(m->alloc == 0 && __sync_bool_compare_and_swap(&m->alloc, 0, 1)) {
       for(uint j = 0; j < sizeof(m->e) / sizeof(m->e[0]); j++){
         m->e[j].n = 0;
+        m->e[j].va_type = PRIVATE;
         m->e[j].lock.name = "vma";
       }
       m->lock.name = "vmap";
@@ -339,7 +380,7 @@ vmap_lookup(struct vmap *m, uint va)
 }

 struct vmap *
-vmap_copy(struct vmap *m)
+vmap_copy(struct vmap *m, pde_t* pgdir, int share)
 {
   struct vmap *c = vmap_alloc();
   if(c == 0)
@@ -351,7 +392,15 @@ vmap_copy(struct vmap *m)
       continue;
     c->e[i].va_start = m->e[i].va_start;
     c->e[i].va_end = m->e[i].va_end;
+    if (share) {
+      c->e[i].n = m->e[i].n;
+      c->e[i].va_type = COW;
+      m->e[i].va_type = COW;
+      updatepages(pgdir, (void *) (m->e[i].va_start), (void *) (m->e[i].va_end), PTE_COW);
+    } else {
       c->e[i].n = vmn_copy(m->e[i].n);
+      c->e[i].va_type = m->e[i].va_type;
+    }
     if(c->e[i].n == 0) {
       release(&m->lock);
       vmap_decref(c);
@@ -359,6 +408,9 @@ vmap_copy(struct vmap *m)
     }
     __sync_fetch_and_add(&c->e[i].n->ref, 1);
   }
+  if (share)
+    lcr3(PADDR(pgdir));  // Reload hardware page table
   release(&m->lock);
   return c;
 }
@@ -466,8 +518,9 @@ copyin(struct vmap *vmap, uint va, void *p, uint len)
 }

 int
-pagefault(pde_t *pgdir, struct vmap *vmap, uint va)
+pagefault(pde_t *pgdir, struct vmap *vmap, uint va, uint err)
 {
   pte_t *pte = walkpgdir(pgdir, (const void *)va, 1);
   if((*pte & (PTE_P|PTE_U|PTE_W)) == (PTE_P|PTE_U|PTE_W))
     return 0;
@@ -476,8 +529,39 @@ pagefault(pde_t *pgdir, struct vmap *vmap, uint va)
   if(m == 0)
     return -1;
+  // cprintf("%d: pf addr=0x%x err 0x%x check = %d\n", proc->pid, va, err, check);
+  // cprintf("%d: pf vma type = %d refcnt %d pte=0x%x\n", proc->pid, m->va_type, m->n->ref, *pte);
   uint npg = (PGROUNDDOWN(va) - m->va_start) / PGSIZE;
+  if (m->va_type == COW && (err & FEC_WR)) {
+    // Write to a COW page
+    if (m->n->ref == 1) {   // if vma isn't shared any more, make it private
+      m->va_type = PRIVATE;
+      *pte = PADDR(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
+    } else {                // vma is still shared; give process its private copy
+      struct vmnode *c = vmn_copy(m->n);
+      c->ref = 1;
+      __sync_sub_and_fetch(&m->n->ref, 1);
+      if (m->n->ref == 0)
+        panic("cow");
+      m->va_type = PRIVATE;
+      m->n = c;
+      // Update the hardware page tables to reflect the change to the vma
+      clearpages(pgdir, (void *) m->va_start, (void *) m->va_end);
+      pte = walkpgdir(pgdir, (const void *)va, 0);
+      *pte = PADDR(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
+    }
+  } else if (m->va_type == COW) {
+    *pte = PADDR(m->n->page[npg]) | PTE_P | PTE_U | PTE_COW;
+  } else {
+    if (m->n->ref > 1)
+      panic("pagefault");
     *pte = PADDR(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
+  }
+  lcr3(PADDR(pgdir));  // Reload hardware page tables
   release(&m->lock);
   return 1;
 }
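
To summarize the new pagefault() path outside the kernel context, the sketch below restates its decision logic as a small standalone program. The enum values, the helper cow_fault_action, and the action names are invented for illustration and do not exist in the repository; only FEC_WR and the branch structure are taken from the diff above.

// Not from the repo: a standalone paraphrase of the fault-handling decision
// in the new pagefault(), with the kernel details stripped away.
#include <stdio.h>

#define FEC_WR 0x2                 // fault was caused by a write (new mmu.h bit)

enum vmatype { PRIVATE, COW };
enum action { MAP_WRITABLE, MAP_COW_READONLY, COPY_THEN_MAP_WRITABLE };

// Decide what the handler does for one vma, given the fault error code and
// how many address spaces still reference the backing vmnode.
static enum action
cow_fault_action(enum vmatype type, unsigned err, unsigned ref)
{
  if(type == COW && (err & FEC_WR))
    return ref == 1 ? MAP_WRITABLE            // last user: just make it private
                    : COPY_THEN_MAP_WRITABLE; // still shared: copy pages first
  if(type == COW)
    return MAP_COW_READONLY;                  // read fault: keep sharing, PTE_COW set
  return MAP_WRITABLE;                        // PRIVATE vma: map it writable
}

int
main(void)
{
  printf("shared COW, write fault -> %d\n", cow_fault_action(COW, FEC_WR, 2));
  printf("sole COW, write fault   -> %d\n", cow_fault_action(COW, FEC_WR, 1));
  printf("COW, read fault         -> %d\n", cow_fault_action(COW, 0, 2));
  printf("private vma, any fault  -> %d\n", cow_fault_action(PRIVATE, 0, 1));
  return 0;
}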