Commit 06047fd7 — created by Nickolai Zeldovich

Merge branch 'scale' of git+ssh://pdos.csail.mit.edu/home/am0/6.828/xv6 into scale

Conflicts: Makefile
......@@ -174,6 +174,7 @@ UPROGS=\
_zombie\
_halt\
_thrtest\
_sleep\
_maptest\
fs.img: mkfs README $(UPROGS)
......
......@@ -154,6 +154,7 @@ void idtinit(void);
extern uint ticks;
void tvinit(void);
extern struct spinlock tickslock;
extern struct condvar cv_ticks;
// uart.c
void uartinit(void);
......@@ -166,7 +167,8 @@ void kvmalloc(void);
void vmenable(void);
pde_t* setupkvm(void);
char* uva2ka(pde_t*, char*);
struct vmnode * vmn_allocpg(uint);
struct vmnode* vmn_alloc(uint, uint);
struct vmnode* vmn_allocpg(uint);
void vmn_free(struct vmnode *);
int vmn_load(struct vmnode *, struct inode*, uint, uint);
struct vmap * vmap_alloc(void);
......@@ -174,13 +176,13 @@ void vmap_decref(struct vmap *);
int vmap_insert(struct vmap *, struct vmnode *n, uint);
int vmap_remove(struct vmap *, uint va_start, uint len);
struct vma * vmap_lookup(struct vmap *, uint);
struct vmap * vmap_copy(struct vmap *);
struct vmap * vmap_copy(struct vmap *, pde_t*, int);
void freevm(pde_t*);
void switchuvm(struct proc*);
void switchkvm(void);
int copyout(struct vmap *, uint, void*, uint);
int copyin(struct vmap *, uint, void*, uint);
int pagefault(pde_t*, struct vmap *, uint);
int pagefault(pde_t*, struct vmap *, uint, uint);
// number of elements in fixed-size array
#define NELEM(x) (sizeof(x)/sizeof((x)[0]))
......
......@@ -21,6 +21,7 @@ exec(char *path, char **argv)
pde_t *pgdir = 0, *oldpgdir;
struct vmap *vmap = 0, *oldvmap;
struct vmnode *vmn = 0;
int odp = 1;
if((ip = namei(path)) == 0)
return -1;
......@@ -58,16 +59,25 @@ exec(char *path, char **argv)
brk = va_end;
int npg = (va_end - va_start) / PGSIZE;
if ((vmn = vmn_allocpg(npg)) == 0)
goto bad;
if (odp) {
if ((vmn = vmn_alloc(npg, ONDEMAND)) == 0)
goto bad;
} else {
if ((vmn = vmn_allocpg(npg)) == 0)
goto bad;
}
if(vmn_load(vmn, ip, ph.offset, ph.filesz) < 0)
goto bad;
if(vmap_insert(vmap, vmn, ph.va) < 0)
goto bad;
vmn = 0;
}
iunlockput(ip);
ip = 0;
if (odp)
iunlock(ip);
else {
iunlockput(ip);
ip = 0;
}
// Allocate a vmnode for the heap.
// XXX pre-allocate 32 pages..
......
......@@ -24,6 +24,11 @@
#define FL_VIP 0x00100000 // Virtual Interrupt Pending
#define FL_ID 0x00200000 // ID flag
// Page fault error codes
#define FEC_PR 0x1 // Page fault caused by protection violation
#define FEC_WR 0x2 // Page fault caused by a write
#define FEC_U 0x4 // Page fault occured while in user mode
// Control Register flags
#define CR0_PE 0x00000001 // Protection Enable
#define CR0_MP 0x00000002 // Monitor coProcessor
......@@ -134,6 +139,7 @@ struct segdesc {
#define PTE_D 0x040 // Dirty
#define PTE_PS 0x080 // Page Size
#define PTE_MBZ 0x180 // Bits must be zero
#define PTE_COW 0x800 // copy-on-write
// Address in page table or page directory entry
#define PTE_ADDR(pte) ((uint)(pte) & ~0xFFF)
......
#define NPROC 64 // maximum number of processes
#define KSTACKSIZE 4096 // size of per-process kernel stack
#define NCPU 8 // maximum number of CPUs
#define NCPU 2 // maximum number of CPUs
#define NOFILE 16 // open files per process
#define NFILE 100 // open files per system
#define NBUF 10 // size of disk block cache
......@@ -8,6 +8,7 @@
#define NDEV 10 // maximum major device number
#define ROOTDEV 1 // device number of file system root disk
#define USERTOP 0xA0000 // end of user address space
#define PHYSTOP 0x10000000 // use phys mem up to here as free pool
#define PHYSTOP 0x1000000 // use phys mem up to here as free pool
#define MAXARG 32 // max exec arguments
#define MAXNAME 16 // max string names
#define MINCYCTHRESH 1000000 // min cycles a proc executes on a core before allowed to be stolen
......@@ -61,6 +61,8 @@ allocproc(void)
found:
p->state = EMBRYO;
p->pid = ptable->nextpid++;
p->cpuid = cpu->id;
p->curcycles = 0;
release(&ptable->lock);
// Allocate kernel stack if possible.
......@@ -103,10 +105,10 @@ addrun1(struct runq *rq, struct proc *p)
void
addrun(struct proc *p)
{
acquire(&runq->lock);
acquire(&runqs[p->cpuid].lock);
// cprintf("%d: addrun %d\n", cpunum(), p->pid);
addrun1(runq, p);
release(&runq->lock);
addrun1(&runqs[p->cpuid], p);
release(&runqs[p->cpuid].lock);
}
static void
......@@ -201,6 +203,9 @@ fork(int flags)
{
int i, pid;
struct proc *np;
uint cow = 1;
// cprintf("%d: fork\n", proc->pid);
// Allocate process.
if((np = allocproc()) == 0)
......@@ -215,7 +220,7 @@ fork(int flags)
if(flags == 0) {
// Copy process state from p.
if((np->vmap = vmap_copy(proc->vmap)) == 0){
if((np->vmap = vmap_copy(proc->vmap, proc->pgdir, cow)) == 0){
freevm(np->pgdir);
kfree(np->kstack);
np->kstack = 0;
......@@ -246,7 +251,7 @@ fork(int flags)
acquire(&proc->lock);
SLIST_INSERT_HEAD(&proc->childq, np, child_next);
release(&proc->lock);
// cprintf("%d: fork done (pid %d)\n", proc->pid, pid);
return pid;
}
......@@ -363,12 +368,15 @@ steal(void)
STAILQ_FOREACH(p, &runqs[c].runq, run_next) {
if (p->state != RUNNABLE)
panic("non-runnable proc on runq");
// cprintf("%d: steal %d from %d\n", cpunum(), p->pid, c);
delrun1(&runqs[c], p);
release(&runqs[c].lock);
addrun(p);
return;
if (p->curcycles > MINCYCTHRESH) {
// cprintf("%d: steal %d (%d) from %d\n", cpunum(), p->pid, p->curcycles, c);
delrun1(&runqs[c], p);
release(&runqs[c].lock);
p->curcycles = 0;
p->cpuid = cpu->id;
addrun(p);
return;
}
}
release(&runqs[c].lock);
}
......@@ -417,6 +425,7 @@ scheduler(void)
proc = p;
switchuvm(p);
p->state = RUNNING;
p->tsc = rdtsc();
mtrace_fcall_register(pid, 0, 0, mtrace_pause);
mtrace_fcall_register(proc->pid, 0, 0, mtrace_resume);
......@@ -456,7 +465,7 @@ sched(void)
if(readeflags()&FL_IF)
panic("sched interruptible");
intena = cpu->intena;
proc->curcycles += rdtsc() - proc->tsc;
mtrace_fcall_register(proc->pid, 0, 0, mtrace_pause);
mtrace_call_set(0, cpunum());
swtch(&proc->context, cpu->scheduler);
......
......@@ -30,18 +30,25 @@ struct context {
};
enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
enum vmatype { PRIVATE, COW};
enum vmntype { EAGER, ONDEMAND};
// Virtual memory
struct vmnode {
uint npages;
char *page[32];
uint ref;
uint alloc;
uint alloc; // in use?
enum vmntype type;
struct inode *ip;
uint offset;
uint sz;
};
struct vma {
uint va_start; // start of mapping
uint va_end; // one past the last byte
enum vmatype va_type;
struct vmnode *n;
struct spinlock lock; // serialize fault/unmap
};
......@@ -69,6 +76,9 @@ struct proc {
struct file *ofile[NOFILE]; // Open files
struct inode *cwd; // Current directory
char name[16]; // Process name (debugging)
unsigned long long tsc;
unsigned long long curcycles;
unsigned cpuid;
struct spinlock lock;
STAILQ_ENTRY(proc) run_next;
SLIST_HEAD(childlist, proc) childq;
......
#include "types.h"
#include "stat.h"
#include "user.h"

// Stress test for the sleep syscall / tick condvar: sleep for one
// tick, ten million times.
int
main(void)
{
  int i;

  for (i = 0; i < 10000000; i++) {
    sleep(1);
  }
  // xv6 user programs must terminate through exit(); simply returning
  // from main would run off the end of the process.
  exit();
  return 0; // not reached; satisfies int main
}
......@@ -20,9 +20,9 @@
int
fetchint(uint addr, int *ip)
{
if(pagefault(proc->pgdir, proc->vmap, addr) < 0)
if(pagefault(proc->pgdir, proc->vmap, addr, 0) < 0)
return -1;
if(pagefault(proc->pgdir, proc->vmap, addr+3) < 0)
if(pagefault(proc->pgdir, proc->vmap, addr+3, 0) < 0)
return -1;
*ip = *(int*)(addr);
return 0;
......@@ -37,7 +37,7 @@ fetchstr(uint addr, char **pp)
char *s = (char *) addr;
while(1){
if(pagefault(proc->pgdir, proc->vmap, (uint) s) < 0)
if(pagefault(proc->pgdir, proc->vmap, (uint) s, 0) < 0)
return -1;
if(*s == 0){
*pp = (char*)addr;
......@@ -66,7 +66,7 @@ argptr(int n, char **pp, int size)
if(argint(n, &i) < 0)
return -1;
for(uint va = PGROUNDDOWN(i); va < i+size; va = va + PGSIZE)
if(pagefault(proc->pgdir, proc->vmap, va) < 0)
if(pagefault(proc->pgdir, proc->vmap, va, 0) < 0)
return -1;
*pp = (char*)i;
return 0;
......
......@@ -76,7 +76,7 @@ sys_sleep(void)
release(&tickslock);
return -1;
}
sleep(&ticks, &tickslock);
cv_sleep(&cv_ticks, &tickslock);
}
release(&tickslock);
return 0;
......
......@@ -13,6 +13,7 @@
struct gatedesc idt[256];
extern uint vectors[]; // in vectors.S: array of 256 entry pointers
struct spinlock tickslock;
struct condvar cv_ticks;
uint ticks;
void
......@@ -25,6 +26,7 @@ tvinit(void)
SETGATE(idt[T_SYSCALL], 1, SEG_KCODE<<3, vectors[T_SYSCALL], DPL_USER);
initlock(&tickslock, "time");
initcondvar(&cv_ticks, "time");
}
void
......@@ -52,7 +54,7 @@ trap(struct trapframe *tf)
if(cpu->id == 0){
acquire(&tickslock);
ticks++;
wakeup(&ticks);
cv_wakeup(&cv_ticks);
release(&tickslock);
}
lapiceoi();
......@@ -89,8 +91,7 @@ trap(struct trapframe *tf)
}
if(tf->trapno == T_PGFLT){
if(pagefault(proc->pgdir, proc->vmap, rcr2()) >= 0){
switchuvm(proc);
if(pagefault(proc->pgdir, proc->vmap, rcr2(), tf->err) >= 0){
return;
}
}
......
......@@ -104,6 +104,44 @@ mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm)
return 0;
}
// Rewrite the permission bits of every PTE covering [begin, end]:
// each entry keeps its physical address (PTE_ADDR) but gets the new
// perm bits plus PTE_P.  Always returns 0.
static int
updatepages(pde_t *pgdir, void *begin, void *end, int perm)
{
  char *va = PGROUNDDOWN(begin);
  char *stop = PGROUNDDOWN(end);

  while (1) {
    pte_t *pte = walkpgdir(pgdir, va, 1);
    if (pte != 0)
      *pte = PTE_ADDR(*pte) | perm | PTE_P;
    if (va == stop)
      break;
    va += PGSIZE;
  }
  return 0;
}
// Zero every PTE covering [begin, end], unmapping those pages from
// pgdir.  Always returns 0.
//
// FIX: pass create=0 to walkpgdir.  The old code passed 1, which
// allocated page-table pages just to clear entries in them — an
// absent page table already means "not mapped".
static int
clearpages(pde_t *pgdir, void *begin, void *end)
{
  char *a, *last;
  pte_t *pte;

  a = PGROUNDDOWN(begin);
  last = PGROUNDDOWN(end);
  for (;;) {
    pte = walkpgdir(pgdir, a, 0);
    if (pte != 0)
      *pte = 0;
    if (a == last)
      break;
    a += PGSIZE;
  }
  return 0;
}
// The mappings from logical to linear are one to one (i.e.,
// segmentation doesn't do anything).
// There is one page table per process, plus one that's used
......@@ -200,35 +238,45 @@ struct {
} vmaps;
struct vmnode *
vmn_alloc(void)
vmn_alloc(uint npg, uint type)
{
for(uint i = 0; i < sizeof(vmnodes.n) / sizeof(vmnodes.n[0]); i++) {
struct vmnode *n = &vmnodes.n[i];
if(n->alloc == 0 && __sync_bool_compare_and_swap(&n->alloc, 0, 1)) {
n->npages = 0;
if(npg > sizeof(n->page) / sizeof(n->page[0])) {
panic("vmnode too big\n");
}
for (uint i = 0; i < sizeof(n->page) / sizeof(n->page[0]); i++)
n->page[i] = 0;
n->npages = npg;
n->ref = 0;
n->ip = 0;
n->type = type;
return n;
}
}
panic("out of vmnodes");
}
struct vmnode *
vmn_allocpg(uint npg)
static int
vmn_doallocpg(struct vmnode *n)
{
struct vmnode *n = vmn_alloc();
if(npg > sizeof(n->page) / sizeof(n->page[0])) {
cprintf("vmnode too big: %d\n", npg);
return 0;
}
for(uint i = 0; i < npg; i++) {
for(uint i = 0; i < n->npages; i++) {
if((n->page[i] = kalloc()) == 0) {
vmn_free(n);
return 0;
return -1;
}
memset((char *) n->page[i], 0, PGSIZE);
n->npages++;
}
return 0;
}
// Convenience wrapper: allocate an EAGER vmnode for npg pages with
// all physical pages populated up front.  Returns 0 on failure.
struct vmnode *
vmn_allocpg(uint npg)
{
  struct vmnode *node = vmn_alloc(npg, EAGER);

  if (node == 0)
    return 0;
  if (vmn_doallocpg(node) < 0)
    return 0;
  return node;
}
......@@ -236,9 +284,14 @@ void
vmn_free(struct vmnode *n)
{
for(uint i = 0; i < n->npages; i++) {
kfree((char *) n->page[i]);
n->page[i] = 0;
if (n->page[i]) {
kfree((char *) n->page[i]);
n->page[i] = 0;
}
}
if (n->ip)
iput(n->ip);
n->ip = 0;
n->alloc = 0;
}
......@@ -252,10 +305,23 @@ vmn_decref(struct vmnode *n)
// Duplicate vmnode n into a freshly allocated node of the same size
// and type.  For ONDEMAND nodes the backing inode reference and file
// extent are copied; if n already has pages resident they are
// deep-copied too.  Returns the copy, or 0 if allocation fails.
struct vmnode *
vmn_copy(struct vmnode *n)
{
  struct vmnode *c = vmn_alloc(n->npages, n->type);
  if(c != 0) {
    if (n->type == ONDEMAND) {
      c->ip = idup(n->ip);
      c->offset = n->offset;
      c->sz = n->sz;  // FIX: was `c->sz = c->sz` (self-assignment bug)
    }
    if (n->page[0]) { // If the first page is present, all of them are present
      if (vmn_doallocpg(c) < 0) {
        panic("vmn_copy\n");
      }
      for(uint i = 0; i < n->npages; i++) {
        memmove(c->page[i], n->page[i], PGSIZE);
      }
    }
  }
  return c;
}
......@@ -267,6 +333,7 @@ vmap_alloc(void)
if(m->alloc == 0 && __sync_bool_compare_and_swap(&m->alloc, 0, 1)) {
for(uint j = 0; j < sizeof(m->e) / sizeof(m->e[0]); j++){
m->e[j].n = 0;
m->e[j].va_type = PRIVATE;
m->e[j].lock.name = "vma";
}
m->lock.name = "vmap";
......@@ -360,7 +427,7 @@ vmap_lookup(struct vmap *m, uint va)
}
struct vmap *
vmap_copy(struct vmap *m)
vmap_copy(struct vmap *m, pde_t* pgdir, int share)
{
struct vmap *c = vmap_alloc();
if(c == 0)
......@@ -372,7 +439,15 @@ vmap_copy(struct vmap *m)
continue;
c->e[i].va_start = m->e[i].va_start;
c->e[i].va_end = m->e[i].va_end;
c->e[i].n = vmn_copy(m->e[i].n);
if (share) {
c->e[i].n = m->e[i].n;
c->e[i].va_type = COW;
m->e[i].va_type = COW;
updatepages(pgdir, (void *) (m->e[i].va_start), (void *) (m->e[i].va_end), PTE_COW);
} else {
c->e[i].n = vmn_copy(m->e[i].n);
c->e[i].va_type = m->e[i].va_type;
}
if(c->e[i].n == 0) {
release(&m->lock);
vmap_decref(c);
......@@ -380,13 +455,15 @@ vmap_copy(struct vmap *m)
}
__sync_fetch_and_add(&c->e[i].n->ref, 1);
}
if (share)
lcr3(PADDR(pgdir)); // Reload hardware page table
release(&m->lock);
return c;
}
// Load a program segment into a vmnode.
int
vmn_load(struct vmnode *vmn, struct inode *ip, uint offset, uint sz)
static int
vmn_doload(struct vmnode *vmn, struct inode *ip, uint offset, uint sz)
{
for(uint i = 0; i < sz; i += PGSIZE){
uint n;
......@@ -401,6 +478,20 @@ vmn_load(struct vmnode *vmn, struct inode *ip, uint offset, uint sz)
return 0;
}
// Load a program segment into a vmnode.  EAGER nodes read the data
// in immediately via vmn_doload; ONDEMAND nodes just record the
// inode/offset/size so the page-fault path can read pages lazily.
// The caller's inode reference is stored in the vmnode (vmn_free
// later iputs it).  Returns 0 on success, <0 on load failure.
int
vmn_load(struct vmnode *vmn, struct inode *ip, uint offset, uint sz)
{
  if (vmn->type != ONDEMAND)
    return vmn_doload(vmn, ip, offset, sz);

  vmn->ip = ip;
  vmn->offset = offset;
  vmn->sz = sz;
  return 0;
}
// Free a page table and all the physical memory pages
// in the user part.
void
......@@ -487,8 +578,9 @@ copyin(struct vmap *vmap, uint va, void *p, uint len)
}
int
pagefault(pde_t *pgdir, struct vmap *vmap, uint va)
pagefault(pde_t *pgdir, struct vmap *vmap, uint va, uint err)
{
pte_t *pte = walkpgdir(pgdir, (const void *)va, 1);
if((*pte & (PTE_P|PTE_U|PTE_W)) == (PTE_P|PTE_U|PTE_W))
return 0;
......@@ -497,8 +589,55 @@ pagefault(pde_t *pgdir, struct vmap *vmap, uint va)
if(m == 0)
return -1;
// cprintf("%d: pf addr=0x%x err 0x%x\n", proc->pid, va, err);
// cprintf("%d: pf vma type = %d refcnt %d vmn type %d pte=0x%x\n", proc->pid, m->va_type, m->n->ref, m->n->type, *pte);
uint npg = (PGROUNDDOWN(va) - m->va_start) / PGSIZE;
*pte = PADDR(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
if (m->n && m->n->ip && *pte == 0x0 && m->n->page[npg] == 0) {
// cprintf("ODP\n");
if (vmn_doallocpg(m->n) < 0) {
panic("pagefault: couldn't allocate pages");
}
release(&m->lock);
if (vmn_doload(m->n, m->n->ip, m->n->offset, m->n->sz) < 0) {
panic("pagefault: couldn't load");
}
acquire(&m->lock);
pte = walkpgdir(pgdir, (const void *)va, 0);
if (pte == 0x0)
panic("pagefault: not paged in???");
// cprintf("ODP done\n");
}
if (m->va_type == COW && (err & FEC_WR)) {
// Write to a COW page
// cprintf("write to cow\n");
if (m->n->ref == 1) { // if vma isn't shared any more, make it private
m->va_type = PRIVATE;
*pte = PADDR(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
} else { // vma is still shared; give process its private copy
struct vmnode *c = vmn_copy(m->n);
c->ref = 1;
__sync_sub_and_fetch(&m->n->ref, 1);
if (m->n->ref == 0)
panic("cow");
m->va_type = PRIVATE;
m->n = c;
// Update the hardware page tables to reflect the change to the vma
clearpages(pgdir, (void *) m->va_start, (void *) m->va_end);
pte = walkpgdir(pgdir, (const void *)va, 0);
*pte = PADDR(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
}
} else if (m->va_type == COW) {
// cprintf("cow\n");
*pte = PADDR(m->n->page[npg]) | PTE_P | PTE_U | PTE_COW;
} else {
// cprintf("fill in pte\n");
if (m->n->ref > 1)
panic("pagefault");
*pte = PADDR(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
}
lcr3(PADDR(pgdir)); // Reload hardware page tables
release(&m->lock);
return 1;
}
......@@ -175,6 +175,13 @@ rcr3(void)
return val;
}
// Read the CPU's 64-bit time-stamp counter.  The RDTSC instruction
// returns the low 32 bits in EAX and the high 32 bits in EDX.
static __inline__ unsigned long long rdtsc(void)
{
  unsigned low, high;
  __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
  return ((unsigned long long) high << 32) | low;
}
//PAGEBREAK: 36
// Layout of the trap frame built on the stack by the
// hardware and by trapasm.S, and passed to trap().
......
You added 0 people to this discussion. Please proceed with caution.
Please finish editing this comment first!
Register or sign in to post a comment.