no cilk

Parent 8de2eb72
@@ -28,7 +28,6 @@ static inline void *p2v(uptr a) {
}
struct trapframe;
struct cilkframe;
struct spinlock;
struct condvar;
struct context;
@@ -237,28 +236,6 @@ void uartintr(void);
void switchvm(struct proc*);
int pagefault(struct vmap *, uptr, u32);
// cilk.c
void initcilkframe(struct cilkframe*);
#if CILKENABLE
void cilk_push(void (*fn)(uptr, uptr), u64 arg0, u64 arg1);
void cilk_start(void);
u64 cilk_end(void);
void cilk_dump(void);
void cilk_abort(u64 val);
#else
#define cilk_push(rip, arg0, arg1) do { \
void (*fn)(uptr, uptr) = rip; \
fn(arg0, arg1); \
} while(0)
#define cilk_start() do { } while(0)
#define cilk_end() (myproc()->cilkframe.abort)
#define cilk_dump() do { } while(0)
#define cilk_abort(val) do { \
cmpxch(&myproc()->cilkframe.abort, (u64)0, (u64)val); \
} while (0)
#endif
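// (With CILKENABLE unset, the macros above run pushed work
// synchronously: cilk_push(fn, a0, a1) is just fn(a0, a1).)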
// other exported/imported functions
void cmain(u64 mbmagic, u64 mbaddr);
void mpboot(void);
......
@@ -22,12 +22,6 @@ struct context {
u64 rip;
} __attribute__((packed));
// Work queue frame
struct cilkframe {
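// ref counts outstanding pushed work in this frame; abort
// latches the first value passed to cilk_abort().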
volatile std::atomic<u64> ref;
volatile std::atomic<u64> abort;
};
// Per-process, per-stack metadata for mtrace
#if MTRACE
#define MTRACE_NSTACKS 16
@@ -77,7 +71,6 @@ struct proc : public rcu_freed {
struct mtrace_stacks mtrace_stacks;
#endif
struct runq *runq;
struct cilkframe cilkframe;
STAILQ_ENTRY(proc) runqlink;
struct condvar *oncv; // Where it is sleeping, for kill()
......
@@ -3,7 +3,6 @@ OBJS = \
bio.o \
bootdata.o \
cga.o \
cilk.o \
condvar.o \
console.o \
cpprt.o \
......
// Cilk-style run queue built on wq.cc:
// A core pushes work to the head of its per-core wq.
// A core pops work from the head of its per-core wq.
// A core pops work from the tail of another core's per-core work queue.
//
// Usage:
// void goo(uptr a0, uptr a1) {
// char *arg = (char*) a0;
// cprintf("goo\n");
// arg[1] = 'g';
// }
// void foo(uptr a0, uptr a1) {
// char *arg = (char*) a0;
// cilk_push(goo, a0, 0);
// arg[0] = 'f';
// cprintf("foo\n");
// }
// void example(void) {
// char arg[2];
// cilk_start();
// cilk_push(foo, (uptr)arg, 0);
// cprintf("example\n");
// cilk_end();
// cprintf("%c %c\n", arg[0], arg[1]);
// }
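// After cilk_end() returns, both foo and goo have run, so the
// final cprintf prints "f g".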
#include "types.h"
#include "kernel.hh"
#include "amd64.h"
#include "cpu.hh"
#include "bits.hh"
#include "spinlock.h"
#include "condvar.h"
#include "queue.h"
#include "proc.hh"
#if CILKENABLE
#include "mtrace.h"
#include "wq.hh"
#include "percpu.hh"
struct cilkstat {
u64 push;
u64 full;
u64 steal;
};
static percpu<cilkstat> stat;
static struct cilkframe *
cilk_frame(void)
{
return mycpu()->cilkframe;
}
static void
__cilk_run(struct work *w, void *xfn, void *arg0, void *arg1, void *xframe)
{
void (*fn)(uptr arg0, uptr arg1) = (void(*)(uptr,uptr))xfn;
struct cilkframe *frame = (struct cilkframe *)xframe;
struct cilkframe *old = mycpu()->cilkframe;
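// If this work belongs to another core's frame, count it as a steal.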
if (old != frame)
stat->steal++;
mycpu()->cilkframe = frame;
if (frame->abort == 0)
fn((uptr)arg0, (uptr)arg1);
mycpu()->cilkframe = old;
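// Drop the reference taken by cilk_push().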
frame->ref--;
}
// Add the (rip, arg0, arg1) work to the local work queue.
// Guarantees that some core will eventually execute the work.
// The current core might execute the work immediately.
void
cilk_push(void (*fn)(uptr, uptr), u64 arg0, u64 arg1)
{
struct work *w;
w = allocwork();
if (w == nullptr) {
fn(arg0, arg1);
return;
}
w->rip = (void*)__cilk_run;
w->arg0 = (void*)fn;
w->arg1 = (void*)arg0;
w->arg2 = (void*)arg1;
w->arg3 = (void*)cilk_frame();
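// If the local queue is full, run the work synchronously.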
if (wq_push(w)) {
freework(w);
fn(arg0, arg1);
stat->full++;
} else {
cilk_frame()->ref++;
stat->push++;
}
}
// Start a new work queue frame.
// We don't allow nested work queue frames.
void
cilk_start(void)
{
pushcli();
if (myproc()->cilkframe.ref != 0)
panic("cilk_start");
mycpu()->cilkframe = &myproc()->cilkframe;
}
// End the current work queue frame.
// The core keeps executing queued work until the frame's
// reference count drops to 0.
u64
cilk_end(void)
{
u64 r;
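// Help drain the queue instead of idling while work is outstanding.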
while (cilk_frame()->ref != 0)
wq_trywork();
r = cilk_frame()->abort;
mycpu()->cilkframe = 0;
popcli();
return r;
}
void
cilk_abort(u64 val)
{
cmpxch(&cilk_frame()->abort, (u64)0, val);
}
void
cilk_dump(void)
{
int i;
for (i = 0; i < NCPU; i++)
cprintf("push %lu full %lu steal %lu\n",
stat[i].push, stat[i].full, stat[i].steal);
}
static void
__test_stub(uptr a0, uptr a1)
{
//cprintf("%lu, %lu\n", a0, a1);
}
void
testcilk(void)
{
enum { iters = 1000 };
static volatile int running = 1;
u64 e, s;
int i;
pushcli();
if (mycpu()->id == 0) {
microdelay(1);
s = rdtsc();
cilk_start();
for (i = 0; i < iters; i++)
cilk_push(__test_stub, i, i);
cilk_end();
e = rdtsc();
cprintf("testcilk: %lu\n", (e-s)/iters);
cilk_dump();
running = 0;
} else {
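// All other cores service the work queue until core 0 finishes.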
while (running)
wq_trywork();
}
popcli();
}
#endif // CILKENABLE
void
initcilkframe(struct cilkframe *cilk)
{
cilk->ref = 0;
cilk->abort = 0;
}
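// A minimal sketch (hypothetical, not from this tree) of aborting a
// frame: any pushed function may call cilk_abort(); workers that have
// not yet started then skip their fn in __cilk_run, and cilk_end()
// returns the latched abort value. do_step() below is made up.
//
//   void step(uptr a0, uptr a1) {
//     if (do_step(a0) < 0)
//       cilk_abort(-1);
//   }
//   void run(void) {
//     cilk_start();
//     for (u64 i = 0; i < 10; i++)
//       cilk_push(step, i, 0);
//     if (cilk_end() != 0)
//       cprintf("aborted\n");
//   }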
@@ -17,14 +17,6 @@
#define BRK (USERTOP >> 1)
struct eargs {
struct proc *proc;
struct inode *ip;
struct vmap *vmap;
const char *path;
char **argv;
};
static int
donotes(struct inode *ip, uwq *uwq, u64 off)
{
@@ -55,50 +47,42 @@ donotes(struct inode *ip, uwq *uwq, u64 off)
return -1;
}
static void
dosegment(struct eargs *args, u64 off)
static int
dosegment(inode* ip, vmap* vmp, u64 off)
{
struct vmnode *vmn = nullptr;
struct proghdr ph;
uptr va_start, va_end;
uptr in_off;
uptr in_sz;
int npg;
if(readi(args->ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph))
goto bad;
if(readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph))
return -1;
if(ph.type != ELF_PROG_LOAD)
goto bad;
return -1;
if(ph.memsz < ph.filesz)
goto bad;
return -1;
if (ph.offset < PGOFFSET(ph.vaddr))
goto bad;
return -1;
va_start = PGROUNDDOWN(ph.vaddr);
va_end = PGROUNDUP(ph.vaddr + ph.memsz);
in_off = ph.offset - PGOFFSET(ph.vaddr);
in_sz = ph.filesz + PGOFFSET(ph.vaddr);
uptr va_start = PGROUNDDOWN(ph.vaddr);
uptr va_end = PGROUNDUP(ph.vaddr + ph.memsz);
off_t in_off = ph.offset - PGOFFSET(ph.vaddr);
size_t in_sz = ph.filesz + PGOFFSET(ph.vaddr);
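// e.g. vaddr=0x401234, offset=0x1234, filesz=0x100 gives
// va_start=0x401000, in_off=0x1000, in_sz=0x334.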
npg = (va_end - va_start) / PGSIZE;
if ((vmn = new vmnode(npg, ONDEMAND,
args->ip, in_off, in_sz)) == 0)
goto bad;
size_t npg = (va_end - va_start) / PGSIZE;
vmnode* node = new vmnode(npg, ONDEMAND, ip, in_off, in_sz);
if (node == nullptr)
return -1;
if(args->vmap->insert(vmn, va_start, 1) < 0)
goto bad;
if (vmp->insert(node, va_start, 1) < 0) {
delete node;
return -1;
}
return;
bad:
cilk_abort(-1);
return 0;
}
static void
dostack(struct eargs *args)
static long
dostack(vmap* vmp, char** argv, const char* path)
{
struct vmnode *vmn = nullptr;
uptr argstck[1+MAXARG];
const char *s, *last;
s64 argc;
uptr sp;
@@ -112,65 +96,48 @@ dostack(struct eargs *args)
// Allocate a one-page stack at the top of the (user) address space
if((vmn = new vmnode(USTACKPAGES)) == 0)
goto bad;
if(args->vmap->insert(vmn, USERTOP-(USTACKPAGES*PGSIZE), 1) < 0)
goto bad;
return -1;
if(vmp->insert(vmn, USERTOP-(USTACKPAGES*PGSIZE), 1) < 0)
return -1;
for (argc = 0; args->argv[argc]; argc++)
for (argc = 0; argv[argc]; argc++)
if(argc >= MAXARG)
goto bad;
return -1;
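// The resulting user stack, from high to low addresses:
//   USERTOP
//   argument strings (8-byte aligned)
//   argv[] array of argc+1 pointers (argstck)
//   argc                      <- final sp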
// Push argument strings
sp = USERTOP;
for(int i = argc-1; i >= 0; i--) {
sp -= strlen(args->argv[i]) + 1;
sp -= strlen(argv[i]) + 1;
sp &= ~7;
if(args->vmap->copyout(sp, args->argv[i], strlen(args->argv[i]) + 1) < 0)
goto bad;
if(vmp->copyout(sp, argv[i], strlen(argv[i]) + 1) < 0)
return -1;
argstck[i] = sp;
}
argstck[argc] = 0;
sp -= (argc+1) * 8;
if(args->vmap->copyout(sp, argstck, (argc+1)*8) < 0)
goto bad;
if(vmp->copyout(sp, argstck, (argc+1)*8) < 0)
return -1;
sp -= 8;
if(args->vmap->copyout(sp, &argc, 8) < 0)
goto bad;
// Save program name for debugging.
for(last=s=args->path; *s; s++)
if(*s == '/')
last = s+1;
// XXX(sbw) Oops, don't want to do this, unless we have abort
safestrcpy(args->proc->name, last, sizeof(args->proc->name));
args->proc->tf->rsp = sp;
return;
if(vmp->copyout(sp, &argc, 8) < 0)
return -1;
bad:
cilk_abort(-1);
return sp;
}
static void
doheap(struct eargs *args)
static int
doheap(vmap* vmp)
{
struct vmnode *vmn = nullptr;
// Allocate a vmnode for the heap.
// XXX pre-allocate 32 pages..
if((vmn = new vmnode(32)) == 0)
goto bad;
if(args->vmap->insert(vmn, BRK, 1) < 0)
goto bad;
args->vmap->brk_ = BRK + 8; // XXX so that brk-1 points within heap vma..
struct vmnode *vmn;
return;
if((vmn = new vmnode(32)) == nullptr)
return -1;
if(vmp->insert(vmn, BRK, 1) < 0)
return -1;
vmp->brk_ = BRK + 8; // XXX so that brk-1 points within heap vma..
bad:
cilk_abort(-1);
return 0;
}
static void
@@ -187,6 +154,7 @@ exec(const char *path, char **argv)
struct inode *ip = nullptr;
struct vmap *vmp = nullptr;
uwq* newuwq = nullptr;
const char *s, *last;
struct elfhdr elf;
struct proghdr ph;
u64 off;
@@ -194,6 +162,7 @@
vmap* oldvmap;
uwq* olduwq;
cwork* w;
long sp;
myproc()->exec_cpuid_ = mycpuid();
@@ -223,38 +192,33 @@
if((newuwq = uwq::alloc(vmp, myproc()->ftable)) == 0)
goto bad;
// Arguments for work queue
struct eargs args;
args.proc = myproc();
args.ip = ip;
args.vmap = vmp;
args.path = path;
args.argv = argv;
cilk_start();
for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
Elf64_Word type;
if(readi(ip, (char*)&type,
off+__offsetof(struct proghdr, type),
sizeof(type)) != sizeof(type))
goto bad;
if (type == ELF_PROG_NOTE) {
if (donotes(ip, newuwq, off) < 0) {
cilk_abort(-1);
break;
}
}
if(type != ELF_PROG_LOAD)
switch (type) {
case ELF_PROG_NOTE:
if (donotes(ip, newuwq, off) < 0)
goto bad;
break;
case ELF_PROG_LOAD:
if (dosegment(ip, vmp, off) < 0)
goto bad;
break;
default:
continue;
cilk_call(dosegment, &args, off);
}
}
cilk_call(doheap, &args);
if (doheap(vmp) < 0)
goto bad;
// dostack reads from the user vm space. wq workers don't switch
// the user vm.
dostack(&args);
if (cilk_end())
if ((sp = dostack(vmp, argv, path)) < 0)
goto bad;
// Commit to the user image.
@@ -263,6 +227,13 @@
myproc()->vmap = vmp;
myproc()->uwq = newuwq;
myproc()->tf->rip = elf.entry;
myproc()->tf->rsp = sp;
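// Record the last path component as the process name, for debugging.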
for(last=s=path; *s; s++)
if(*s == '/')
last = s+1;
safestrcpy(myproc()->name, last, sizeof(myproc()->name));
switchvm(myproc());
......
@@ -215,8 +215,6 @@ proc::alloc(void)
p->mtrace_stacks.curr = -1;
#endif
initcilkframe(&p->cilkframe);
if (xnspid->insert(p->pid, p) < 0)
panic("allocproc: ns_insert");
......