Commit ae76c346 authored by Silas Boyd-Wickizer

Merge conflicts

@@ -20,7 +20,8 @@ NM = $(TOOLPREFIX)nm
OBJCOPY = $(TOOLPREFIX)objcopy
CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb \
-m64 -Werror -std=c99 -fms-extensions -mno-sse -mcmodel=large -I$(QEMUSRC) \
-m64 -Werror -std=c99 -fms-extensions -mno-sse -mcmodel=large -mno-red-zone \
-I$(QEMUSRC) \
-fno-omit-frame-pointer -DHW_$(HW) -include param.h -include compiler.h
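# Note: -mno-red-zone matters for kernel code; the SysV ABI lets leaf
# functions use the 128 bytes below %rsp, which interrupt handlers
# running on the same stack would otherwise clobber.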
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)
ASFLAGS = -m64 -gdwarf-2 -MD
@@ -29,6 +30,7 @@ LDFLAGS += -m elf_x86_64
OBJS = \
bio.o \
cga.o \
cilk.o \
condvar.o \
console.o \
crange.o \
......
@@ -115,12 +115,6 @@ rep_nop(void)
}
static inline void
barrier(void)
{
__asm volatile("" ::: "memory");
}
static inline void
lidt(void *p)
{
__asm volatile("lidt (%0)" : : "r" (p) : "memory");
......
// cilk style run queue
// A work queue is built from NCPU per-core wqueues.
// A core pushes work to the head of its per-core wqueue.
// A core pops work from the head of its per-core wqueue.
// A core pops work from the tail of another core's per-core wqueue.
//
// Usage:
// void goo(uptr a0, uptr a1) {
// char *arg = (char*) a0;
// cprintf("goo\n");
// arg[1] = 'g';
// }
// void foo(uptr a0, uptr a1) {
// char *arg = (char*) a0;
// cilk_push(goo, a0, 0);
// arg[0] = 'f';
// cprintf("foo\n");
// }
// void example(void) {
// char arg[2];
// cilk_start();
// cilk_push(foo, (uptr)arg, 0);
// cprintf("example\n");
// cilk_end();
// cprintf("%c %c\n", arg[0], arg[1]);
// }
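//
// cilk_end() returns only once every pushed piece of work has run:
// each successful cilk_push increments the current frame's reference
// count and __cilk_run decrements it when the work completes.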
#if CILKENABLE
#include "types.h"
#include "kernel.h"
#include "amd64.h"
#include "cpu.h"
#include "bits.h"
#include "spinlock.h"
#include "condvar.h"
#include "queue.h"
#include "proc.h"
#include "mtrace.h"
#include "qlock.h"
#define NSLOTS (1 << CILKSHIFT)
struct cilkqueue {
struct cilkthread *thread[NSLOTS];
volatile int head __mpalign__;
qlock_t lock;
volatile int tail;
__padout__;
} __mpalign__;
struct cilkthread {
u64 rip;
u64 arg0;
u64 arg1;
struct cilkframe *frame; // parent cilkframe
__padout__;
} __mpalign__;
struct cilkstat {
u64 push;
u64 full;
u64 pop;
u64 steal;
__padout__;
} __mpalign__;
struct cilkqueue queue[NCPU] __mpalign__;
struct cilkstat stat[NCPU] __mpalign__;
static struct cilkqueue *
cilk_cur(void)
{
return &queue[mycpu()->id];
}
static struct cilkframe *
cilk_frame(void)
{
return mycpu()->cilkframe;
}
static struct cilkstat *
cilk_stat(void)
{
return &stat[mycpu()->id];
}
static int
__cilk_push(struct cilkqueue *q, struct cilkthread *t)
{
int i;
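// head and tail increase monotonically; head - tail is the number of
// queued entries, and masking with NSLOTS-1 (NSLOTS is a power of
// two) maps the monotonic index onto the slot array.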
i = q->head;
if ((i - q->tail) == NSLOTS) {
cilk_stat()->full++;
return -1;
}
i = i & (NSLOTS-1);
q->thread[i] = t;
q->head++;
cilk_stat()->push++;
return 0;
}
static struct cilkthread *
__cilk_pop(struct cilkqueue *q)
{
struct qnode qn;
int i;
ql_lock(&q->lock, &qn);
i = q->head;
if ((i - q->tail) == 0) {
ql_unlock(&q->lock, &qn);
return NULL;
}
i = (i-1) & (NSLOTS-1);
q->head--;
ql_unlock(&q->lock, &qn);
cilk_stat()->pop++;
return q->thread[i];
}
static struct cilkthread *
__cilk_steal(struct cilkqueue *q)
{
struct qnode qn;
int i;
ql_lock(&q->lock, &qn);
i = q->tail;
if ((i - q->head) == 0) {
ql_unlock(&q->lock, &qn);
return NULL;
}
i = i & (NSLOTS-1);
q->tail++;
ql_unlock(&q->lock, &qn);
cilk_stat()->steal++;
return q->thread[i];
}
static void
__cilk_run(struct cilkthread *th)
{
void (*fn)(uptr arg0, uptr arg1) = (void*)th->rip;
struct cilkframe *old = mycpu()->cilkframe;
mycpu()->cilkframe = th->frame;
fn(th->arg0, th->arg1);
mycpu()->cilkframe = old;
subfetch(&th->frame->ref, 1);
kfree(th);
}
// Add the (rip, arg0, arg1) work to the local work queue.
// Guarantees some core will at some point execute the work.
// The current core might execute the work immediately.
void
cilk_push(void *rip, u64 arg0, u64 arg1)
{
void (*fn)(uptr, uptr) = rip;
struct cilkthread *th;
th = (struct cilkthread *) kalloc();
if (th == NULL) {
fn(arg0, arg1);
return;
}
th->rip = (uptr) rip;
th->arg0 = arg0;
th->arg1 = arg1;
th->frame = cilk_frame();
if (__cilk_push(cilk_cur(), th)) {
kfree(th);
fn(arg0, arg1);
} else
fetchadd(&cilk_frame()->ref, 1);
}
// Try to execute one cilkthread.
// Check local queue then steal from other queues.
int
cilk_trywork(void)
{
struct cilkthread *th;
int i;
pushcli();
th = __cilk_pop(cilk_cur());
if (th != NULL) {
__cilk_run(th);
popcli();
return 1;
}
// XXX(sbw) should be random
for (i = 0; i < NCPU; i++) {
if (i == mycpu()->id)
continue;
th = __cilk_steal(&queue[i]);
if (th != NULL) {
__cilk_run(th);
popcli();
return 1;
}
}
popcli();
return 0;
}
// Start a new work queue frame.
// We don't allow nested work queue frames.
void
cilk_start(void)
{
pushcli();
if (myproc()->cilkframe.ref != 0)
panic("cilk_start");
mycpu()->cilkframe = &myproc()->cilkframe;
}
// End of the current work queue frame.
// The core works while the reference count of the current
// work queue frame is not 0.
void
cilk_end(void)
{
while (cilk_frame()->ref != 0) {
struct cilkthread *th;
int i;
while ((th = __cilk_pop(cilk_cur())) != NULL)
__cilk_run(th);
for (i = 0; i < NCPU; i++) {
th = __cilk_steal(&queue[i]);
if (th != NULL) {
__cilk_run(th);
break;
}
}
}
mycpu()->cilkframe = NULL;
popcli();
}
void
cilk_dump(void)
{
int i;
for (i = 0; i < NCPU; i++)
cprintf("push %lu full %lu pop %lu steal %lu\n",
stat[i].push, stat[i].full, stat[i].pop, stat[i].steal);
}
static void
__test_stub(uptr a0, uptr a1)
{
//cprintf("%lu, %lu\n", a0, a1);
}
void
testcilk(void)
{
enum { iters = 1000 };
static volatile int running = 1;
u64 e, s;
int i;
pushcli();
if (mycpu()->id == 0) {
microdelay(1);
s = rdtsc();
cilk_start();
for (i = 0; i < iters; i++)
cilk_push(__test_stub, i, i);
cilk_end();
e = rdtsc();
cprintf("testcilk: %lu\n", (e-s)/iters);
cilk_dump();
running = 0;
} else {
while (running)
cilk_trywork();
}
popcli();
}
void
initcilkframe(struct cilkframe *cilk)
{
memset(cilk, 0, sizeof(*cilk));
}
void
initcilk(void)
{
int i;
for (i = 0; i < NCPU; i++)
ql_init(&queue[i].lock, "queue lock");
}
#endif // CILKENABLE
#define __padout__ char __padout[0] __attribute__((aligned(CACHELINE)))
#define __mpalign__ __attribute__((aligned(CACHELINE)))
#define __noret__ __attribute__((noreturn))
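// CACHELINE-aligning (and padding out) per-CPU structures keeps each
// core's hot fields on separate cache lines, avoiding false sharing.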
static inline void
barrier(void)
{
__asm volatile("" ::: "memory");
}
@@ -162,10 +162,10 @@ kerneltrap(struct trapframe *tf)
kstack = myproc()->kstack;
}
__cprintf("kernel trap %u cpu %u\n"
" tf: rip %p rsp %p cr2 %p\n"
" tf: rip %p rsp %p rbp %p cr2 %p cs %p\n"
" proc: name %s pid %u kstack %p\n",
tf->trapno, mycpu()->id,
tf->rip, tf->rsp, rcr2(),
tf->rip, tf->rsp, tf->rbp, rcr2(), tf->cs,
name, pid, kstack);
printtrace(tf->rbp);
@@ -235,6 +235,14 @@ consoleintr(int (*getc)(void))
case C('P'): // Process listing.
procdumpall();
break;
case C('E'): // Print user-space PCs.
for (u32 i = 0; i < NCPU; i++)
cpus[i].timer_printpc = 1;
break;
case C('T'): // Print user-space PCs and stack traces.
for (u32 i = 0; i < NCPU; i++)
cpus[i].timer_printpc = 2;
break;
case C('U'): // Kill line.
while(input.e != input.w &&
input.buf[(input.e-1) % INPUT_BUF] != '\n'){
@@ -248,8 +256,8 @@ consoleintr(int (*getc)(void))
consputc(BACKSPACE);
}
break;
case C('W'): // Work queue stats
wq_dump();
case C('C'): // cilk stats
cilk_dump();
break;
case C('L'): // Prof stats
profdump();
......
#include "mmu.h"
struct wqframe;
struct cilkframe;
// Per-CPU state
struct cpu {
@@ -10,7 +10,8 @@ struct cpu {
struct segdesc gdt[NSEGS]; // x86 global descriptor table
struct taskstate ts; // Used by x86 to find stack for interrupt
struct context *scheduler; // swtch() here to enter scheduler
struct wqframe *wqframe;
struct cilkframe *cilkframe;
int timer_printpc;
// Cpu-local storage variables; see below
struct cpu *cpu;
......
@@ -190,7 +190,7 @@ exec(char *path, char **argv)
args.path = path;
args.argv = argv;
wq_start();
cilk_start();
for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
Elf64_Word type;
if(readi(ip, (char*)&type,
@@ -199,7 +199,7 @@
goto bad;
if(type != ELF_PROG_LOAD)
continue;
wq_push(dosegment, (uptr)&args, (uptr)off);
cilk_push(dosegment, (uptr)&args, (uptr)off);
}
if (odp) {
@@ -210,14 +210,14 @@
ip = 0;
}
wq_push(doheap, (uptr)&args, (uptr)0);
cilk_push(doheap, (uptr)&args, (uptr)0);
// dostack reads from the user address space. The wq
// stuff doesn't switch to the user address space.
//wq_push(dostack, (uptr)&args, (uptr)0);
//cilk_push(dostack, (uptr)&args, (uptr)0);
dostack((uptr)&args, (uptr)0);
wq_end();
cilk_end();
// Commit to the user image.
oldvmap = myproc()->vmap;
......
@@ -12,9 +12,9 @@ static inline uptr v2p(void *a) { return (uptr) a - KBASE; }
static inline void *p2v(uptr a) { return (void *) a + KBASE; }
struct trapframe;
struct cilkframe;
struct spinlock;
struct condvar;
struct wqframe;
struct context;
struct vmnode;
struct inode;
@@ -328,20 +328,27 @@ struct vmap * vmap_copy(struct vmap *, int);
// wq.c
#if WQENABLE
void wq_push(void *rip, u64 arg0, u64 arg1);
void wq_start(void);
void wq_end(void);
void wq_dump(void);
int wq_trywork(void);
void initwqframe(struct wqframe *wq);
#else
#define wq_push(rip, arg0, arg1) do { \
#define wq_trywork() 0
#endif
// cilk.c
#if CILKENABLE
void cilk_push(void *rip, u64 arg0, u64 arg1);
void cilk_start(void);
void cilk_end(void);
void cilk_dump(void);
int cilk_trywork(void);
void initcilkframe(struct cilkframe *wq);
#else
#define cilk_push(rip, arg0, arg1) do { \
void (*fn)(uptr, uptr) = rip; \
fn(arg0, arg1); \
} while(0)
#define wq_start() do { } while(0)
#define wq_end() do { } while(0)
#define wq_dump() do { } while(0)
#define wq_trywork() 0
#define initwqframe(x) do { } while (0)
#define cilk_start() do { } while(0)
#define cilk_end() do { } while(0)
#define cilk_dump() do { } while(0)
#define cilk_trywork() 0
#define initcilkframe(x) do { } while (0)
#endif
@@ -42,3 +42,4 @@ struct klockstat {
#define LOCKSTAT_PROC 1
#define LOCKSTAT_SCHED 1
#define LOCKSTAT_VM 1
#define LOCKSTAT_WQ 1
@@ -24,6 +24,7 @@ extern void initdisk(void);
extern void inituser(void);
extern void inithz(void);
extern void initwq(void);
extern void initcilk(void);
extern void initsamp(void);
extern void initpci(void);
extern void initnet(void);
@@ -103,8 +104,9 @@ cmain(u64 mbmagic, u64 mbaddr)
initbio(); // buffer cache
initinode(); // inode cache
initdisk(); // disk
#if WQENABLE
initwq(); // work queues
initwq();
#if CILKENABLE
initcilk();
#endif
initsamp();
initlockstat();
......
@@ -51,7 +51,7 @@ main(int ac, char **av)
int nthread = atoi(av[1]);
acquire(&l);
printf(1, "mapbench[%d]: start esp %x, nthread=%d\n", getpid(), rrsp(), nthread);
// printf(1, "mapbench[%d]: start esp %x, nthread=%d\n", getpid(), rrsp(), nthread);
for(int i = 0; i < nthread; i++) {
sbrk(8192);
@@ -72,7 +72,7 @@
acquire(&l);
}
release(&l);
printf(1, "mapbench[%d]: done\n", getpid());
// printf(1, "mapbench[%d]: done\n", getpid());
for(int i = 0; i < nthread; i++)
wait();
......
@@ -13,7 +13,7 @@
#define CACHELINE 64 // cache line size
#define CPUKSTACKS (NPROC + NCPU)
#define QUANTUM 10 // scheduling time quantum and tick length (in msec)
#define WQSHIFT 4 // 2^WQSHIFT work queue slots
#define CILKSHIFT 4 // 2^CILKSHIFT work queue slots
#define VICTIMAGE 1000000 // cycles a proc executes before an eligible victim
#define VERBOSE 0 // print kernel diagnostics
#define SPINLOCK_DEBUG 1 // Debug spin locks
@@ -21,20 +21,19 @@
#define VERIFYFREE LOCKSTAT
#define ALLOC_MEMSET 1
#define KSHAREDSIZE (32 << 10)
#define WQENABLE 1
#define WQSHIFT 4
#if defined(HW_josmp)
#define NCPU 16 // maximum number of CPUs
#define MTRACE 0
#define WQENABLE 0 // Enable work queue
#define PERFSIZE (1<<30ull)
#elif defined(HW_qemu)
#define NCPU 4 // maximum number of CPUs
#define MTRACE 0
#define WQENABLE 1 // Enable work queue
#define PERFSIZE (16<<20ull)
#elif defined(HW_ud0)
#define NCPU 4 // maximum number of CPUs
#define MTRACE 0
#define WQENABLE 0 // Enable work queue
#define PERFSIZE (512<<20ull)
#else
#error "Unknown HW"
......
@@ -206,7 +206,7 @@ allocproc(void)
snprintf(p->lockname, sizeof(p->lockname), "cv:proc:%d", p->pid);
initlock(&p->lock, p->lockname+3, LOCKSTAT_PROC);
initcondvar(&p->cv, p->lockname);
initwqframe(&p->wqframe);
initcilkframe(&p->cilkframe);
if (ns_insert(nspid, KI(p->pid), (void *) p) < 0)
panic("allocproc: ns_insert");
......
@@ -13,7 +13,7 @@ struct context {
} __attribute__((packed));
// Work queue frame
struct wqframe {
struct cilkframe {
volatile u64 ref;
};
@@ -63,7 +63,7 @@ struct proc {
struct mtrace_stacks mtrace_stacks;
#endif
struct runq *runq;
struct wqframe wqframe;
struct cilkframe cilkframe;
STAILQ_ENTRY(proc) runqlink;
struct condvar *oncv; // Where it is sleeping, for kill()
......
@@ -71,6 +71,19 @@ trap(struct trapframe *tf)
switch(tf->trapno){
case T_IRQ0 + IRQ_TIMER:
if (mycpu()->timer_printpc) {
cprintf("cpu%d: proc %s rip %lx rsp %lx cs %x\n",
mycpu()->id,
myproc() ? myproc()->name : "(none)",
tf->rip, tf->rsp, tf->cs);
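// Only walk the saved frame pointer if it points into kernel space
// (above KBASE).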
if (mycpu()->timer_printpc == 2 && tf->rbp > KBASE) {
uptr pc[10];
getcallerpcs((void *) tf->rbp, pc, NELEM(pc));
for (int i = 0; i < 10 && pc[i]; i++)
cprintf("cpu%d: %lx\n", mycpu()->id, pc[i]);
}
mycpu()->timer_printpc = 0;
}
if (mycpu()->id == 0)
cv_tick();
lapiceoi();
......
// cilk style run queue
// A work queue is built from NCPU per-core wqueues.
// A core pushes work to the head of its per-core wqueue.
// A core pops work from the head of its per-core wqueue.
// A core pops work from the tail of another core's per-core wqueue.
//
// Usage:
// void goo(uptr a0, uptr a1) {
// char *arg = (char*) a0;
// cprintf("goo\n");
// arg[1] = 'g';
// }
// void foo(uptr a0, uptr a1) {
// char *arg = (char*) a0;
// wq_push(goo, a0, 0);
// arg[0] = 'f';
// cprintf("foo\n");
// }
// void example(void) {
// char arg[2];
// wq_start();
// wq_push(foo, (uptr)arg, 0);
// cprintf("example\n");
// wq_end();
// cprintf("%c %c\n", arg[0], arg[1]);
// }
#if WQENABLE
#include "types.h"
#include "kernel.h"
#include "spinlock.h"
#include "amd64.h"
#include "cpu.h"
#include "bits.h"
#include "spinlock.h"
#include "condvar.h"
#include "queue.h"
#include "proc.h"
#include "mtrace.h"
#include "qlock.h"
#include "wq.h"
#define NSLOTS (1 << WQSHIFT)
struct wqueue {
struct wqthread *thread[NSLOTS];
volatile int head __mpalign__;
struct work *w[NSLOTS];
qlock_t lock;
volatile int head __mpalign__;
volatile int tail;
struct spinlock lock;
__padout__;
} __mpalign__;
struct wqthread {
u64 rip;
u64 arg0;
u64 arg1;
struct wqframe *frame; // parent wqframe
__padout__;
} __mpalign__;
} __mpalign__;
struct wqstat {
u64 push;
@@ -68,145 +28,163 @@ struct wqstat {
struct wqueue queue[NCPU] __mpalign__;
struct wqstat stat[NCPU] __mpalign__;
static struct wqueue *
wq_cur(void)
static inline struct wqueue *
getwq(void)
{
return &queue[mycpu()->id];
pushcli();
return &queue[cpunum()];
}
static struct wqframe *
wq_frame(void)
static inline void
putwq(struct wqueue *wq)
{
return mycpu()->wqframe;
popcli();
}
static struct wqstat *
static inline struct wqstat *
wq_stat(void)
{
return &stat[mycpu()->id];
return &stat[cpunum()];
}
static struct work *
allocwork(void)
{
return (struct work *)kalloc();
}
static void
freework(struct work *w)
{
kfree(w);
}
static int
__wq_push(struct wqueue *q, struct wqthread *t)
int
wq_push(struct work *w)
{
int i;
i = q->head;
if ((i - q->tail) == NSLOTS) {
struct wqueue *wq = getwq();
i = wq->head;
if ((i - wq->tail) == NSLOTS) {
wq_stat()->full++;
return -1;
}
i = i & (NSLOTS-1);
q->thread[i] = t;
q->head++;
wq->w[i] = w;
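// Fill the slot before advancing head; the compiler barrier below
// keeps these stores in order so a consumer never sees an empty slot.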
barrier();
wq->head++;
wq_stat()->push++;
putwq(wq);
return 0;
}
int
wq_push1(void (*fn)(struct work *w, void *a0), void *a0)
{
struct work *w = allocwork();
if (w == NULL)
return -1;
w->rip = fn;
w->arg0 = a0;
if (wq_push(w) < 0) {
freework(w);
return -1;
}
return 0;
}
int
wq_push2(void (*fn)(struct work*, void*, void*), void *a0, void *a1)
{
struct work *w = allocwork();
if (w == NULL)
return -1;
w->rip = fn;
w->arg0 = a0;
w->arg1 = a1;
if (wq_push(w) < 0) {
freework(w);
return -1;
}
return 0;
}
static struct wqthread *
__wq_pop(struct wqueue *q)
static struct work *
__wq_pop(int c)
{
struct qnode qn;
// Called with cli
struct wqueue *wq = &queue[c];
struct work *w;
int i;
ql_lock(&q->lock, &qn);
i = q->head;
if ((i - q->tail) == 0) {
ql_unlock(&q->lock, &qn);
acquire(&wq->lock);
i = wq->head;
if ((i - wq->tail) == 0) {
release(&wq->lock);
return NULL;
}
i = (i-1) & (NSLOTS-1);
q->head--;
ql_unlock(&q->lock, &qn);
w = wq->w[i];
wq->head--;
release(&wq->lock);
wq_stat()->pop++;
return q->thread[i];
return w;
}
static struct wqthread *
__wq_steal(struct wqueue *q)
static struct work *
__wq_steal(int c)
{
struct qnode qn;
// Called with cli
struct wqueue *wq = &queue[c];
struct work *w;
int i;
ql_lock(&q->lock, &qn);
i = q->tail;
if ((i - q->head) == 0) {
ql_unlock(&q->lock, &qn);
acquire(&wq->lock);
i = wq->tail;
if ((i - wq->head) == 0) {
release(&wq->lock);
return NULL;
}
i = i & (NSLOTS-1);
q->tail++;
ql_unlock(&q->lock, &qn);
w = wq->w[i];
wq->tail++;
release(&wq->lock);
wq_stat()->steal++;
return q->thread[i];
return w;
}
static void
__wq_run(struct wqthread *th)
__wq_run(struct work *w)
{
void (*fn)(uptr arg0, uptr arg1) = (void*)th->rip;
struct wqframe *old = mycpu()->wqframe;
mycpu()->wqframe = th->frame;
fn(th->arg0, th->arg1);
mycpu()->wqframe = old;
subfetch(&th->frame->ref, 1);
kfree(th);
void (*fn)(struct work*, void*, void*) = w->rip;
fn(w, w->arg0, w->arg1);
freework(w);
}
// Add the (rip, arg0, arg1) work to the local work queue.
// Guarantees some core will at some point execute the work.
// The current core might execute the work immediately.
void
wq_push(void *rip, u64 arg0, u64 arg1)
{
void (*fn)(uptr, uptr) = rip;
struct wqthread *th;
th = (struct wqthread *) kalloc();
if (th == NULL) {
fn(arg0, arg1);
return;
}
th->rip = (uptr) rip;
th->arg0 = arg0;
th->arg1 = arg1;
th->frame = wq_frame();
if (__wq_push(wq_cur(), th)) {
kfree(th);
fn(arg0, arg1);
} else
fetchadd(&wq_frame()->ref, 1);
}
// Try to execute one wqthread.
// Check local queue then steal from other queues.
int
wq_trywork(void)
{
struct wqthread *th;
struct work *w;
int i;
pushcli();
th = __wq_pop(wq_cur());
if (th != NULL) {
__wq_run(th);
w = __wq_pop(mycpu()->id);
if (w != NULL) {
__wq_run(w);
popcli();
return 1;
}
// XXX(sbw) should be random
for (i = 0; i < NCPU; i++) {
if (i == mycpu()->id)
continue;
th = __wq_steal(&queue[i]);
if (th != NULL) {
__wq_run(th);
w = __wq_steal(i);
if (w != NULL) {
__wq_run(w);
popcli();
return 1;
}
@@ -216,42 +194,6 @@ wq_trywork(void)
return 0;
}
// Start a new work queue frame.
// We don't allow nested work queue frames.
void
wq_start(void)
{
pushcli();
if (myproc()->wqframe.ref != 0)
panic("wq_start");
mycpu()->wqframe = &myproc()->wqframe;
}
// End of the current work queue frame.
// The core works while the reference count of the current
// work queue frame is not 0.
void
wq_end(void)
{
while (wq_frame()->ref != 0) {
struct wqthread *th;
int i;
while ((th = __wq_pop(wq_cur())) != NULL)
__wq_run(th);
for (i = 0; i < NCPU; i++) {
th = __wq_steal(&queue[i]);
if (th != NULL) {
__wq_run(th);
break;
}
}
}
mycpu()->wqframe = NULL;
popcli();
}
void
wq_dump(void)
{
@@ -262,31 +204,35 @@ wq_dump(void)
}
static void
__test_stub(uptr a0, uptr a1)
__test_stub(struct work *w, void *a0, void *a1)
{
//cprintf("%lu, %lu\n", a0, a1);
//long i = (long)a0;
//cprintf("%u: %lu\n", cpunum(), i);
volatile int *running = a1;
subfetch(running, 1);
}
void
testwq(void)
{
enum { iters = 1000 };
static volatile int running = 1;
enum { iters = 10 };
static volatile int running = iters;
u64 e, s;
int i;
long i;
pushcli();
if (mycpu()->id == 0) {
microdelay(1);
s = rdtsc();
wq_start();
for (i = 0; i < iters; i++)
wq_push(__test_stub, i, i);
wq_end();
for (i = 0; i < iters; i++) {
if (wq_push2(__test_stub, (void*)i, (void*)&running) < 0)
panic("testwq: oops");
}
e = rdtsc();
cprintf("testwq: %lu\n", (e-s)/iters);
while (running)
nop_pause();
wq_dump();
running = 0;
} else {
while (running)
wq_trywork();
@@ -295,17 +241,12 @@ testwq(void)
}
void
initwqframe(struct wqframe *wq)
{
memset(wq, 0, sizeof(*wq));
}
void
initwq(void)
{
int i;
for (i = 0; i < NCPU; i++)
ql_init(&queue[i].lock, "queue lock");
initlock(&queue[i].lock, "wq lock", LOCKSTAT_WQ);
}
#endif // WQENABLE
struct work {
void *rip;
void *arg0;
void *arg1;
char data[];
};
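A minimal usage sketch of the new callback-style interface (hello_work and hello_example are hypothetical names; only wq_push1 and cprintf from the sources above are assumed). The trailing data[] member leaves room in the same page-sized allocation for per-work payload.

// Hypothetical example: queue a small job. Whichever core pops or
// steals the item calls hello_work; __wq_run frees the work afterwards.
static void
hello_work(struct work *w, void *a0)
{
  cprintf("hello: %s\n", (char *)a0);
}

static void
hello_example(void)
{
  // a0 is passed through untouched and must stay valid until the work
  // runs; wq_push1 returns -1 if allocation fails or the queue is full.
  if (wq_push1(hello_work, (void *)"wq") < 0)
    cprintf("hello_example: push failed\n");
}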