Commit 6fb270fb authored by Frans Kaashoek

Adopt high-performance gc plan from user-level phash impl

Kernel is trickier because there are processes that don't call begin/end_epoch.
Premature anyway, since it won't help until we have per-core process lists.
But, we do garbage collect memory now.
Parent e65d0098
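For readers unfamiliar with the plan the message refers to: the patch keeps, per process, NEPOCH buckets of delayed-free items indexed by epoch % NEPOCH, advances a global epoch only once every process has observed it, and then frees the bucket that is two epochs behind. Below is a minimal user-level sketch of that idea, not code from this commit; NEPOCH, gc_delayed, gc_begin_epoch/gc_end_epoch and gc_free_epoch mirror names in the patch, while my_epoch, the item/dofree fields and the single-threaded demo loop are simplifications invented for illustration.

// User-level sketch of epoch-based (Fraser-style) delayed reclamation.
// Not part of this commit; single-threaded demo only.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define NEPOCH 4                                  // same value the patch adds to param.h

struct gc {                                       // one delayed-free record
  unsigned long epoch;                            // epoch in which the item was retired
  struct gc *next;
  void *item;
  void (*dofree)(void *);
};

static unsigned long global_epoch = NEPOCH - 2;   // as initgc() sets it in the patch
static unsigned long my_epoch;                    // plays the role of p->epoch
static struct gc *delayed[NEPOCH];                // bucket e % NEPOCH holds epoch e's items

// Retire an item: it may still be visible to readers in the current epoch,
// so queue it instead of freeing it.
static void gc_delayed(void *item, void (*dofree)(void *)) {
  struct gc *r = malloc(sizeof(*r));
  assert(r);
  r->epoch = my_epoch;
  r->item = item;
  r->dofree = dofree;
  r->next = delayed[my_epoch % NEPOCH];
  delayed[my_epoch % NEPOCH] = r;
}

// Free the bucket that is two epochs old; safe once every thread has observed
// `epoch` (the kernel checks this with ns_enumerate(nspid, gc_min, ...)).
static int gc_free_epoch(unsigned long epoch) {
  int nfree = 0;
  unsigned long fe = (epoch - (NEPOCH - 2)) % NEPOCH;
  for (struct gc *r = delayed[fe], *nr; r != NULL; r = nr) {
    nr = r->next;
    r->dofree(r->item);
    free(r);
    nfree++;
  }
  delayed[fe] = NULL;
  return nfree;
}

static void gc_begin_epoch(void) { my_epoch = global_epoch; }
static void gc_end_epoch(void) { /* last reader out could kick a gc worker */ }

int main(void) {
  for (int i = 0; i < 10; i++) {
    gc_begin_epoch();
    gc_delayed(malloc(64), free);                 // pretend we just unlinked a node
    gc_end_epoch();
    // With one thread the minimum per-thread epoch equals global_epoch, so the
    // collector may advance the epoch and reclaim the bucket from two epochs ago.
    if (my_epoch >= global_epoch) {
      printf("epoch %lu: freed %d item(s)\n", global_epoch, gc_free_epoch(global_epoch));
      global_epoch++;
    }
  }
  return 0;   // items retired in the last two epochs are still pending, by design
}

In the kernel version of this commit the same bookkeeping moves into struct proc (p->epoch, p->gc_epoch[], p->ndelayed), and a dedicated gc_worker thread advances the epoch instead of the caller.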
@@ -24,9 +24,7 @@
 // node for deletion by marking its next pointer.
 //
-#define assert(c) if (!(c)) { panic("assertion failure"); }
-#define CRANGE_CHECKING 1
+#define CRANGE_CHECKING 0
 #define MINNLEVEL 10
 #define MARKED(x) (((uintptr) (x)) & 0x1)
@@ -158,7 +156,7 @@ static void
 clist_range_free(void *p)
 {
   struct clist_range *e = (struct clist_range *) p;
-  // cprintf("crange_free: %u(%u)\n", e->key, e->size);
+  if (crange_debug) cprintf("crange_free: %u(%u)\n", e->key, e->size);
   crange_check(e->cr, 0, p);
   for (int l = 0; l < e->nlevel; l++) {
     e->next[l] = (struct clist_range *) 0xDEADBEEF;
...
@@ -9,14 +9,20 @@
 #include "cpu.h"
 #include "kmtrace.h"

-#define NEPOCH 4
-static struct { struct spinlock l __mpalign__; } rcu_lock[NCPU];
-static struct { struct condvar cv __mpalign__; } rcu_cv[NCPU];
+#define NGC 1
+// 1 worker in total. more workers makes sense, if we have per-core process
+// lists.
+#define NWORKER 1
+static struct { struct condvar cv __mpalign__; } rcu_cv[NWORKER];
+static struct { struct spinlock l __mpalign__; } gc_lock;
+enum { gc_debug = 0 };

 struct gc {
   u64 epoch;
   struct gc *next;
+  struct gc *free;
   union {
     struct {
       void (*dofree)(void *);
@@ -31,29 +37,31 @@ struct gc {
   };
   int type;
 } __mpalign__;
-struct gc gc_epoch[NEPOCH][NCPU] __mpalign__;
 u64 global_epoch __mpalign__;
-int ndelayed __mpalign__;

 enum { rcu_debug = 0 };

 struct gc *
 gc_alloc()
 {
-  return kmalloc(sizeof(struct gc));
+  struct gc *r = kmalloc(sizeof(struct gc));
+  assert(r);
+  myproc()->ndelayed++;
+  return r;
 }

-void *
+static void *
 gc_min(void *vkey, void *v, void *arg){
   u64 *min_epoch_p = arg;
   struct proc *p = (struct proc *) v;
   if (*min_epoch_p > p->epoch) {
     *min_epoch_p = p->epoch;
   }
-  return 0;
+  return NULL;
 }

-void
+static void
 gc_free_elem(struct gc *r)
 {
   switch (r->type) {
@@ -69,79 +77,77 @@ gc_free_elem(struct gc *r)
   kmfree(r);
 }

-// Fraser's reclaimation scheme: free all delayed-free items in global_epoch-2
-static void
-gc_free_epoch(u64 epoch)
+static int
+gc_free_list(struct gc *head, u64 epoch)
 {
-  cprintf("free epoch %d\n", epoch);
-  for (int j = 0; j < NCPU; j++) {
-    if (__sync_bool_compare_and_swap(&global_epoch, epoch, epoch+1)) {
-      // only one core succeeds; that core in charge of freeing epoch
-      struct gc *head;
-      struct gc *r, *nr;
-      uint32 fe = (epoch - (NEPOCH-2)) % NEPOCH;
-      int cas;
-      if (gc_epoch[fe][j].epoch != epoch - (NEPOCH-2))
-        panic("gc_free_epoch");
-      // unhook list for fe epoch atomically
-      head = gc_epoch[fe][j].next;
-      // this shouldn't fail, because no core is modifying it.
-      cas = __sync_bool_compare_and_swap(&gc_epoch[fe][j].next, head, 0);
-      if (!cas) panic("gc_free_epoch");
-      // free list items on the delayed list
-      for (r = head; r != NULL; r = nr) {
-        if (r->epoch > epoch-(NEPOCH-2)) {
-          cprintf("%lu %lu\n", r->epoch, epoch-(NEPOCH-2));
-          panic("gc_free_epoch");
-        }
-        nr = r->next;
-        gc_free_elem(r);
-        int x = __sync_fetch_and_sub(&ndelayed, 1);
-        if (x < 0) panic("gc_free_epoch");
-      }
-      if (gc_epoch[fe][j].next != 0)
-        panic("gc_free_epoch");
-      gc_epoch[fe][j].epoch = gc_epoch[fe][j].epoch + NEPOCH;
-    }
+  int nfree = 0;
+  struct gc *r, *nr;
+  for (r = head; r != NULL; r = nr) {
+    if (r->epoch > epoch) {
+      cprintf("%lu %lu\n", r->epoch, epoch);
+      assert(0);
+    }
+    nr = r->next;
+    gc_free_elem(r);
+    nfree++;
   }
+  return nfree;
 }

-void
-gc(void)
-{
-  u64 global = global_epoch;
-  u64 min = global;
-  ns_enumerate(nspid, gc_min, &min);
-  // cprintf("gc: global %lu min %lu ndelay %d\n", global_epoch, min, ndelayed);
-  if (min >= global) {
-    gc_free_epoch(min);
-  }
+// move to free delayed list to free list so that a process can do its own freeing
+void *
+gc_move_to_free_proc(void *vkey, void *v, void *arg){
+  u64 *epoch = arg;
+  struct proc *p = (struct proc *) v;
+  struct gc *head;
+  uint32 fe = (*epoch - (NEPOCH-2)) % NEPOCH;
+  int cas;
+  assert(p->gc_epoch[fe].epoch == *epoch-(NEPOCH-2));   // XXX race with setting epoch = 0
+  // unhook list for fe epoch atomically
+  head = p->gc_epoch[fe].next;
+  // this shouldn't fail, because no core is modifying it.
+  cas = __sync_bool_compare_and_swap(&(p->gc_epoch[fe].next), head, 0);
+  assert(cas);
+  // insert list into local free list so that each core can do its own frees
+  assert (p->gc_epoch[fe].free == 0);
+  cas = __sync_bool_compare_and_swap(&(p->gc_epoch[fe].free), 0, head);
+  assert(cas);
+  assert(p->gc_epoch[fe].next == 0);
+  return 0;
 }

+// Fraser's reclaimation scheme: free all delayed-free items in global_epoch-2
+// only one thread should call this function
 static void
-gc_worker(void *x)
+gc_move_to_free(u64 epoch)
 {
-  struct spinlock wl;
-
-  initlock(&wl, "rcu_gc_worker"); // dummy lock
-
-  for (;;) {
-    gc();
-
-    acquire(&wl);
-    cv_sleep(&rcu_cv[mycpu()->id].cv, &wl);
-    release(&wl);
-  }
+  if (gc_debug)
+    cprintf("%d: free epoch %ld\n", myproc()->pid, epoch);
+  myproc()->rcu_read_depth++;   // ensure ns_enumate's call to gc_begin_epoch doesn't call gc()
+  ns_enumerate(nspid, gc_move_to_free_proc, &epoch);
+  myproc()->rcu_read_depth--;
+  int ok = __sync_bool_compare_and_swap(&global_epoch, epoch, epoch+1);
+  assert(ok);
 }

-void
-gc_start(void)
+// If all threads have seen global_epoch, we can free elements in global_epoch-2
+static void
+gc(void)
 {
-  cv_wakeup(&rcu_cv[mycpu()->id].cv);
+  int r = tryacquire(&gc_lock.l);
+  if (r == 0) return;
+  assert(r == 1);
+  u64 global = global_epoch;
+  u64 min = global;
+  myproc()->rcu_read_depth++;   // ensure ns_enumate's call to gc_begin_epoch doesn't call gc()
+  ns_enumerate(nspid, gc_min, &min);
+  myproc()->rcu_read_depth--;
+  if (min >= global) {
+    gc_move_to_free(min);
+  }
+  release(&gc_lock.l);
 }

 static void
@@ -149,17 +155,18 @@ gc_delayed_int(struct gc *r)
 {
   pushcli();
   u64 myepoch = myproc()->epoch;
-  u64 minepoch = gc_epoch[myepoch % NEPOCH][mycpu()->id].epoch;
-  // cprintf("%d: gc_delayed: %lu ndelayed %d\n", myproc()->pid, global_epoch, ndelayed);
+  u64 minepoch = myproc()->gc_epoch[myepoch % NEPOCH].epoch;
+  if (gc_debug)
+    cprintf("%d: gc_delayed: %lu ndelayed %d\n", myproc()->pid, global_epoch, myproc()->ndelayed);
   if (myepoch != minepoch) {
     cprintf("%d: myepoch %lu minepoch %lu\n", myproc()->pid, myepoch, minepoch);
     panic("gc_delayed_int");
   }
   r->epoch = myepoch;
   do {
-    r->next = gc_epoch[myepoch % NEPOCH][mycpu()->id].next;
-  } while (!__sync_bool_compare_and_swap(&(gc_epoch[myepoch % NEPOCH][mycpu()->id].next), r->next, r));
+    r->next = myproc()->gc_epoch[myepoch % NEPOCH].next;
+  } while (!__sync_bool_compare_and_swap(&(myproc()->gc_epoch[myepoch % NEPOCH].next), r->next, r));
   popcli();
 }

 void
@@ -187,34 +194,113 @@ gc_delayed2(int a1, u64 a2, void (*dofree)(int,u64))
   gc_delayed_int(r);
 }

+static void*
+gc_free(void *vkey, void *v, void *arg)
+{
+  struct proc *p = (struct proc *) v;
+  acquire(&p->gc_lock);
+  u64 global = global_epoch;
+  for (u64 epoch = p->epoch; epoch < global; epoch++) {
+    int j = (epoch - (NEPOCH - 2)) % NEPOCH;
+    assert(p->gc_epoch[j].epoch == epoch-2);
+    struct gc *free = p->gc_epoch[j].free;
+    int ok = __sync_bool_compare_and_swap(&(p->gc_epoch[j].free), free, NULL);
+    assert(ok);
+    int nfree = gc_free_list(free, epoch - 2);
+    p->ndelayed -= nfree;
+    if (gc_debug && nfree > 0)
+      cprintf("%d: epoch %d freed %d\n", p->pid, epoch - 2, nfree);
+    p->gc_epoch[j].epoch = p->gc_epoch[j].epoch + NEPOCH;
+  }
+  p->epoch = global;   // not atomic, but it never goes backwards
+  __sync_synchronize();
+  release(&p->gc_lock);
+  return NULL;
+}
+
+void
+gc_start(void)
+{
+  cv_wakeup(&rcu_cv[0].cv);   // NWORKER = 1
+  // cv_wakeup(&rcu_cv[mycpu()->id].cv);
+}
+
 void
 gc_begin_epoch(void)
 {
-  if (myproc() && myproc()->rcu_read_depth++ == 0)
-    myproc()->epoch = global_epoch;
-  __sync_synchronize();
+  if (myproc() == NULL) return;
+  if (myproc()->rcu_read_depth++ > 0)
+    return;
+  gc_free(NULL, (void *) myproc(), NULL);
 }

 void
 gc_end_epoch(void)
 {
-  if (myproc() && myproc()->rcu_read_depth > 0)
-    myproc()->rcu_read_depth--;
+  if (myproc() == NULL) return;
+  if (--myproc()->rcu_read_depth > 0)
+    return;
+#if 0
+  // kick gcc early if under memory pressure
+  int free = 0;
+  for (int j = 0; j < NEPOCH; j++) {
+    if (myproc()->gc_epoch[j].free)
+      free = 1;
+  }
+  u64 nd = myproc()->ndelayed;
+  if (!free && nd > NGC) {
+    gc_start();
+  }
+#endif
+}
+
+static void
+gc_worker(void *x)
+{
+  struct spinlock wl;
+
+  initlock(&wl, "rcu_gc_worker dummy"); // dummy lock
+  for (;;) {
+    acquire(&wl);
+    myproc()->rcu_read_depth++;   // call gc_free once for gc_worker
+    ns_enumerate(nspid, gc_free, NULL);
+    myproc()->rcu_read_depth--;
+    gc();
+    cv_sleep(&rcu_cv[0].cv, &wl);   // NWORKER = 1
+    release(&wl);
+  }
+}
+
+void
+initprocgc(struct proc *p)
+{
+  p->epoch = global_epoch;
+  p->gc_epoch = kmalloc(sizeof(struct gc) * NEPOCH);
+  initlock(&p->gc_lock, "per process gc_lock");
+  for (u64 i = global_epoch-2; i < global_epoch+2; i++) {
+    p->gc_epoch[i % NEPOCH].epoch = i;
+    p->gc_epoch[i % NEPOCH].free = NULL;
+    p->gc_epoch[i % NEPOCH].next = NULL;
+  }
 }

 void
 initgc(void)
 {
-  for (int i = 0; i < NCPU; i++) {
-    initlock(&rcu_lock[i].l, "rcu");
+  initlock(&gc_lock.l, "gc");
+  global_epoch = NEPOCH-2;
+  for (int i = 0; i < NWORKER; i++) {
     initcondvar(&rcu_cv[i].cv, "rcu_gc_cv");
   }
-  global_epoch = NEPOCH-2;
-  for (int i = 0; i < NEPOCH; i++)
-    for (int j = 0; j < NEPOCH; j++)
-      gc_epoch[i][j].epoch = i;
-  for (u32 c = 0; c < NCPU; c++) {
+  // one worker for now
+  for (u32 c = 0; c < NWORKER; c++) {
     struct proc *gcp;
     gcp = threadalloc(gc_worker, NULL);
...
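Not part of the diff, but useful context for reading it: callers in this kernel are expected to bracket lock-free reads with gc_begin_epoch()/gc_end_epoch() and to hand unlinked nodes to gc_delayed() rather than freeing them directly (these are the functions the header hunk further down declares). A hypothetical caller might look like the sketch below; struct node, listhead, lookup, freenode, and remove_head are invented names, and real list code in this kernel (e.g. the crange skiplist) unlinks with atomic pointer marking rather than the plain store shown here. The sketch assumes the kernel's own headers for kmfree and the gc_* functions.

// Hypothetical usage sketch of the gc.c API in this diff (illustration only).
struct node {
  int key;
  struct node *next;
};

static struct node *listhead;

static void
freenode(void *p)
{
  kmfree(p);
}

// Reader: it is safe to follow next pointers while inside an epoch, even if a
// concurrent writer unlinks a node, because the node's memory is reclaimed
// only after every process has left the epoch in which it was still visible.
int
lookup(int key)
{
  int found = 0;
  gc_begin_epoch();
  for (struct node *n = listhead; n != NULL; n = n->next) {
    if (n->key == key) {
      found = 1;
      break;
    }
  }
  gc_end_epoch();
  return found;
}

// Writer: after unlinking (under whatever synchronization the data structure
// uses), defer the free instead of calling kmfree() immediately.
void
remove_head(void)
{
  struct node *n = listhead;
  if (n != NULL) {
    listhead = n->next;        // unlink (simplified; real code would use CAS or locks)
    gc_delayed(n, freenode);   // freed once its epoch is two behind global_epoch
  }
}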
@@ -23,6 +23,7 @@ struct stat;
 struct proc;
 struct vmap;
 struct pipe;
+struct gc;

 // bio.c
 void binit(void);
@@ -49,6 +50,7 @@ void panic(const char*) __attribute__((noreturn));
 void snprintf(char *buf, u32 n, char *fmt, ...);
 void consoleintr(int(*)(void));
+#define assert(c) if (!(c)) { cprintf("%s:%d: ", __FILE__, __LINE__); panic("assertion failure"); }

 // crange.c
@@ -113,6 +115,7 @@ void dir_flush(struct inode *dp);
 // gc.c
 void initgc(void);
+void initprocgc(struct proc *);
 void gc_begin_epoch();
 void gc_end_epoch();
 void gc_delayed(void*, void (*dofree)(void*));
...
@@ -9,7 +9,7 @@
 #define ROOTDEV 1 // device number of file system root disk
 #define MAXARG 32 // max exec arguments
 #define MAXNAME 16 // max string names
-#define INF (~0UL)
+#define NEPOCH 4
 #define CACHELINE 64 // cache line size
 #define CPUKSTACKS (NPROC + NCPU)
 #define QUANTUM 10 // scheduling time quantum and tick length (in msec)
...
@@ -189,10 +189,10 @@ allocproc(void)
   p->state = EMBRYO;
   p->pid = ns_allockey(nspid);
-  p->epoch = 0;
   p->cpuid = mycpu()->id;
   p->on_runq = -1;
   p->cpu_pin = 0;
+  initprocgc(p);
 #if MTRACE
   p->mtrace_stacks.curr = -1;
 #endif
...
@@ -54,6 +54,9 @@ struct proc {
   SLIST_ENTRY(proc) child_next;
   struct condvar cv;
   u64 epoch;
+  u64 ndelayed;
+  struct gc *gc_epoch;
+  struct spinlock gc_lock;
   u64 rcu_read_depth;
   char lockname[16];
   int on_runq;
...