提交 4e6609e5 创建 作者: Nickolai Zeldovich

no memory allocation in gc.cc, but a bug somewhere

上级 7ff543f9
...@@ -104,7 +104,7 @@ bget(u32 dev, u64 sector, int *writer) ...@@ -104,7 +104,7 @@ bget(u32 dev, u64 sector, int *writer)
bufns->remove(mkpair(victim->dev, victim->sector), &victim); bufns->remove(mkpair(victim->dev, victim->sector), &victim);
release(&victim->lock); release(&victim->lock);
destroylock(&victim->lock); destroylock(&victim->lock);
gc_delayed(victim, kmfree); gc_delayed(victim);
b = (buf*) kmalloc(sizeof(*b)); b = (buf*) kmalloc(sizeof(*b));
b->dev = dev; b->dev = dev;
...@@ -117,7 +117,7 @@ bget(u32 dev, u64 sector, int *writer) ...@@ -117,7 +117,7 @@ bget(u32 dev, u64 sector, int *writer)
gc_begin_epoch(); gc_begin_epoch();
if (bufns->insert(mkpair(b->dev, b->sector), b) < 0) { if (bufns->insert(mkpair(b->dev, b->sector), b) < 0) {
destroylock(&b->lock); destroylock(&b->lock);
gc_delayed(b, kmfree); gc_delayed(b);
goto loop; goto loop;
} }
// rcu_end_read() happens in brelse // rcu_end_read() happens in brelse
......
struct buf { #include "gc.hh"
struct buf : public rcu_freed {
int flags; int flags;
u32 dev; u32 dev;
u64 sector; u64 sector;
......
...@@ -9,6 +9,8 @@ extern "C" { ...@@ -9,6 +9,8 @@ extern "C" {
#include "cpu.h" #include "cpu.h"
} }
#include "gc.hh"
// //
// Concurrent atomic range operations using skip lists. An insert may split an // Concurrent atomic range operations using skip lists. An insert may split an
// existing range in several ranges. A delete may remove a sequence of ranges // existing range in several ranges. A delete may remove a sequence of ranges
...@@ -54,6 +56,17 @@ extern "C" { ...@@ -54,6 +56,17 @@ extern "C" {
enum { crange_debug = 0 }; enum { crange_debug = 0 };
enum { crange_checking = 0 }; enum { crange_checking = 0 };
// One node of a crange skip list: covers the address range [key, key+size)
// and carries an opaque value.  NOTE(review): added to this file by the
// commit (definition previously lived elsewhere — see the matching removal
// later in the diff); confirm no other translation unit still needs it.
struct range {
u64 key;
u64 size;
void *value;
int curlevel; // the current levels it appears on
int nlevel; // the number of levels this range should appear
struct crange *cr; // the crange this range is part of
struct range** next; // one next pointer per level
struct spinlock *lock; // on separate cache line?
} __mpalign__;
struct crange { struct crange {
int nlevel; // number of levels in the crange skip list int nlevel; // number of levels in the crange skip list
struct range crange_head; // a crange skip list starts with a sentinel range (key 0, sz 0) struct range crange_head; // a crange skip list starts with a sentinel range (key 0, sz 0)
...@@ -108,6 +121,17 @@ range_free(void *p) ...@@ -108,6 +121,17 @@ range_free(void *p)
kmalignfree(e); kmalignfree(e);
} }
// RCU shim that defers range_free() on a skip-list node: an instance is
// handed to gc_delayed() (see range_free_delayed below), and when the GC
// worker later `delete`s it, the virtual destructor frees the wrapped range.
class range_delayed : public rcu_freed {
private:
struct range *_e;   // the range to free once its RCU epoch has expired
public:
range_delayed(range *e) : _e(e) {}
virtual ~range_delayed() {
range_free(_e);
}
};
static void static void
range_free_delayed(struct range *e) range_free_delayed(struct range *e)
{ {
...@@ -115,7 +139,9 @@ range_free_delayed(struct range *e) ...@@ -115,7 +139,9 @@ range_free_delayed(struct range *e)
cprintf("%d: range_free_delayed: 0x%lx 0x%lx-0x%lx(%lu) %lu\n", myproc()->pid, (long) e, e->key, e->key + (e)->size, e->size, myproc()->epoch); cprintf("%d: range_free_delayed: 0x%lx 0x%lx-0x%lx(%lu) %lu\n", myproc()->pid, (long) e, e->key, e->key + (e)->size, e->size, myproc()->epoch);
crange_check(e->cr, e); crange_check(e->cr, e);
assert(e->curlevel == -1); assert(e->curlevel == -1);
gc_delayed(e, range_free);
range_delayed *rd = new range_delayed(e);
gc_delayed(rd);
} }
static void static void
......
#include "cpputil.hh" #include "cpputil.hh"
#include "ns.hh" #include "ns.hh"
#include "gc.hh"
u64 namehash(const strbuf<DIRSIZ>&); u64 namehash(const strbuf<DIRSIZ>&);
...@@ -18,7 +19,7 @@ struct file { ...@@ -18,7 +19,7 @@ struct file {
// in-core file system types // in-core file system types
struct inode { struct inode : public rcu_freed {
u32 dev; // Device number u32 dev; // Device number
u32 inum; // Inode number u32 inum; // Inode number
u32 gen; // Generation number u32 gen; // Generation number
...@@ -36,6 +37,9 @@ struct inode { ...@@ -36,6 +37,9 @@ struct inode {
short nlink; short nlink;
u32 size; u32 size;
u32 addrs[NDIRECT+1]; u32 addrs[NDIRECT+1];
inode();
virtual ~inode();
}; };
#define I_BUSYR 0x1 #define I_BUSYR 0x1
......
...@@ -220,20 +220,21 @@ iupdate(struct inode *ip) ...@@ -220,20 +220,21 @@ iupdate(struct inode *ip)
// But it has a ref count, so it won't be freed or reused. // But it has a ref count, so it won't be freed or reused.
// Though unlocked, all fields will be present, // Though unlocked, all fields will be present,
// so looking a ip->inum and ip->gen are OK even w/o lock. // so looking a ip->inum and ip->gen are OK even w/o lock.
static void inode::inode()
ifree(void *arg)
{ {
struct inode *ip = (inode*) arg; dir = 0;
}
if (ip->dir) { inode::~inode()
ip->dir->remove(strbuf<DIRSIZ>(".")); {
ip->dir->remove(strbuf<DIRSIZ>("..")); if (dir) {
gc_delayed(ip->dir, del_rcu_freed); dir->remove(strbuf<DIRSIZ>("."));
ip->dir = 0; dir->remove(strbuf<DIRSIZ>(".."));
gc_delayed(dir);
dir = 0;
} }
destroylock(&ip->lock); destroylock(&lock);
kmfree(ip);
} }
struct inode* struct inode*
...@@ -295,13 +296,13 @@ iget(u32 dev, u32 inum) ...@@ -295,13 +296,13 @@ iget(u32 dev, u32 inum)
} }
release(&victim->lock); release(&victim->lock);
ins->remove(mkpair(victim->dev, victim->inum), &victim); ins->remove(mkpair(victim->dev, victim->inum), &victim);
gc_delayed(victim, ifree); gc_delayed(victim);
} else { } else {
if (!__sync_bool_compare_and_swap(&icache_free[mycpu()->id].x, cur_free, cur_free-1)) if (!__sync_bool_compare_and_swap(&icache_free[mycpu()->id].x, cur_free, cur_free-1))
goto retry_evict; goto retry_evict;
} }
ip = (inode*) kmalloc(sizeof(*ip)); ip = new inode();
ip->dev = dev; ip->dev = dev;
ip->inum = inum; ip->inum = inum;
ip->ref = 1; ip->ref = 1;
...@@ -310,10 +311,8 @@ iget(u32 dev, u32 inum) ...@@ -310,10 +311,8 @@ iget(u32 dev, u32 inum)
snprintf(ip->lockname, sizeof(ip->lockname), "cv:ino:%d", ip->inum); snprintf(ip->lockname, sizeof(ip->lockname), "cv:ino:%d", ip->inum);
initlock(&ip->lock, ip->lockname+3, LOCKSTAT_FS); initlock(&ip->lock, ip->lockname+3, LOCKSTAT_FS);
initcondvar(&ip->cv, ip->lockname); initcondvar(&ip->cv, ip->lockname);
ip->dir = 0;
if (ins->insert(mkpair(ip->dev, ip->inum), ip) < 0) { if (ins->insert(mkpair(ip->dev, ip->inum), ip) < 0) {
destroylock(&ip->lock); gc_delayed(ip);
gc_delayed(ip, kmfree);
goto retry; goto retry;
} }
...@@ -419,7 +418,7 @@ iput(struct inode *ip) ...@@ -419,7 +418,7 @@ iput(struct inode *ip)
iupdate(ip); iupdate(ip);
ins->remove(mkpair(ip->dev, ip->inum), &ip); ins->remove(mkpair(ip->dev, ip->inum), &ip);
gc_delayed(ip, ifree); gc_delayed(ip);
__sync_fetch_and_add(&icache_free[mycpu()->id].x, 1); __sync_fetch_and_add(&icache_free[mycpu()->id].x, 1);
return; return;
} }
...@@ -478,6 +477,18 @@ bmap(struct inode *ip, u32 bn) ...@@ -478,6 +477,18 @@ bmap(struct inode *ip, u32 bn)
// Truncate inode (discard contents). // Truncate inode (discard contents).
// Only called after the last dirent referring // Only called after the last dirent referring
// to this inode has been erased on disk. // to this inode has been erased on disk.
// RCU shim that defers bfree() of one on-disk block: itrunc() queues an
// instance via gc_delayed(), and the block is returned to the free bitmap
// only when the GC worker deletes the object (destructor runs bfree).
// Replaces the old gc_delayed2(dev, block, bfree) callback form.
class diskblock : public rcu_freed {
private:
int _dev;    // device holding the block
u64 _block;  // block number to release
public:
diskblock(int dev, u64 block) : _dev(dev), _block(block) {}
virtual ~diskblock() {
bfree(_dev, _block);
}
};
static void static void
itrunc(struct inode *ip) itrunc(struct inode *ip)
{ {
...@@ -487,7 +498,8 @@ itrunc(struct inode *ip) ...@@ -487,7 +498,8 @@ itrunc(struct inode *ip)
for(i = 0; i < NDIRECT; i++){ for(i = 0; i < NDIRECT; i++){
if(ip->addrs[i]){ if(ip->addrs[i]){
gc_delayed2(ip->dev, ip->addrs[i], bfree); diskblock *db = new diskblock(ip->dev, ip->addrs[i]);
gc_delayed(db);
ip->addrs[i] = 0; ip->addrs[i] = 0;
} }
} }
...@@ -496,11 +508,15 @@ itrunc(struct inode *ip) ...@@ -496,11 +508,15 @@ itrunc(struct inode *ip)
bp = bread(ip->dev, ip->addrs[NDIRECT], 0); bp = bread(ip->dev, ip->addrs[NDIRECT], 0);
a = (u32*)bp->data; a = (u32*)bp->data;
for(j = 0; j < NINDIRECT; j++){ for(j = 0; j < NINDIRECT; j++){
if(a[j]) if(a[j]) {
gc_delayed2(ip->dev, a[j], bfree); diskblock *db = new diskblock(ip->dev, a[j]);
gc_delayed(db);
}
} }
brelse(bp, 0); brelse(bp, 0);
gc_delayed2(ip->dev, ip->addrs[NDIRECT], bfree);
diskblock *db = new diskblock(ip->dev, ip->addrs[NDIRECT]);
gc_delayed(db);
ip->addrs[NDIRECT] = 0; ip->addrs[NDIRECT] = 0;
} }
......
...@@ -32,28 +32,15 @@ enum { gc_debug = 0 }; ...@@ -32,28 +32,15 @@ enum { gc_debug = 0 };
#define NGC 10000 #define NGC 10000
struct gc { struct headinfo {
rcu_freed *head;
u64 epoch; u64 epoch;
struct gc *next; };
union {
struct {
void (*dofree)(void *);
void *item;
} f1;
struct {
void (*dofree)(int, u64);
int arg1;
u64 arg2;
} f2;
};
int type;
} __mpalign__;
static struct gc_state { static struct gc_state {
struct condvar cv; struct condvar cv;
struct gc delayed[NEPOCH]; headinfo delayed[NEPOCH];
struct gc tofree[NEPOCH]; headinfo tofree[NEPOCH];
int ndelayed; int ndelayed;
int min_epoch; int min_epoch;
int nrun; int nrun;
...@@ -64,44 +51,21 @@ static struct gc_state { ...@@ -64,44 +51,21 @@ static struct gc_state {
static struct { struct spinlock l __mpalign__; } gc_lock; static struct { struct spinlock l __mpalign__; } gc_lock;
u64 global_epoch __mpalign__; u64 global_epoch __mpalign__;
// NOTE(review): old-side code removed by this commit.  Allocated a deferred-
// free record and bumped this CPU's count of pending deferred frees; now
// unnecessary because rcu_freed objects carry their own epoch/next links.
struct gc *
gc_alloc()
{
// assert() rather than returning NULL: callers cannot recover from OOM here.
struct gc *r = (gc*) kmalloc(sizeof(struct gc));
assert(r);
__sync_fetch_and_add(&gc_state[mycpu()->id].ndelayed, 1);
return r;
}
// NOTE(review): old-side code removed by this commit.  Dispatched on the
// record's tag to run the one-arg (f1) or two-arg (f2) free callback, then
// released the record itself; replaced by `delete` of the rcu_freed object.
static void
gc_free_elem(struct gc *r)
{
switch (r->type) {
case 1:
// gc_delayed() form: dofree(item)
r->f1.dofree(r->f1.item);
break;
case 2:
// gc_delayed2() form: dofree(arg1, arg2)
r->f2.dofree(r->f2.arg1, r->f2.arg2);
break;
default:
panic("rcu type");
}
kmfree(r);
}
static int static int
gc_free_tofreelist(struct gc **head, u64 epoch) gc_free_tofreelist(rcu_freed **head, u64 epoch)
{ {
int nfree = 0; int nfree = 0;
struct gc *r, *nr; rcu_freed *r, *nr;
for (r = *head; r != NULL; r = nr) { for (r = *head; r != NULL; r = nr) {
if (r->epoch > epoch) { if (r->_rcu_epoch > epoch) {
cprintf("gc_free_tofreelist: r->epoch %ld > epoch %ld\n", r->epoch, epoch); cprintf("gc_free_tofreelist: r->epoch %ld > epoch %ld\n", r->_rcu_epoch, epoch);
assert(0); assert(0);
} }
nr = r->next; nr = r->_rcu_next;
gc_free_elem(r); cprintf("about to delete %p\n", r);
delete r;
cprintf("delete done\n");
nfree++; nfree++;
} }
*head = r; *head = r;
...@@ -113,13 +77,13 @@ gc_free_tofreelist(struct gc **head, u64 epoch) ...@@ -113,13 +77,13 @@ gc_free_tofreelist(struct gc **head, u64 epoch)
void * void *
gc_move_to_tofree_cpu(int c, u64 epoch) gc_move_to_tofree_cpu(int c, u64 epoch)
{ {
struct gc *head; rcu_freed *head;
u32 fe = (epoch - (NEPOCH-2)) % NEPOCH; u32 fe = (epoch - (NEPOCH-2)) % NEPOCH;
int cas; int cas;
assert(gc_state[c].delayed[fe].epoch == epoch-(NEPOCH-2)); // XXX race with setting epoch = 0 assert(gc_state[c].delayed[fe].epoch == epoch-(NEPOCH-2)); // XXX race with setting epoch = 0
// unhook list for fe epoch atomically; this shouldn't fail // unhook list for fe epoch atomically; this shouldn't fail
head = gc_state[c].delayed[fe].next; head = gc_state[c].delayed[fe].head;
cas = __sync_bool_compare_and_swap(&(gc_state[c].delayed[fe].next), head, 0); cas = __sync_bool_compare_and_swap(&(gc_state[c].delayed[fe].head), head, 0);
assert(cas); assert(cas);
// insert list into tofree list so that each core can free in parallel and free its elements // insert list into tofree list so that each core can free in parallel and free its elements
...@@ -128,12 +92,12 @@ gc_move_to_tofree_cpu(int c, u64 epoch) ...@@ -128,12 +92,12 @@ gc_move_to_tofree_cpu(int c, u64 epoch)
gc_state[c].delayed[fe].epoch); gc_state[c].delayed[fe].epoch);
assert(0); assert(0);
} }
cas = __sync_bool_compare_and_swap(&(gc_state[c].tofree[fe].next), 0, head); cas = __sync_bool_compare_and_swap(&(gc_state[c].tofree[fe].head), 0, head);
assert(cas); assert(cas);
// move delayed NEPOCH's adhead // move delayed NEPOCH's adhead
gc_state[c].delayed[fe].epoch += NEPOCH; gc_state[c].delayed[fe].epoch += NEPOCH;
assert(gc_state[c].delayed[fe].next == 0); assert(gc_state[c].delayed[fe].head == 0);
return 0; return 0;
} }
...@@ -191,9 +155,10 @@ gc_delayfreelist(void) ...@@ -191,9 +155,10 @@ gc_delayfreelist(void)
release(&gc_lock.l); release(&gc_lock.l);
} }
static void void
gc_delayed_int(struct gc *r) gc_delayed(rcu_freed *e)
{ {
__sync_fetch_and_add(&gc_state[mycpu()->id].ndelayed, 1);
pushcli(); pushcli();
int c = mycpu()->id; int c = mycpu()->id;
u64 myepoch = myproc()->epoch; u64 myepoch = myproc()->epoch;
...@@ -204,39 +169,14 @@ gc_delayed_int(struct gc *r) ...@@ -204,39 +169,14 @@ gc_delayed_int(struct gc *r)
cprintf("%d: myepoch %lu minepoch %lu\n", myproc()->pid, myepoch, minepoch); cprintf("%d: myepoch %lu minepoch %lu\n", myproc()->pid, myepoch, minepoch);
panic("gc_delayed_int"); panic("gc_delayed_int");
} }
r->epoch = myepoch; e->_rcu_epoch = myepoch;
do { do {
r->next = gc_state[c].delayed[myepoch % NEPOCH].next; e->_rcu_next = gc_state[c].delayed[myepoch % NEPOCH].head;
} while (!__sync_bool_compare_and_swap(&(gc_state[c].delayed[myepoch % NEPOCH].next), r->next, r)); } while (!__sync_bool_compare_and_swap(&(gc_state[c].delayed[myepoch % NEPOCH].head), e->_rcu_next, e));
popcli(); popcli();
} }
void void
gc_delayed(void *e, void (*dofree)(void *))
{
struct gc *r = gc_alloc();
if (r == 0)
panic("gc_delayed");
r->f1.dofree = dofree;
r->f1.item = e;
r->type = 1;
gc_delayed_int(r);
}
// NOTE(review): old-side code removed by this commit.  Queued a deferred
// two-argument free (e.g. bfree(dev, block)) by allocating a type-2 gc
// record; callers now allocate a small rcu_freed wrapper object instead.
void
gc_delayed2(int a1, u64 a2, void (*dofree)(int,u64))
{
struct gc *r = gc_alloc();
// gc_alloc() already asserts, so this check is belt-and-braces.
if (r == 0)
panic("gc_delayed2");
r->f2.dofree = dofree;
r->f2.arg1 = a1;
r->f2.arg2 = a2;
r->type = 2;
gc_delayed_int(r);
}
void
gc_begin_epoch(void) gc_begin_epoch(void)
{ {
if (myproc() == NULL) return; if (myproc() == NULL) return;
...@@ -286,7 +226,7 @@ gc_worker(void *x) ...@@ -286,7 +226,7 @@ gc_worker(void *x)
u64 global = global_epoch; u64 global = global_epoch;
myproc()->epoch = global_epoch; // move the gc thread to next epoch myproc()->epoch = global_epoch; // move the gc thread to next epoch
for (i = gc_state[mycpu()->id].min_epoch; i < global-2; i++) { for (i = gc_state[mycpu()->id].min_epoch; i < global-2; i++) {
int nfree = gc_free_tofreelist(&(gc_state[mycpu()->id].tofree[i%NEPOCH].next), i); int nfree = gc_free_tofreelist(&(gc_state[mycpu()->id].tofree[i%NEPOCH].head), i);
gc_state[mycpu()->id].tofree[i%NEPOCH].epoch += NEPOCH; gc_state[mycpu()->id].tofree[i%NEPOCH].epoch += NEPOCH;
__sync_fetch_and_sub(&gc_state[mycpu()->id].ndelayed, nfree); __sync_fetch_and_sub(&gc_state[mycpu()->id].ndelayed, nfree);
if (0 && nfree > 0) { if (0 && nfree > 0) {
...@@ -337,10 +277,3 @@ initgc(void) ...@@ -337,10 +277,3 @@ initgc(void)
release(&gcp->lock); release(&gcp->lock);
} }
} }
// NOTE(review): old-side code removed by this commit.  C-linkage trampoline
// that let the callback-based gc_delayed() run a C++ virtual destructor;
// obsolete now that the GC worker deletes rcu_freed objects directly.
void
del_rcu_freed(void *arg)
{
rcu_freed *rf = (rcu_freed*) arg;
delete rf;
}
#pragma once #pragma once
class rcu_freed { class rcu_freed {
private: public:
u64 _rcu_epoch; u64 _rcu_epoch;
rcu_freed *_rcu_next; rcu_freed *_rcu_next;
public:
virtual ~rcu_freed() {} virtual ~rcu_freed() {}
}; } __mpalign__;
void del_rcu_freed(void*);
...@@ -56,17 +56,7 @@ void consoleintr(int(*)(void)); ...@@ -56,17 +56,7 @@ void consoleintr(int(*)(void));
#define assert(c) if (!(c)) { cprintf("%s:%d: ", __FILE__, __LINE__); panic("assertion failure"); } #define assert(c) if (!(c)) { cprintf("%s:%d: ", __FILE__, __LINE__); panic("assertion failure"); }
// crange.c // crange.c
struct range;
struct range {
u64 key;
u64 size;
void *value;
int curlevel; // the current levels it appears on
int nlevel; // the number of levels this range should appear
struct crange *cr; // the crange this range is part of
struct range** next; // one next pointer per level
struct spinlock *lock; // on separate cache line?
} __mpalign__;
struct crange* crange_alloc(int nlevel); struct crange* crange_alloc(int nlevel);
void crange_free(struct crange *cr); void crange_free(struct crange *cr);
...@@ -120,10 +110,13 @@ void initgc(void); ...@@ -120,10 +110,13 @@ void initgc(void);
void initprocgc(struct proc *); void initprocgc(struct proc *);
void gc_begin_epoch(); void gc_begin_epoch();
void gc_end_epoch(); void gc_end_epoch();
void gc_delayed(void*, void (*dofree)(void*));
void gc_delayed2(int, u64, void (*dofree)(int, u64));
void gc_start(void); void gc_start(void);
#ifdef __cplusplus
class rcu_freed;
void gc_delayed(rcu_freed *);
#endif
// hwvm.c // hwvm.c
void freevm(pml4e_t*); void freevm(pml4e_t*);
pml4e_t* setupkvm(void); pml4e_t* setupkvm(void);
......
...@@ -76,7 +76,7 @@ class xns : public rcu_freed { ...@@ -76,7 +76,7 @@ class xns : public rcu_freed {
if (!allowdup) { if (!allowdup) {
for (auto x = root; x; x = x->next) { for (auto x = root; x; x = x->next) {
if (x->key == key) { if (x->key == key) {
gc_delayed(e, del_rcu_freed); gc_delayed(e);
return -1; return -1;
} }
} }
...@@ -131,7 +131,7 @@ class xns : public rcu_freed { ...@@ -131,7 +131,7 @@ class xns : public rcu_freed {
} }
*pelock = 0; *pelock = 0;
gc_delayed(e, del_rcu_freed); gc_delayed(e);
return true; return true;
} }
......
...@@ -179,11 +179,24 @@ exit(void) ...@@ -179,11 +179,24 @@ exit(void)
panic("zombie exit"); panic("zombie exit");
} }
// RCU shim that defers kmfree() of a proc: freeproc() queues an instance
// via gc_delayed(), and the struct proc memory is released only when the
// GC worker deletes the wrapper after the proc's RCU epoch has expired.
class delayedfree : public rcu_freed {
private:
proc *_p;  // the exited proc whose memory is pending release
public:
delayedfree(proc *p) : _p(p) {}
virtual ~delayedfree() {
kmfree(_p);
}
};
static void static void
freeproc(struct proc *p) freeproc(struct proc *p)
{ {
destroylock(&p->lock); destroylock(&p->lock);
gc_delayed(p, kmfree);
delayedfree *df = new delayedfree(p);
gc_delayed(df);
} }
// Look in the process table for an UNUSED proc. // Look in the process table for an UNUSED proc.
......
...@@ -13,6 +13,8 @@ extern "C" { ...@@ -13,6 +13,8 @@ extern "C" {
#include "vm.h" #include "vm.h"
} }
#include "gc.hh"
static void vmap_free(void *p); static void vmap_free(void *p);
enum { vm_debug = 0 }; enum { vm_debug = 0 };
...@@ -47,6 +49,17 @@ vma_free(void *p) ...@@ -47,6 +49,17 @@ vma_free(void *p)
kmfree(e); kmfree(e);
} }
// RCU shim that defers vma_free() of an unmapped region: vmap_remove()
// queues an instance via gc_delayed() so concurrent readers still walking
// the mapping see a valid vma until the epoch expires; the destructor then
// frees it.  Replaces the old gc_delayed(e, vma_free) callback form.
class vma_delayed : public rcu_freed {
private:
vma *_e;  // the vma to free once no reader can still reference it
public:
vma_delayed(vma *e) : _e(e) {}
virtual ~vma_delayed() {
vma_free(_e);
}
};
static int static int
vmn_doallocpg(struct vmnode *n) vmn_doallocpg(struct vmnode *n)
{ {
...@@ -469,7 +482,9 @@ vmap_remove(struct vmap *m, uptr va_start, u64 len) ...@@ -469,7 +482,9 @@ vmap_remove(struct vmap *m, uptr va_start, u64 len)
return -1; return -1;
} }
crange_del(m->cr, va_start, len); crange_del(m->cr, va_start, len);
gc_delayed(e, vma_free);
vma_delayed *vd = new vma_delayed(e);
gc_delayed(vd);
release(&m->lock); release(&m->lock);
return 0; return 0;
} }
...@@ -601,7 +616,8 @@ vmap_remove(struct vmap *m, uptr va_start, u64 len) ...@@ -601,7 +616,8 @@ vmap_remove(struct vmap *m, uptr va_start, u64 len)
cprintf("vmap_remove: partial unmap unsupported\n"); cprintf("vmap_remove: partial unmap unsupported\n");
return -1; return -1;
} }
gc_delayed(m->e[i], vma_free); vma_delayed *vd = new vma_delayed(m->e[i]);
gc_delayed(vd);
m->e[i] = 0; m->e[i] = 0;
} }
} }
......
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论