A more scalable futex implementation, plus some hacks to get acceptable performance.

The only locking is "point-to-point" between condvar sleep in one proc and condvar wakeup in another.
上级 39064a18
...@@ -77,7 +77,7 @@ struct proc : public rcu_freed, public sched_link { ...@@ -77,7 +77,7 @@ struct proc : public rcu_freed, public sched_link {
u64 cv_wakeup; // Wakeup time for this process u64 cv_wakeup; // Wakeup time for this process
LIST_ENTRY(proc) cv_waiters; // Linked list of processes waiting for oncv LIST_ENTRY(proc) cv_waiters; // Linked list of processes waiting for oncv
LIST_ENTRY(proc) cv_sleep; // Linked list of processes sleeping on a cv LIST_ENTRY(proc) cv_sleep; // Linked list of processes sleeping on a cv
LIST_ENTRY(proc) futex_link; struct spinlock futex_lock;
u64 user_fs_; u64 user_fs_;
u64 unmap_tlbreq_; u64 unmap_tlbreq_;
int exec_cpuid_; int exec_cpuid_;
...@@ -97,6 +97,8 @@ struct proc : public rcu_freed, public sched_link { ...@@ -97,6 +97,8 @@ struct proc : public rcu_freed, public sched_link {
static int kill(int pid); static int kill(int pid);
int kill(); int kill();
static u64 hash(const u32& p);
virtual void do_gc(void) { delete this; } virtual void do_gc(void) { delete this; }
private: private:
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "condvar.h" #include "condvar.h"
#include "proc.hh" #include "proc.hh"
#include "cpu.hh" #include "cpu.hh"
#include "percpu.hh"
// //
// futexkey // futexkey
...@@ -45,11 +46,68 @@ futexkey(const u64* useraddr, vmap* vmap, futexkey_t* key) ...@@ -45,11 +46,68 @@ futexkey(const u64* useraddr, vmap* vmap, futexkey_t* key)
} }
// //
// nscache
//
struct nscache {
struct spinlock lock_;
volatile u64 head_;
volatile u64 tail_;
xns<u32, proc*, proc::hash>* ns_[16];
nscache();
xns<u32, proc*, proc::hash>* alloc();
bool cache(xns<u32, proc*, proc::hash>* ns);
NEW_DELETE_OPS(nscache);
};
percpu<nscache> nscache_;
// Build an empty cache: both ring counters start at zero, so
// head_ == tail_ and alloc() reports nothing cached until cache()
// stores an entry.  The ns_ slots are not touched here.
nscache::nscache(void)
  : head_(0), tail_(0)
{
  initlock(&lock_, "nscache::lock_", LOCKSTAT_FUTEX);
}
// Pop the oldest cached namespace out of the ring.
// Returns nullptr when nothing is cached.
xns<u32, proc*, proc::hash>*
nscache::alloc(void)
{
  acquire(&lock_);

  // The counters only ever grow, so equal counters mean an empty ring.
  if (head_ == tail_) {
    release(&lock_);
    return nullptr;
  }

  const u64 slot = tail_ % NELEM(ns_);
  xns<u32, proc*, proc::hash>* const reused = ns_[slot];
  ++tail_;

  release(&lock_);
  return reused;
}
// Try to stash ns in the ring so a later alloc() can hand it out again.
// Returns true if ns was stored; false if all NELEM(ns_) slots are in
// use, in which case ns is not recorded anywhere by this call.
bool
nscache::cache(xns<u32, proc*, proc::hash>* ns)
{
  acquire(&lock_);

  // head_ - tail_ is the number of entries currently held.
  const bool has_room = (head_ - tail_) < NELEM(ns_);
  if (has_room) {
    const u64 slot = head_ % NELEM(ns_);
    ns_[slot] = ns;
    ++head_;
  }

  release(&lock_);
  return has_room;
}
//
// futexaddr // futexaddr
// //
struct futexaddr : public referenced, public rcu_freed struct futexaddr : public referenced, public rcu_freed
{ {
futexaddr(futexkey_t key); static futexaddr* alloc(futexkey_t key);
virtual void do_gc(); virtual void do_gc();
virtual void onzero() const; virtual void onzero() const;
...@@ -57,18 +115,40 @@ struct futexaddr : public referenced, public rcu_freed ...@@ -57,18 +115,40 @@ struct futexaddr : public referenced, public rcu_freed
bool inserted_; bool inserted_;
struct spinlock lock_; struct spinlock lock_;
LIST_HEAD(proclist, proc) list_; xns<u32, proc*, proc::hash>* const nspid_;
private:
futexaddr(futexkey_t key, xns<u32, proc*, proc::hash>* nspid);
NEW_DELETE_OPS(futexaddr); NEW_DELETE_OPS(futexaddr);
}; };
xns<futexkey_t, futexaddr*, futexkey_hash> *nsfutex __mpalign__; xns<futexkey_t, futexaddr*, futexkey_hash> *nsfutex __mpalign__;
futexaddr::futexaddr(futexkey_t key) futexaddr*
: rcu_freed("futexaddr"), key_(key), inserted_(false) futexaddr::alloc(futexkey_t key)
{
xns<u32, proc*, proc::hash>* nspid;
futexaddr* fa;
nspid = nscache_->alloc();
if (nspid == nullptr)
nspid = new xns<u32, proc*, proc::hash>(false);
if (nspid == nullptr)
return nullptr;
fa = new futexaddr(key, nspid);
if (fa == nullptr) {
delete nspid;
return nullptr;
}
return fa;
}
futexaddr::futexaddr(futexkey_t key, xns<u32, proc*, proc::hash>* nspid)
: rcu_freed("futexaddr"), key_(key), inserted_(false), nspid_(nspid)
{ {
initlock(&lock_, "futexaddr::lock_", LOCKSTAT_FUTEX); initlock(&lock_, "futexaddr::lock_", LOCKSTAT_FUTEX);
LIST_INIT(&list_);
} }
void void
...@@ -82,6 +162,10 @@ futexaddr::onzero(void) const ...@@ -82,6 +162,10 @@ futexaddr::onzero(void) const
{ {
if (inserted_) if (inserted_)
assert(nsfutex->remove(key_, nullptr)); assert(nsfutex->remove(key_, nullptr));
// Normally deallocate members in the destructor, but in this case
// we don't want to wait for the gc to fill the cache
if (!nscache_->cache(nspid_))
delete nspid_;
gc_delayed((futexaddr*)this); gc_delayed((futexaddr*)this);
} }
...@@ -95,7 +179,11 @@ futexwait(futexkey_t key, u64 val, u64 timer) ...@@ -95,7 +179,11 @@ futexwait(futexkey_t key, u64 val, u64 timer)
again: again:
fa = nsfutex->lookup(key); fa = nsfutex->lookup(key);
if (fa == nullptr) { if (fa == nullptr) {
fa = new futexaddr(key); fa = futexaddr::alloc(key);
if (fa == nullptr) {
cprintf("futexwait futexaddr::alloc failed\n");
return -1;
}
if (nsfutex->insert(key, fa) < 0) { if (nsfutex->insert(key, fa) < 0) {
fa->dec(); fa->dec();
goto again; goto again;
...@@ -109,20 +197,28 @@ futexwait(futexkey_t key, u64 val, u64 timer) ...@@ -109,20 +197,28 @@ futexwait(futexkey_t key, u64 val, u64 timer)
} }
assert(fa->key_ == key); assert(fa->key_ == key);
acquire(&fa->lock_); acquire(&myproc()->futex_lock);
auto cleanup = scoped_cleanup([&fa](){ auto cleanup = scoped_cleanup([&fa](){
release(&fa->lock_); release(&myproc()->futex_lock);
fa->dec(); fa->dec();
}); });
// This first check is an optimization
if (futexkey_val(fa->key_) != val) if (futexkey_val(fa->key_) != val)
return -EWOULDBLOCK; return -EWOULDBLOCK;
LIST_INSERT_HEAD(&fa->list_, myproc(), futex_link);
if (fa->nspid_->insert(myproc()->pid, myproc()) < 0)
return -1;
if (futexkey_val(fa->key_) != val) {
fa->nspid_->remove(myproc()->pid, nullptr);
return -EWOULDBLOCK;
}
u64 nsecto = timer == 0 ? 0 : timer+nsectime(); u64 nsecto = timer == 0 ? 0 : timer+nsectime();
cv_sleepto(&myproc()->cv, &fa->lock_, nsecto); cv_sleepto(&myproc()->cv, &myproc()->futex_lock, nsecto);
LIST_REMOVE(myproc(), futex_link); assert(fa->nspid_->remove(myproc()->pid, nullptr));
return 0; return 0;
} }
...@@ -131,20 +227,29 @@ futexwake(futexkey_t key, u64 nwake) ...@@ -131,20 +227,29 @@ futexwake(futexkey_t key, u64 nwake)
{ {
futexaddr* fa; futexaddr* fa;
u64 nwoke = 0; u64 nwoke = 0;
proc* p;
if (nwake == 0)
return -1;
scoped_gc_epoch gc; scoped_gc_epoch gc;
fa = nsfutex->lookup(key); fa = nsfutex->lookup(key);
if (fa == nullptr) if (fa == nullptr || !fa->tryinc())
return 0; return 0;
acquire(&fa->lock_);
LIST_FOREACH(p, &fa->list_, futex_link) { auto cleanup = scoped_cleanup([&fa](){
if (nwoke >= nwake) fa->dec();
break; });
fa->nspid_->enumerate([&nwoke, &nwake](u32 pid, proc* p) {
acquire(&p->futex_lock);
cv_wakeup(&p->cv); cv_wakeup(&p->cv);
nwoke++; release(&p->futex_lock);
} ++nwoke;
release(&fa->lock_); if (nwoke >= nwake)
return 1;
return 0;
});
return 0; return 0;
} }
...@@ -154,4 +259,7 @@ initfutex(void) ...@@ -154,4 +259,7 @@ initfutex(void)
nsfutex = new xns<futexkey_t, futexaddr*, futexkey_hash>(false); nsfutex = new xns<futexkey_t, futexaddr*, futexkey_hash>(false);
if (nsfutex == 0) if (nsfutex == 0)
panic("initfutex"); panic("initfutex");
for (int i = 0; i < NCPU; i++)
new (&nscache_[i]) nscache();
} }
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
#include "wq.hh" #include "wq.hh"
u64 u64
proc_hash(const u32 &p) proc::hash(const u32 &p)
{ {
return p; return p;
} }
...@@ -27,7 +27,7 @@ mycpuid(void) ...@@ -27,7 +27,7 @@ mycpuid(void)
return mycpu()->id; return mycpu()->id;
} }
xns<u32, proc*, proc_hash> *xnspid __mpalign__; xns<u32, proc*, proc::hash> *xnspid __mpalign__;
struct proc *bootproc __mpalign__; struct proc *bootproc __mpalign__;
#if MTRACE #if MTRACE
...@@ -46,6 +46,7 @@ proc::proc(int npid) : ...@@ -46,6 +46,7 @@ proc::proc(int npid) :
{ {
snprintf(lockname, sizeof(lockname), "cv:proc:%d", pid); snprintf(lockname, sizeof(lockname), "cv:proc:%d", pid);
initlock(&lock, lockname+3, LOCKSTAT_PROC); initlock(&lock, lockname+3, LOCKSTAT_PROC);
initlock(&futex_lock, "proc::futex_lock", LOCKSTAT_PROC);
initcondvar(&cv, lockname); initcondvar(&cv, lockname);
memset(&childq, 0, sizeof(childq)); memset(&childq, 0, sizeof(childq));
...@@ -257,7 +258,7 @@ proc::alloc(void) ...@@ -257,7 +258,7 @@ proc::alloc(void)
void void
initproc(void) initproc(void)
{ {
xnspid = new xns<u32, proc*, proc_hash>(false); xnspid = new xns<u32, proc*, proc::hash>(false);
if (xnspid == 0) if (xnspid == 0)
panic("pinit"); panic("pinit");
} }
......
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论