Commit cc82ee1f authored by Frans Kaashoek

Merge branch 'scale-amd64' of ssh://amsterdam.csail.mit.edu/home/am0/6.828/xv6 into scale-amd64

...@@ -46,6 +46,7 @@ include lib/Makefrag ...@@ -46,6 +46,7 @@ include lib/Makefrag
include bin/Makefrag include bin/Makefrag
include kernel/Makefrag include kernel/Makefrag
include tools/Makefrag include tools/Makefrag
-include user/Makefrag.$(HW)
$(O)/%.o: %.c $(O)/%.o: %.c
@echo " CC $@" @echo " CC $@"
...@@ -57,6 +58,11 @@ $(O)/%.o: %.cc ...@@ -57,6 +58,11 @@ $(O)/%.o: %.cc
$(Q)mkdir -p $(@D) $(Q)mkdir -p $(@D)
$(Q)$(CXX) $(CXXFLAGS) $(XXFLAGS) -c -o $@ $< $(Q)$(CXX) $(CXXFLAGS) $(XXFLAGS) -c -o $@ $<
$(O)/%.o: %.S
@echo " CC $@"
$(Q)mkdir -p $(@D)
$(Q)$(CC) $(ASFLAGS) -c -o $@ $<
xv6memfs.img: bootblock kernelmemfs xv6memfs.img: bootblock kernelmemfs
dd if=/dev/zero of=xv6memfs.img count=10000 dd if=/dev/zero of=xv6memfs.img count=10000
dd if=bootblock of=xv6memfs.img conv=notrunc dd if=bootblock of=xv6memfs.img conv=notrunc
......
...@@ -50,3 +50,8 @@ ...@@ -50,3 +50,8 @@
$ CC=gcc CXX=g++ ./configure --prefix=[PREFIX] \ $ CC=gcc CXX=g++ ./configure --prefix=[PREFIX] \
--enable-targets=x86_64 --enable-optimized --enable-targets=x86_64 --enable-optimized
$ CC=gcc CXX=g++ make && make install $ CC=gcc CXX=g++ make && make install
* user-space version
$ make HW=user o.user/utest
extern "C" {
#include "types.h" #include "types.h"
#include "stat.h" #include "stat.h"
#include "user.h" #include "user.h"
}
int int
main(int argc, char **argv) main(int argc, char **argv)
......
extern "C" {
#include "types.h" #include "types.h"
#include "stat.h" #include "stat.h"
#include "user.h" #include "user.h"
}
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
......
...@@ -18,6 +18,7 @@ UPROGS= \ ...@@ -18,6 +18,7 @@ UPROGS= \
usertests \ usertests \
lockstat \ lockstat \
preadtest \ preadtest \
ftest \
perf perf
ifeq ($(HAVE_LWIP),y) ifeq ($(HAVE_LWIP),y)
......
#include "types.h"
#include "stat.h"
#include "fcntl.h"
#include "user.h"
#include "lib.h"
#include "amd64.h"
#include "ipc.hh"
#include "stream.h"
#define FSIZE (64 << 10)
#define BSIZE 4096
static char wbuf[512];
static char rbuf[BSIZE];
static int check = 0;
// ftest: write a 64KB patterned file, then read it back through the
// user-level stdio-like FILE layer (fdopen/fread) to exercise it.
int
main(int ac, char **av)
{
  size_t count;
  off_t off;
  FILE *fp;
  int fd;
  int i;

  // The kernel/user shared IPC region backs the async-read path of
  // the FILE layer; start from a clean state.
  memset(ipcctl, 0, sizeof(*ipcctl));

  // Fill the write buffer with a repeating 0..15 pattern.  Because
  // sizeof(wbuf) (512) is a multiple of 16, byte k of the file is
  // always k%16, which is what the read-back check relies on.
  for (i = 0; i < sizeof(wbuf); i++)
    wbuf[i] = i % 16;

  unlink("ftest.x");
  fd = open("ftest.x", O_CREATE|O_RDWR);
  if (fd < 0)
    die("open failed");

  for (i = 0; i < FSIZE; ) {
    count = MIN(sizeof(wbuf), FSIZE-i);
    if (write(fd, wbuf, count) != count)
      die("write failed");
    i += count;
  }

  // fdopen keeps its own offset (starting at 0) and reads via pread,
  // so the fd's write position does not need to be rewound.
  fp = fdopen(fd, "r");
  if (fp == 0)
    die("fdopen");

  // Read back in BSIZE chunks; verify contents only when `check`
  // is enabled (off by default).
  off = 0;
  while ((count = fread(rbuf, 1, BSIZE, fp))) {
    if (check) {
      for (i = 0; i < count; i++)
        if (rbuf[i] != (i+off)%16)
          die("ftest %u: %u != %u", i, (int)(rbuf[i]), (off+i)%16);
      off += count;
    }
  }

  fclose(fp);
  exit();
}
extern "C" {
#include "types.h" #include "types.h"
#include "user.h" #include "user.h"
}
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
......
...@@ -7,9 +7,6 @@ ...@@ -7,9 +7,6 @@
#include "amd64.h" #include "amd64.h"
#include "ipc.hh" #include "ipc.hh"
// XXX(sbw) add a memlayout.h?
#define KSHARED 0xFFFFF00000000000ull
#define FSIZE (64 << 10) #define FSIZE (64 << 10)
#define BSIZE 4096 #define BSIZE 4096
#define PSIZE (4*BSIZE) #define PSIZE (4*BSIZE)
...@@ -18,8 +15,6 @@ static int use_async; ...@@ -18,8 +15,6 @@ static int use_async;
static char buf[BSIZE]; static char buf[BSIZE];
struct ipcctl *ipcctl = (struct ipcctl*)KSHARED;
struct { struct {
u64 acount; u64 acount;
u64 atot; u64 atot;
...@@ -28,50 +23,6 @@ struct { ...@@ -28,50 +23,6 @@ struct {
u64 ptot; u64 ptot;
} stats; } stats;
static msgid_t
ipc_msg_alloc(void)
{
if (ipcctl->msghead - ipcctl->msgtail == IPC_NMSG)
return NULL_MSGID;
msgid_t i = ipcctl->msghead % IPC_NMSG;
ipcctl->msghead++;
return i;
}
static void
ipc_msg_free(int msgid)
{
msgid_t i;
i = ipcctl->msgtail % IPC_NMSG;
if (i != msgid)
die("ipc_free_msg: oops");
ipcctl->msgtail++;
}
static pageid_t
ipc_page_alloc(void)
{
if (ipcctl->pagehead - ipcctl->pagetail == IPC_NPAGE)
return NULL_PAGEID;
pageid_t i = ipcctl->pagehead % IPC_NPAGE;
ipcctl->pagehead++;
return i;
}
static void
ipc_page_free(pageid_t pageid)
{
pageid_t i;
i = ipcctl->pagetail % IPC_NPAGE;
if (i != pageid)
die("ipc_free_page: oops");
ipcctl->pagetail++;
}
static void static void
kernlet_pread(int fd, size_t count, off_t off) kernlet_pread(int fd, size_t count, off_t off)
{ {
......
class arc4 {
public:
arc4(const u8 *key, size_t nbytes) {
reset();
for (size_t n = 0; n < nbytes; n += 128)
addkey(&key[n], nbytes > n + 128 ? 128 : n + 128 - nbytes);
j = i;
}
u8 getbyte() {
uint8_t si, sj;
i = (i + 1) & 0xff;
si = s[i];
j = (j + si) & 0xff;
sj = s[j];
s[i] = sj;
s[j] = si;
return s[(si + sj) & 0xff];
}
template<class T> T rand() {
T v;
for (u32 i = 0; i < sizeof(v); i++)
*(u8*) &v = getbyte();
return v;
}
private:
void reset() {
i = 0xff;
j = 0;
for (u32 n = 0; n < 0x100; n++)
s[n] = n;
}
void addkey(const u8 *key, size_t nbytes) {
size_t n, keypos;
uint8_t si;
for (n = 0, keypos = 0; n < 256; n++, keypos++) {
if (keypos >= nbytes)
keypos = 0;
i = (i + 1) & 0xff;
si = s[i];
j = (j + si + key[keypos]) & 0xff;
s[i] = s[j];
s[j] = si;
}
}
u8 i;
u8 j;
u8 s[256];
};
...@@ -13,31 +13,4 @@ ...@@ -13,31 +13,4 @@
#define _GLIBCXX_ATOMIC_BUILTINS_8 1 #define _GLIBCXX_ATOMIC_BUILTINS_8 1
#include "atomic_std.h" #include "atomic_std.h"
#include "atomic_util.hh"
template<class T>
bool
cmpxch(std::atomic<T> *a, T expected, T desired)
{
return a->compare_exchange_weak(expected, desired);
}
template<class T>
bool
cmpxch(volatile std::atomic<T> *a, T expected, T desired)
{
return a->compare_exchange_weak(expected, desired);
}
template<class T>
bool
cmpxch_update(std::atomic<T> *a, T *expected, T desired)
{
return a->compare_exchange_weak(*expected, desired);
}
template<class T>
bool
cmpxch_update(volatile std::atomic<T> *a, T *expected, T desired)
{
return a->compare_exchange_weak(*expected, desired);
}
#pragma once
template<class T>
bool
cmpxch(std::atomic<T> *a, T expected, T desired)
{
return a->compare_exchange_weak(expected, desired);
}
template<class T>
bool
cmpxch(volatile std::atomic<T> *a, T expected, T desired)
{
return a->compare_exchange_weak(expected, desired);
}
template<class T>
bool
cmpxch_update(std::atomic<T> *a, T *expected, T desired)
{
return a->compare_exchange_weak(*expected, desired);
}
template<class T>
bool
cmpxch_update(volatile std::atomic<T> *a, T *expected, T desired)
{
return a->compare_exchange_weak(*expected, desired);
}
#pragma once #pragma once
#include "atomic.hh"
using std::atomic;
struct crange; struct crange;
struct crange_locked; struct crange_locked;
struct range; struct range;
...@@ -18,7 +14,7 @@ class markptr_mark; ...@@ -18,7 +14,7 @@ class markptr_mark;
template<class T> template<class T>
class markptr { class markptr {
protected: protected:
atomic<uptr> _p; std::atomic<uptr> _p;
public: public:
markptr() : _p(0) {} markptr() : _p(0) {}
...@@ -88,7 +84,7 @@ struct range : public rcu_freed { ...@@ -88,7 +84,7 @@ struct range : public rcu_freed {
private: private:
const u64 key; const u64 key;
const u64 size; const u64 size;
atomic<int> curlevel; // the current levels it appears on std::atomic<int> curlevel; // the current levels it appears on
const int nlevel; // the number of levels this range should appear const int nlevel; // the number of levels this range should appear
crange *const cr; // the crange this range is part of crange *const cr; // the crange this range is part of
markptr<range>* const next; // one next pointer per level markptr<range>* const next; // one next pointer per level
......
#include "types.h"
#include "kernel.hh"
#include "spinlock.h"
#include "condvar.h"
#include "cpputil.hh"
#include "atomic.hh"
#include "proc.hh"
#include "cpu.hh"
...@@ -7,7 +7,7 @@ u64 namehash(const strbuf<DIRSIZ>&); ...@@ -7,7 +7,7 @@ u64 namehash(const strbuf<DIRSIZ>&);
struct file { struct file {
enum { FD_NONE, FD_PIPE, FD_INODE, FD_SOCKET } type; enum { FD_NONE, FD_PIPE, FD_INODE, FD_SOCKET } type;
atomic<int> ref; // reference count std::atomic<int> ref; // reference count
char readable; char readable;
char writable; char writable;
...@@ -24,13 +24,13 @@ struct inode : public rcu_freed { ...@@ -24,13 +24,13 @@ struct inode : public rcu_freed {
u32 dev; // Device number u32 dev; // Device number
u32 inum; // Inode number u32 inum; // Inode number
u32 gen; // Generation number u32 gen; // Generation number
atomic<int> ref; // Reference count std::atomic<int> ref; // Reference count
int flags; // I_BUSY, I_VALID int flags; // I_BUSY, I_VALID
atomic<int> readbusy; std::atomic<int> readbusy;
struct condvar cv; struct condvar cv;
struct spinlock lock; struct spinlock lock;
char lockname[16]; char lockname[16];
atomic<xns<strbuf<DIRSIZ>, u32, namehash>*> dir; std::atomic<xns<strbuf<DIRSIZ>, u32, namehash>*> dir;
short type; // copy of disk inode short type; // copy of disk inode
short major; short major;
......
...@@ -37,3 +37,8 @@ class scoped_gc_epoch { ...@@ -37,3 +37,8 @@ class scoped_gc_epoch {
} }
}; };
void initgc(void);
void initprocgc(struct proc *);
void gc_start(void);
void gc_delayed(rcu_freed *);
#define IPC_NMSG 16 // XXX(sbw) add a memlayout.h?
typedef u32 msgid_t; #define KSHARED 0xFFFFF00000000000ull
#define NULL_MSGID (-1)
#define IPC_NPAGE ((KSHAREDSIZE/PGSIZE) - 1)
typedef u32 pageid_t; typedef u32 pageid_t;
typedef u32 msgid_t;
#define IPC_CTLSIZE 4096
#define IPC_PGSIZE 4096
#define IPC_NMSG 16
#define NULL_MSGID (-1)
#define NULL_PAGEID (-1) #define NULL_PAGEID (-1)
#define IPC_NPAGE ((KSHAREDSIZE/IPC_PGSIZE) - 1)
struct ipcmsg { struct ipcmsg {
volatile char done:1; volatile char done:1;
...@@ -16,10 +21,28 @@ struct ipcmsg { ...@@ -16,10 +21,28 @@ struct ipcmsg {
}; };
struct ipcctl { struct ipcctl {
int msghead; volatile int msghead;
int msgtail; volatile int msgtail;
struct ipcmsg msg[IPC_NMSG]; struct ipcmsg msg[IPC_NMSG];
int pagehead; volatile int pagehead;
int pagetail; volatile int pagetail;
}; };
extern struct ipcctl *ipcctl;
msgid_t ipc_msg_alloc(void);
void ipc_msg_free(int msgid);
pageid_t ipc_page_alloc(void);
void ipc_page_free(pageid_t pageid);
static inline struct ipcmsg*
getmsg(msgid_t id)
{
return &ipcctl->msg[id];
}
static inline char*
getpage(pageid_t id)
{
return (char*)(KSHARED+IPC_CTLSIZE+(id*IPC_PGSIZE));
}
...@@ -100,16 +100,6 @@ int dirlink(struct inode*, const char*, u32); ...@@ -100,16 +100,6 @@ int dirlink(struct inode*, const char*, u32);
void dir_init(struct inode *dp); void dir_init(struct inode *dp);
void dir_flush(struct inode *dp); void dir_flush(struct inode *dp);
// gc.c
void initgc(void);
void initprocgc(struct proc *);
void gc_start(void);
#ifdef __cplusplus
class rcu_freed;
void gc_delayed(rcu_freed *);
#endif
// hz.c // hz.c
void microdelay(u64); void microdelay(u64);
u64 nsectime(void); u64 nsectime(void);
...@@ -182,16 +172,13 @@ void userinit(void); ...@@ -182,16 +172,13 @@ void userinit(void);
int wait(void); int wait(void);
void yield(void); void yield(void);
struct proc* threadalloc(void (*fn)(void*), void *arg); struct proc* threadalloc(void (*fn)(void*), void *arg);
void threadpin(void (*fn)(void*), void *arg, const char *name, int cpu);
// prof.c // prof.c
extern int profenable; extern int profenable;
void profreset(void); void profreset(void);
void profdump(void); void profdump(void);
// rnd.c
u64 rnd();
// sampler.c // sampler.c
void sampstart(void); void sampstart(void);
int sampintr(struct trapframe*); int sampintr(struct trapframe*);
......
#pragma once #pragma once
#include "gc.hh" #include "gc.hh"
#include "atomic.hh"
using std::atomic;
// name spaces // name spaces
// XXX maybe use open hash table, no chain, better cache locality // XXX maybe use open hash table, no chain, better cache locality
...@@ -18,8 +15,8 @@ template<class K, class V> ...@@ -18,8 +15,8 @@ template<class K, class V>
class xelem : public rcu_freed { class xelem : public rcu_freed {
public: public:
V val; V val;
atomic<int> next_lock; std::atomic<int> next_lock;
atomic<xelem<K, V>*> volatile next; std::atomic<xelem<K, V>*> volatile next;
K key; K key;
xelem(const K &k, const V &v) : rcu_freed("xelem"), val(v), next_lock(0), next(0), key(k) {} xelem(const K &k, const V &v) : rcu_freed("xelem"), val(v), next_lock(0), next(0), key(k) {}
...@@ -28,14 +25,14 @@ class xelem : public rcu_freed { ...@@ -28,14 +25,14 @@ class xelem : public rcu_freed {
template<class K, class V> template<class K, class V>
struct xbucket { struct xbucket {
atomic<xelem<K, V>*> volatile chain; std::atomic<xelem<K, V>*> volatile chain;
} __attribute__((aligned (CACHELINE))); } __attribute__((aligned (CACHELINE)));
template<class K, class V, u64 (*HF)(const K&)> template<class K, class V, u64 (*HF)(const K&)>
class xns : public rcu_freed { class xns : public rcu_freed {
private: private:
bool allowdup; bool allowdup;
atomic<u64> nextkey; std::atomic<u64> nextkey;
xbucket<K, V> table[NHASH]; xbucket<K, V> table[NHASH];
public: public:
...@@ -109,8 +106,8 @@ class xns : public rcu_freed { ...@@ -109,8 +106,8 @@ class xns : public rcu_freed {
scoped_gc_epoch gc; scoped_gc_epoch gc;
for (;;) { for (;;) {
atomic<int> fakelock(0); std::atomic<int> fakelock(0);
atomic<int> *pelock = &fakelock; std::atomic<int> *pelock = &fakelock;
auto pe = &table[i].chain; auto pe = &table[i].chain;
for (;;) { for (;;) {
......
#pragma once
u64 rnd();
// Minimal user-level stdio-like stream interface (read-only).
typedef struct fstream {
int fd;            // underlying file descriptor
off_t off;         // current read offset (reads go through pread)
off_t poff;        // offset up to which async prefetch has been issued
struct stat stat;  // snapshot taken at fdopen time (used for file size)
// NOTE(review): plain `int` bitfields may be signed, so a set flag
// can read back as -1 -- treat feof()/ferror() results as booleans.
int err:1;         // sticky error flag
int eof:1;         // sticky end-of-file flag
int pfill:1;       // prefetch enabled (fdopen mode "rp")
} FILE;
FILE *fdopen(int fd, const char *mode);
int fclose(FILE *fp);
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *fp);
int feof(FILE *fp);
int ferror(FILE *fp);
...@@ -67,10 +67,6 @@ $(O)/kernel/%.o: CXXFLAGS+=-mcmodel=large ...@@ -67,10 +67,6 @@ $(O)/kernel/%.o: CXXFLAGS+=-mcmodel=large
$(O)/kernel/incbin.o: ASFLAGS+=-DMAKE_OUT=$(O) $(O)/kernel/incbin.o: ASFLAGS+=-DMAKE_OUT=$(O)
$(O)/kernel/incbin.o: $(O)/kernel/initcode $(O)/kernel/bootother $(O)/fs.img $(O)/kernel/incbin.o: $(O)/kernel/initcode $(O)/kernel/bootother $(O)/fs.img
$(O)/kernel/%.o: kernel/%.S
@echo " CC $@"
$(Q)mkdir -p $(@D)
$(Q)$(CC) $(ASFLAGS) -c -o $@ $<
$(O)/kernel/initcode: TTEXT = 0x0 $(O)/kernel/initcode: TTEXT = 0x0
$(O)/kernel/bootother: TTEXT = 0x7000 $(O)/kernel/bootother: TTEXT = 0x7000
......
...@@ -27,6 +27,12 @@ operator delete(void *p) ...@@ -27,6 +27,12 @@ operator delete(void *p)
} }
void void
operator delete[](void *p)
{
kmfree(p);
}
void
__cxa_pure_virtual(void) __cxa_pure_virtual(void)
{ {
panic("__cxa_pure_virtual"); panic("__cxa_pure_virtual");
......
#include "types.h" #include "crange_arch.hh"
#include "kernel.hh"
#include "mmu.h"
#include "spinlock.h"
#include "condvar.h"
#include "queue.h"
#include "proc.hh"
#include "cpu.hh"
#include "gc.hh" #include "gc.hh"
#include "crange.hh" #include "crange.hh"
#include "cpputil.hh" #include "rnd.hh"
// //
// Concurrent atomic range operations using skip lists. An insert may split an // Concurrent atomic range operations using skip lists. An insert may split an
...@@ -92,14 +85,14 @@ range::print(int l) ...@@ -92,14 +85,14 @@ range::print(int l)
range::~range() range::~range()
{ {
dprintf("%d: range_free: 0x%lx 0x%lx-0x%lx(%ld)\n", myproc()->cpuid, (u64) this, key, key+size, size); //dprintf("%d: range_free: 0x%lx 0x%lx-0x%lx(%ld)\n", myproc()->cpuid, (u64) this, key, key+size, size);
cr->check(this); cr->check(this);
// assert(curlevel == -1); // assert(curlevel == -1);
for (int l = 0; l < nlevel; l++) { for (int l = 0; l < nlevel; l++) {
next[l] = (struct range *) 0xDEADBEEF; next[l] = (struct range *) 0xDEADBEEF;
} }
kmalignfree(lock); kmalignfree(lock);
kmfree(next); delete[] next;
} }
void void
...@@ -107,7 +100,7 @@ range::dec_ref(void) ...@@ -107,7 +100,7 @@ range::dec_ref(void)
{ {
int n = curlevel--; int n = curlevel--;
if (n == 0) { // now removed from all levels. if (n == 0) { // now removed from all levels.
dprintf("%d: free_delayed: 0x%lx 0x%lx-0x%lx(%lu) %lu\n", myproc()->pid, (long) this, key, key + size, size, myproc()->epoch); //dprintf("%d: free_delayed: 0x%lx 0x%lx-0x%lx(%lu) %lu\n", myproc()->pid, (long) this, key, key + size, size, myproc()->epoch);
cr->check(this); cr->check(this);
assert(curlevel == -1); assert(curlevel == -1);
gc_delayed(this); gc_delayed(this);
...@@ -206,7 +199,7 @@ crange::check(struct range *absent) ...@@ -206,7 +199,7 @@ crange::check(struct range *absent)
{ {
if (!crange_checking) if (!crange_checking)
return; return;
int t = mycpu()->id; int t = -1; //mycpu()->id;
struct range *e, *s; struct range *e, *s;
for (int l = 0; l < nlevel; l++) { for (int l = 0; l < nlevel; l++) {
for (e = crange_head->next[l].ptr(); e; e = s) { for (e = crange_head->next[l].ptr(); e; e = s) {
...@@ -288,7 +281,7 @@ crange::add_index(int l, range *e, range *p1, markptr<range> s1) ...@@ -288,7 +281,7 @@ crange::add_index(int l, range *e, range *p1, markptr<range> s1)
if (l >= e->nlevel-1) return; if (l >= e->nlevel-1) return;
if (e->next[l+1].mark()) return; if (e->next[l+1].mark()) return;
// crange_check(cr, NULL); // crange_check(cr, NULL);
if (cmpxch(&e->curlevel, l, l+1)) { if (std::atomic_compare_exchange_strong(&e->curlevel, &l, l+1)) {
assert(e->curlevel < e->nlevel); assert(e->curlevel < e->nlevel);
// this is the core inserting at level l+1, but some core may be deleting // this is the core inserting at level l+1, but some core may be deleting
struct range *s = s1.ptr(); // XXX losing the mark bit ??? struct range *s = s1.ptr(); // XXX losing the mark bit ???
......
#include "types.h" #include "crange_arch.hh"
#include "kernel.hh" #include "gc.hh"
#include "mmu.h" #include "atomic_util.hh"
#include "amd64.h"
#include "spinlock.h"
#include "condvar.h"
#include "queue.h"
#include "proc.hh"
#include "cpu.hh"
#include "ns.hh" #include "ns.hh"
#include "atomic.hh"
using std::atomic;
extern u64 proc_hash(const u32&); extern u64 proc_hash(const u32&);
extern xns<u32, proc*, proc_hash> *xnspid; extern xns<u32, proc*, proc_hash> *xnspid;
...@@ -79,7 +74,8 @@ gc_move_to_tofree_cpu(int c, u64 epoch) ...@@ -79,7 +74,8 @@ gc_move_to_tofree_cpu(int c, u64 epoch)
assert(gc_state[c].delayed[fe].epoch == epoch-(NEPOCH-2)); // XXX race with setting epoch = 0 assert(gc_state[c].delayed[fe].epoch == epoch-(NEPOCH-2)); // XXX race with setting epoch = 0
// unhook list for fe epoch atomically; this shouldn't fail // unhook list for fe epoch atomically; this shouldn't fail
head = gc_state[c].delayed[fe].head; head = gc_state[c].delayed[fe].head;
while (!cmpxch_update(&gc_state[c].delayed[fe].head, &head, (rcu_freed*) 0)) {} while (!std::atomic_compare_exchange_strong(&gc_state[c].delayed[fe].head,
&head, (rcu_freed*) 0)) {}
// insert list into tofree list so that each core can free in parallel and free its elements // insert list into tofree list so that each core can free in parallel and free its elements
if(gc_state[c].tofree[fe].epoch != gc_state[c].delayed[fe].epoch) { if(gc_state[c].tofree[fe].epoch != gc_state[c].delayed[fe].epoch) {
...@@ -256,18 +252,8 @@ initgc(void) ...@@ -256,18 +252,8 @@ initgc(void)
} }
for (int c = 0; c < ncpu; c++) { for (int c = 0; c < ncpu; c++) {
struct proc *gcp; char namebuf[32];
snprintf(namebuf, sizeof(namebuf), "gc_%u", c);
gcp = threadalloc(gc_worker, NULL); threadpin(gc_worker, 0, namebuf, c);
if (gcp == NULL)
panic("threadalloc: gc_worker");
snprintf(gcp->name, sizeof(gcp->name), "gc_%u", c);
gcp->cpuid = c;
gcp->cpu_pin = 1;
acquire(&gcp->lock);
gcp->state = RUNNABLE;
addrun(gcp);
release(&gcp->lock);
} }
} }
...@@ -674,3 +674,21 @@ threadalloc(void (*fn)(void *), void *arg) ...@@ -674,3 +674,21 @@ threadalloc(void (*fn)(void *), void *arg)
p->cwd = 0; p->cwd = 0;
return p; return p;
} }
// Create a kernel thread running fn(arg), give it the supplied name,
// and pin it to the given CPU before making it runnable.
void
threadpin(void (*fn)(void*), void *arg, const char *name, int cpu)
{
  struct proc *t = threadalloc(fn, arg);
  if (t == NULL)
    panic("threadpin: alloc");

  snprintf(t->name, sizeof(t->name), "%s", name);
  t->cpuid = cpu;
  t->cpu_pin = 1;

  acquire(&t->lock);
  t->state = RUNNABLE;
  addrun(t);
  release(&t->lock);
}
#include "types.h" #include "crange_arch.hh"
#include "kernel.hh" #include "rnd.hh"
#include "cpu.hh"
struct seed { struct seed {
u64 v; u64 v;
......
ULIB = ulib.o usys.o printf.o umalloc.o uthread.o fmt.o ULIB = ulib.o usys.o printf.o umalloc.o uthread.o fmt.o stream.o ipc.o
ULIB := $(addprefix $(O)/lib/, $(ULIB)) ULIB := $(addprefix $(O)/lib/, $(ULIB))
$(O)/lib/%.o: lib/%.S
@echo " CC $@"
$(Q)$(CC) $(ASFLAGS) -c -o $@ $<
.PRECIOUS: $(O)/lib/%.o .PRECIOUS: $(O)/lib/%.o
-include $(O)/lib/*.d -include $(O)/lib/*.d
#include "types.h"
#include "user.h"
#include "ipc.hh"
// The shared IPC control region lives at a fixed user virtual
// address (KSHARED), mapped by the kernel.
struct ipcctl *ipcctl = (struct ipcctl*)KSHARED;
// Allocate a message slot from the circular msg ring.
// Returns NULL_MSGID when all IPC_NMSG slots are in use.
msgid_t
ipc_msg_alloc(void)
{
if (ipcctl->msghead - ipcctl->msgtail == IPC_NMSG)
return NULL_MSGID;
msgid_t i = ipcctl->msghead % IPC_NMSG;
ipcctl->msghead++;
return i;
}
// Free a message slot.  Slots must be freed in allocation (FIFO)
// order: msgid must equal the current ring tail.
void
ipc_msg_free(int msgid)
{
msgid_t i;
i = ipcctl->msgtail % IPC_NMSG;
if (i != msgid)
die("ipc_free_msg: oops %u %u", i, msgid);
ipcctl->msgtail++;
}
// Allocate a shared data page from the circular page ring.
// Returns NULL_PAGEID when all IPC_NPAGE pages are in use.
pageid_t
ipc_page_alloc(void)
{
if (ipcctl->pagehead - ipcctl->pagetail == IPC_NPAGE)
return NULL_PAGEID;
pageid_t i = ipcctl->pagehead % IPC_NPAGE;
ipcctl->pagehead++;
return i;
}
// Free a shared page.  Like ipc_msg_free, frees must happen in FIFO
// order: pageid must equal the current ring tail.
void
ipc_page_free(pageid_t pageid)
{
pageid_t i;
i = ipcctl->pagetail % IPC_NPAGE;
if (i != pageid)
die("ipc_free_page: oops");
ipcctl->pagetail++;
}
#include "types.h"
#include "stat.h"
#include "user.h"
#include "stream.h"
#include "lib.h"
#include "amd64.h"
#include "ipc.hh"
static const size_t pstride = 4096*4;
// Submit one asynchronous read of `count` bytes at `off` into a
// freshly allocated shared IPC page, tracked by a new IPC message.
// Returns count on success, -1 on failure.
// NOTE(review): on the failure paths below, the already-allocated
// msg slot (and page, for the async() failure) is never released;
// since the rings only free in FIFO order this leaks the slot --
// confirm callers tolerate this.
static ssize_t
fasync(FILE *fp, size_t count, off_t off)
{
struct ipcmsg *msg;
msgid_t msgid;
pageid_t pageid;
msgid = ipc_msg_alloc();
if (msgid == NULL_MSGID) {
fprintf(2, "fasync: ipc_msg_alloc failed\n");
return -1;
}
pageid = ipc_page_alloc();
if (pageid == NULL_PAGEID) {
fprintf(2, "fasync: ipc_alloc_page failed\n");
return -1;
}
msg = &ipcctl->msg[msgid];
msg->done = 0;
msg->pageid = pageid;
if (async(fp->fd, count, off, msgid, pageid) != 0) {
fprintf(2, "fasync: async failed\n");
return -1;
}
return count;
}
// Issue async reads ahead of the current offset, one IPC page at a
// time, until prefetch covers pstride bytes past fp->off (or the end
// of the file).  No-op unless the stream was opened with prefetch.
static void
fprefill(FILE *fp)
{
size_t target;
if (!fp->pfill)
return;
target = MIN(fp->off + pstride, fp->stat.size);
while (target - fp->poff >= IPC_PGSIZE)
{
size_t count;
int r;
count = MIN(target - fp->poff, IPC_PGSIZE);
r = fasync(fp, count, fp->poff);
if (r < 0)
return;
fp->poff += r;
}
}
FILE*
fdopen(int fd, const char *mode)
{
FILE *fp;
if (mode[0] != 'r')
return 0;
fp = (FILE*)malloc(sizeof(*fp));
if (fp == 0)
return 0;
if (fstat(fd, &fp->stat))
return 0;
fp->fd = fd;
fp->off = 0;
fp->poff = 0;
fp->pfill = mode[1] == 'p';
fprefill(fp);
return fp;
}
// Close the stream's fd and release the FILE object.
// Returns the result of close().
int
fclose(FILE *fp)
{
  // XXX(sbw) free ipcmsgs
  int ret = close(fp->fd);
  free(fp);
  return ret;
}
// Try to satisfy a read of `count` bytes at fp->off from a completed
// (or in-flight) prefetch.  Returns the number of bytes copied, -1 if
// the async read failed, or -2 if the caller should fall back to a
// synchronous pread (prefetch disabled, or no usable message).
static ssize_t
fpostfill(void *ptr, size_t count, FILE*fp)
{
struct ipcmsg *msg;
msgid_t msgid;
if (!fp->pfill)
return -2;
again:
// The oldest outstanding message is always at the ring tail.
msgid = ipcctl->msgtail % IPC_NMSG;
msg = getmsg(msgid);
if (!msg->submitted)
return -2;
// Spin until the kernel marks the async read complete.
while (msg->done == 0)
nop_pause(); // XXX(sbw) yield somewhere?
if (msg->result == -1)
return -1;
if (msg->off > fp->off) {
// Prefetched data starts beyond the current offset; can't use it.
return -2;
} else if ((msg->off + msg->result) < fp->off) {
// Entirely behind the current offset: discard and try the next one.
msg->submitted = 0;
ipc_page_free(msg->pageid);
ipc_msg_free(msgid);
goto again;
}
// The message's page covers fp->off: copy out what we can.
char *buf = getpage(msg->pageid);
off_t boff = fp->off - msg->off;
size_t bcount = MIN(count, msg->result-boff);
memmove(ptr, buf+boff, bcount);
msg->submitted = 0;
ipc_page_free(msg->pageid);
ipc_msg_free(msgid);
return bcount;
}
// Read size*nmemb bytes at the stream offset.  First tries
// prefetched data (fpostfill); falls back to a synchronous pread.
// Sets the sticky err/eof flags and returns 0 on error/end-of-file.
// NOTE(review): unlike ISO C fread, this returns a byte count, not
// an element count -- callers in this tree rely on that.
size_t
fread(void *ptr, size_t size, size_t nmemb, FILE *fp)
{
ssize_t r;
r = fpostfill(ptr, size*nmemb, fp);
if (r == -2)
r = pread(fp->fd, ptr, size*nmemb, fp->off);
if (r < 0) {
fp->err = 1;
return 0;
} else if (r == 0) {
fp->eof = 1;
return 0;
}
fp->off += r;
// Keep the prefetch pipeline topped up past the new offset.
fprefill(fp);
return r;
}
// Nonzero once a read has hit end of file.
int
feof(FILE *fp)
{
return fp->eof;
}
// Nonzero once a read has failed.
int
ferror(FILE *fp)
{
return fp->err;
}
...@@ -14,8 +14,7 @@ LFLAGS += -llwip ...@@ -14,8 +14,7 @@ LFLAGS += -llwip
CFLAGS += -Ilwip/src/include -Inet -Ilwip/src/include/ipv4 -DLWIP CFLAGS += -Ilwip/src/include -Inet -Ilwip/src/include/ipv4 -DLWIP
CXXFLAGS += -Ilwip/src/include -Inet -Ilwip/src/include/ipv4 -DLWIP CXXFLAGS += -Ilwip/src/include -Inet -Ilwip/src/include/ipv4 -DLWIP
LWIP_CFLAGS = $(COMFLAGS) -std=c99 \ LWIP_CFLAGS = -Wno-attributes \
-Wno-attributes \
-Wno-address \ -Wno-address \
-Wno-char-subscripts \ -Wno-char-subscripts \
-Wno-unused-but-set-variable \ -Wno-unused-but-set-variable \
...@@ -26,8 +25,7 @@ LWIP_INCLUDES := \ ...@@ -26,8 +25,7 @@ LWIP_INCLUDES := \
-Ilwip/src/include \ -Ilwip/src/include \
-Ilwip/src/include/ipv4 \ -Ilwip/src/include/ipv4 \
-Inet \ -Inet \
-Inet/arch \ -Inet/arch
-I.
LWIP_SRCFILES += \ LWIP_SRCFILES += \
lwip/src/api/api_lib.c \ lwip/src/api/api_lib.c \
...@@ -59,23 +57,11 @@ LWIP_SRCFILES += \ ...@@ -59,23 +57,11 @@ LWIP_SRCFILES += \
lwip/src/core/udp.c \ lwip/src/core/udp.c \
lwip/src/netif/etharp.c \ lwip/src/netif/etharp.c \
net/sys_arch.c \ net/sys_arch.c \
net/if.c \ net/if.c
LWIP_OBJFILES := $(patsubst %.c, $(O)/%.o, $(LWIP_SRCFILES)) LWIP_OBJFILES := $(patsubst %.c, $(O)/%.o, $(LWIP_SRCFILES))
LWIP_OBJFILES := $(patsubst %.S, $(O)/%.o, $(LWIP_OBJFILES))
$(O)/net/%.o: CFLAGS+=-mcmodel=large
$(O)/net/%.o: CXXFLAGS+=-mcmodel=large $(O)/net/%.o: CXXFLAGS+=-mcmodel=large
$(O)/lwip/src/%.o: CFLAGS+=-mcmodel=large $(LWIP_CFLAGS) $(LWIP_INCLUDES)
$(O)/net/%.o: net/%.c
@echo " CC $@"
$(Q)mkdir -p $(@D)
$(Q)$(CC) $(LWIP_CFLAGS) $(LWIP_INCLUDES) -c -o $@ $<
$(O)/lwip/src/%.o: lwip/src/%.c
@echo " CC $@"
$(Q)mkdir -p $(@D)
$(Q)$(CC) $(LWIP_CFLAGS) $(LWIP_INCLUDES) -c -o $@ $<
$(O)/liblwip.a: $(LWIP_OBJFILES) $(O)/liblwip.a: $(LWIP_OBJFILES)
@echo " AR $@" @echo " AR $@"
......
...@@ -34,6 +34,10 @@ ...@@ -34,6 +34,10 @@
#define NCPU 4 // maximum number of CPUs #define NCPU 4 // maximum number of CPUs
#define MTRACE 0 #define MTRACE 0
#define PERFSIZE (512<<20ull) #define PERFSIZE (512<<20ull)
#elif defined(HW_user)
#define NCPU 256
#define MTRACE 0
#define PERFSIZE (16<<20ull)
#else #else
#error "Unknown HW" #error "Unknown HW"
#endif #endif
CXXFLAGS := -Iuser $(CXXFLAGS) -msse
$(O)/utest: $(O)/kernel/crange.o \
$(O)/kernel/gc.o \
$(O)/kernel/rnd.o \
$(O)/user/umain.o
@echo " LD $@"
$(Q)mkdir -p $(@D)
$(Q)$(CXX) -o $@ $^ -lpthread -lrt
#include <inttypes.h>
#include <stdio.h>
#include <assert.h>
#include <pthread.h>
#include <stdlib.h>
#include <atomic>
#include <utility>
extern "C" {
#include <string.h>
}
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef uintptr_t uptr;
#define cprintf(...) printf(__VA_ARGS__)
#define panic(...) do { printf(__VA_ARGS__); assert(0); } while (0)
#define LOCKSTAT_CRANGE 0
#define LOCKSTAT_GC 0
struct spinlock {
pthread_mutex_t mu;
};
struct condvar {
pthread_cond_t cv;
};
static inline void
acquire(spinlock *s)
{
pthread_mutex_lock(&s->mu);
}
static inline void
release(spinlock *s)
{
pthread_mutex_unlock(&s->mu);
}
static inline int
tryacquire(spinlock *s)
{
return !pthread_mutex_trylock(&s->mu);
}
static inline void
initlock(spinlock *s, const char *m, int lockstat)
{
memset(s, 0, sizeof(*s));
}
static inline void
cv_wakeup(condvar *c)
{
pthread_cond_signal(&c->cv);
}
// Sleep on condvar c (with mutex s held) until wakeup or the
// deadline `ns` (nanoseconds) passes.  pthread_cond_timedwait takes
// an absolute CLOCK_REALTIME timespec, so `ns` is presumably an
// absolute time as produced by nsectime() below -- TODO confirm at
// call sites.  The return value (timeout/error) is ignored; callers
// are expected to recheck their condition.
static inline void
cv_sleepto(condvar *c, spinlock *s, u64 ns)
{
timespec ts;
ts.tv_sec = ns / 1000000000;
ts.tv_nsec = ns % 1000000000;
pthread_cond_timedwait(&c->cv, &s->mu, &ts);
}
static inline u64
nsectime()
{
timespec ts;
clock_gettime(CLOCK_REALTIME, &ts);
return (((u64) 1000000000) * ts.tv_sec) + ts.tv_nsec;
}
static inline void
initcondvar(condvar *c, const char *m)
{
memset(c, 0, sizeof(*c));
}
static inline int
kmalign(void **ptr, size_t align, size_t size)
{
return posix_memalign(ptr, align, size);
}
static inline void
kmalignfree(void *ptr)
{
free(ptr);
}
struct proc {
spinlock gc_epoch_lock;
u64 epoch;
u64 epoch_depth;
u32 cpuid;
u32 pid;
char name[32];
void (*f) (void*);
void *farg;
};
struct cpu {
u32 id;
};
extern pthread_key_t myproc_key;
extern cpu cpus[];
extern proc procs[];
extern u32 ncpu;
extern u64 ticks;
static inline proc*
myproc()
{
return (proc*) pthread_getspecific(myproc_key);
}
static inline cpu*
mycpu()
{
return (cpu*) &cpus[myproc()->cpuid];
}
static inline void pushcli() {}
static inline void popcli() {}
void threadpin(void (*fn)(void*), void *arg, const char *name, int cpu);
namespace intelctr {
using scopedperf::tsc_ctr;
using scopedperf::pmc_setup;
static tsc_ctr tsc;
static pmc_setup<48> l2_ld_hit(0x00410124, "l2 ld hit");
static pmc_setup<48> l2_ld_miss(0x00410224, "l2 ld miss");
// rfo: request for ownership (~write)
static pmc_setup<48> l2_rfo_hit(0x00410424, "l2 rfo hit");
static pmc_setup<48> l2_rfo_miss(0x00410824, "l2 rfo miss");
static pmc_setup<48> l2_i_hit(0x00411024, "l2 i hit");
static pmc_setup<48> l2_i_miss(0x00412024, "l2 i miss");
static pmc_setup<48> l2_prefetch_hit(0x00414024, "l2 pref hit");
static pmc_setup<48> l2_prefetch_miss(0x00418024, "l2 pref miss");
static pmc_setup<48> l2_prefetch(0x0041c024, "l2 prefetch"); // ~zero
static pmc_setup<48> l2_miss(0x0041aa24, "l2 all miss");
static pmc_setup<48> l2_refs(0x0041ff24, "l2 all refs");
// ---
static pmc_setup<48> l2_ld_demand(0x00410f26, "l2 demand ld");
static pmc_setup<48> l2_ld_demand_i(0x00410126, "l2 dem ld i"); // ~l2_ld_miss
static pmc_setup<48> l2_ld_demand_s(0x00410226, "l2 dem ld s"); // ~l2_rfo_miss
static pmc_setup<48> l2_ld_demand_e(0x00410426, "l2 dem ld e");
static pmc_setup<48> l2_ld_demand_m(0x00410826, "l2 dem ld m");
static pmc_setup<48> l2_ld_prefetch(0x0041f026, "l2 prefetch ld"); // ~zero
// ---
static pmc_setup<48> l2_wr_i(0x00410127, "l2 write i");
static pmc_setup<48> l2_wr_s(0x00410227, "l2 write s");
static pmc_setup<48> l2_wr_m(0x00410827, "l2 write m");
static pmc_setup<48> l2_wr_sem(0x00410e27, "l2 write sem");
static pmc_setup<48> l2_wr(0x00410f27, "l2 write"); // l2_wr_i + l2_wr_sem
static pmc_setup<48> l2_wrlk(0x0041f027, "l2 wrlk"); // ??
// ---
// where do loads come from? interesting, but maybe inaccurate?
// doesn't add up to other l2 counters..
static pmc_setup<48> ld_l1hit(0x004101cb, "ld l1 hit");
static pmc_setup<48> ld_l2hit(0x004102cb, "ld l2 hit");
static pmc_setup<48> ld_l3hit_unsh(0x004104cb, "ld l3 unsh");
static pmc_setup<48> ld_l2other(0x004108cb, "ld l2 other");
static pmc_setup<48> ld_offdie(0x004110cb, "ld offdie");
static pmc_setup<48> ld_lfb(0x004140cb, "ld lfb");
static pmc_setup<48> ld_dtlbmiss(0x004180cb, "ld dtlb-miss");
// ---
static pmc_setup<48> uops(0x0041010e, "uops_issued");
static pmc_setup<48> mem_loads(0x0041010b, "mem load ins");
static pmc_setup<48> mem_stores(0x0041020b, "mem store ins");
static pmc_setup<48> dtlb_miss(0x00410149, "dtlb miss");
static pmc_setup<48> itlb_miss(0x00410185, "itlb miss");
}
#pragma once
/*
* Canonical location:
* git+ssh://amsterdam.csail.mit.edu/home/am1/prof/proftools.git
* under spmc/lib/scopedperf.hh
*/
#include <string>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <vector>
#include <algorithm>
#include <assert.h>
#include <string.h>
#include <stdint.h>
#include <sys/time.h>
namespace scopedperf {
/*
* statically enable/disable most of the generated code for profiling.
*/
class default_enabler {
public:
bool enabled() const { return true; }
};
class always_enabled {
public:
bool enabled() const { return true; }
};
class always_disabled {
public:
bool enabled() const { return false; }
};
/*
* spinlock: mostly to avoid pthread mutex sleeping.
*/
/*
 * Busy-waiting lock built on a GCC atomic compare-and-swap;
 * avoids putting threads to sleep the way a pthread mutex would.
 */
class spinlock {
 public:
  spinlock() : locked(0) {}

  // Spin until the 0 -> 1 transition succeeds.
  void acquire() {
    for (;;) {
      if (__sync_bool_compare_and_swap(&locked, 0, 1))
        break;
    }
  }

  // Unlock by storing 0; `locked` is volatile so the store is emitted.
  void release() {
    locked = 0;
  }

 private:
  volatile uint locked;
};
/*
 * RAII guard for spinlock: acquires on construction, releases at
 * scope exit.  release() may also be called early; it is idempotent.
 */
class scoped_spinlock {
 public:
  scoped_spinlock(spinlock *larg) : l(larg), held(false) {
    l->acquire();
    held = true;
  }

  ~scoped_spinlock() { release(); }

  // Drop the lock before the end of scope; safe to call repeatedly.
  void release() {
    if (held) {
      l->release();
      held = false;
    }
  }

 private:
  spinlock *const l;
  bool held;
};
/*
* ctrgroup: a group of performance counters.
*/
// A group of counters sampled together, built as a recursive template
// chain; this is the forward declaration plus the empty-group base case.
template<typename... Counters>
class ctrgroup_chain;
template<>
class ctrgroup_chain<> {
public:
ctrgroup_chain() {}
// Recursion terminators: zero counters, no samples, no names.
static const uint nctr = 0;
void get_samples(uint64_t *v) const {}
void get_delta(uint64_t *delta, uint64_t *prev) const {}
std::vector<std::string> get_names() const { return {}; }
};
// Recursive case: one counter plus the chain of the remaining ones.
template<typename One, typename... Others>
class ctrgroup_chain<One, Others...> : ctrgroup_chain<Others...> {
public:
// Calls setup() on the head counter (e.g. pmc_setup allocates a slot).
ctrgroup_chain(One *x, Others*... y)
: ctrgroup_chain<Others...>(y...), ctr(x)
{
x->setup();
}
static const uint nctr = 1 + ctrgroup_chain<Others...>::nctr;
// Fill v[0..nctr-1] with current raw samples, head first.
void get_samples(uint64_t *v) const {
v[0] = ctr->sample();
ctrgroup_chain<Others...>::get_samples(v+1);
}
// delta[i] = (current - prev[i]) masked to the counter's width, so a
// counter that wrapped still yields the right difference; prev is
// updated to the current sample.
void get_delta(uint64_t *delta, uint64_t *prev) const {
uint64_t x = ctr->sample();
*delta = (x - *prev) & ctr->mask;
*prev = x;
ctrgroup_chain<Others...>::get_delta(delta+1, prev+1);
}
// Names in the same order as samples (head counter first).
std::vector<std::string> get_names() const {
std::vector<std::string> v = ctrgroup_chain<Others...>::get_names();
v.insert(v.begin(), ctr->name);
return v;
}
private:
const One *const ctr;
};
// Convenience factory: deduces the chain type from the counter pointers.
template<typename... Counters>
ctrgroup_chain<Counters...>
ctrgroup(Counters*... args)
{
return ctrgroup_chain<Counters...>(args...);
}
/*
* perfsum: aggregating counter deltas across multiple CPUs.
*/
// Registry and pretty-printer shared by all perfsum instances.
// NOTE(review): there is no unregistration path (no destructor removes
// entries); sums are expected to live for the whole run -- the macros at
// the bottom of this header create them as function-local statics.
class perfsum_base {
public:
enum display_opt { show, hide };
// Registers this sum in the global list under the registry lock.
perfsum_base(const std::string &n, display_opt d) : name(n), disp(d) {
scoped_spinlock x(get_sums_lock());
get_sums()->push_back(this);
}
// Print one table per registered, enabled, non-hidden sum, sorted by
// name: a header row of counter names, then avg / total / count rows.
// w0 and w are the column widths of the row label and the data cells.
static void printall(int w0 = 17, int w = 13) {
scoped_spinlock x(get_sums_lock());
auto sums = get_sums();
std::sort(sums->begin(), sums->end(),
[](perfsum_base *a, perfsum_base *b) { return a->name < b->name; });
for (perfsum_base *ps: *sums) {
if (ps->disp == hide || !ps->get_enabled())
continue;
// get_stats() yields (denominator, numerator) pairs per counter.
auto p = ps->get_stats();
print_row(ps->name, ps->get_names(), w0, w, [](const std::string &name)
{ return name; });
print_row("  avg", p, w0, w, [](const std::pair<uint64_t, uint64_t> &e)
{ return ((double) e.second) / (double) e.first; });
print_row("  total", p, w0, w, [](const std::pair<uint64_t, uint64_t> &e)
{ return e.second; });
print_row("  count", p, w0, w, [](const std::pair<uint64_t, uint64_t> &e)
{ return e.first; });
}
}
// Zero the accumulated statistics of every registered sum.
static void resetall() {
scoped_spinlock x(get_sums_lock());
for (perfsum_base *ps: *get_sums())
ps->reset();
}
virtual std::vector<std::pair<uint64_t, uint64_t> > get_stats() const = 0;
virtual std::vector<std::string> get_names() const = 0;
virtual bool get_enabled() const = 0;
virtual void reset() = 0;
private:
// Print one left-aligned row: label, then f(elem) for each element.
template<class Row, class Callback>
static void print_row(const std::string &rowname, const Row &r,
int w0, int w, Callback f)
{
std::cout << std::left << std::setw(w0) << rowname;
for (const auto &elem: r)
std::cout << std::left << std::setw(w) << f(elem) << " ";
std::cout << std::endl;
}
// Function-local statics: registry and its lock are constructed on
// first use, avoiding static-initialization-order problems.
static std::vector<perfsum_base*> *get_sums() {
static std::vector<perfsum_base*> v;
return &v;
}
static spinlock *get_sums_lock() {
static spinlock l;
return &l;
}
const std::string name;
const display_opt disp;
};
// Empty asm with a "memory" clobber: forbids the compiler from moving
// memory accesses across this point (no CPU fence is emitted).
static inline void
compiler_barrier()
{
/* Avoid compile-time reordering across performance counter reads */
__asm __volatile("" ::: "memory");
}
// A named statistic: accumulates per-CPU deltas of one counter group.
// The optional "base" sum makes printall() report this sum's averages
// relative to the base's per-counter totals (see perfsum_frac()).
template<typename Enabler, typename... Counters>
class perfsum_ctr : public perfsum_base, public Enabler {
public:
  perfsum_ctr(const ctrgroup_chain<Counters...> *c,
              const std::string &n, display_opt d)
    : perfsum_base(n, d), cg(c), base(0)
  {
    reset();
  }

  // Fraction variant: shares the base sum's counter group.
  perfsum_ctr(const std::string &n,
              const perfsum_ctr<Enabler, Counters...> *basesum, display_opt d)
    : perfsum_base(n, d), cg(basesum->cg), base(basesum)
  {
    reset();
  }

  // Snapshot every counter of the group into s[0 .. nctr-1].
  void get_samples(uint64_t *s) const {
    compiler_barrier();
    cg->get_samples(s);
    compiler_barrier();
  }

  // Fold the delta since snapshot s into this CPU's slot; s is advanced
  // to the current counter values so it can seed the next lap.
  void record(uint cpuid, uint64_t *s) {
    uint64_t delta[cg->nctr];
    // stat[] has exactly maxcpu slots; an unchecked cpuid (it comes from
    // sched_getcpu() in base_perf_region) would silently write out of
    // bounds on machines with more than maxcpu CPUs.
    assert(cpuid < maxcpu);
    compiler_barrier();
    cg->get_delta(delta, s);
    compiler_barrier();
    for (uint i = 0; i < cg->nctr; i++)
      stat[cpuid].sum[i] += delta[i];
    stat[cpuid].count++;
  }

  // One (denominator, numerator) pair per counter: the numerator is this
  // sum's total over all CPUs; the denominator is the base sum's matching
  // counter total, or (without a base) this sum's record() count.
  std::vector<std::pair<uint64_t, uint64_t> > get_stats() const /* override */ {
    std::vector<std::pair<uint64_t, uint64_t> > v;
    for (uint i = 0; i < cg->nctr; i++) {
      uint64_t b =
        base ? base->addcpus([i](const stats *s) { return s->sum[i]; })
             : addcpus([](const stats *s) { return s->count; });
      v.push_back(std::make_pair(b,
        addcpus([i](const stats *s) { return s->sum[i]; })));
    }
    return v;
  }

  std::vector<std::string> get_names() const /* override */ {
    return cg->get_names();
  }

  bool get_enabled() const /* override */ {
    return Enabler::enabled();
  }

  void reset() /* override */ {
    memset(stat, 0, sizeof(stat));
  }

private:
  enum { maxcpu = 256 };
  // Per-CPU accumulator, aligned to 128 bytes so slots of different CPUs
  // do not share a cache line.
  struct stats {
    uint64_t count;
    uint64_t sum[ctrgroup_chain<Counters...>::nctr];
  } __attribute__((aligned (128)));
  struct stats stat[maxcpu];
  const struct ctrgroup_chain<Counters...> *const cg;
  const struct perfsum_ctr<Enabler, Counters...> *const base;

  // Sum f(slot) over all per-CPU slots.
  template<class T>
  uint64_t addcpus(T f) const {
    uint64_t tot = 0;
    for (uint i = 0; i < maxcpu; i++)
      tot += f(&stat[i]);
    return tot;
  }
};
// A perfsum that owns its counter group inline: the object is both the
// ctrgroup_chain and the perfsum_ctr recording into it.
template<typename Enabler, typename... Counters>
class perfsum_ctr_inlinegroup :
public ctrgroup_chain<Counters...>,
public perfsum_ctr<Enabler, Counters...>
{
public:
perfsum_ctr_inlinegroup(const std::string &n, perfsum_base::display_opt d,
Counters*... ctrs)
: ctrgroup_chain<Counters...>(ctrs...),
perfsum_ctr<Enabler, Counters...>(this, n, d) {}
};
// Factory: a named sum recording deltas of an existing counter group.
template<typename Enabler = default_enabler, typename... Counters>
perfsum_ctr<Enabler, Counters...>
perfsum(const std::string &name, const ctrgroup_chain<Counters...> *c,
const perfsum_base::display_opt d = perfsum_base::show)
{
return perfsum_ctr<Enabler, Counters...>(c, name, d);
}
// Factory: a named sum that carries its own inline counter group.
template<typename Enabler = default_enabler, typename... Counters>
perfsum_ctr_inlinegroup<Enabler, Counters...>
perfsum_group(const std::string &name, Counters*... c)
{
return perfsum_ctr_inlinegroup<Enabler, Counters...>(name, perfsum_base::show, c...);
}
// Factory: a sum reported as a fraction of another sum's totals
// (shares the base's counter group; see perfsum_ctr::get_stats).
template<typename Enabler, typename... Counters>
perfsum_ctr<Enabler, Counters...>
perfsum_frac(const std::string &name,
const perfsum_ctr<Enabler, Counters...> *base)
{
return perfsum_ctr<Enabler, Counters...>(name, base, perfsum_base::show);
}
/*
* namedctr &c: actual counter implementations.
*/
// Base class for counters: carries the display name and the width mask
// used by ctrgroup_chain::get_delta to handle counter wrap-around.
template<uint64_t CounterWidth>
class namedctr {
public:
namedctr(const std::string &n) : name(n) {}
void setup() {}
const std::string name;
// Low CounterWidth bits set.  Built as ((1 << (W-1)) - 1) << 1 | 1
// rather than (1 << W) - 1 so that W == 64 does not shift by the full
// type width (which would be undefined behavior).
static const uint64_t mask =
((1ULL << (CounterWidth - 1)) - 1) << 1 | 1;
};
// Time-stamp counter via RDTSC (EDX:EAX combined into one 64-bit value).
class tsc_ctr : public namedctr<64> {
public:
tsc_ctr() : namedctr("tsc") {}
static uint64_t sample() {
uint64_t a, d;
__asm __volatile("rdtsc" : "=a" (a), "=d" (d));
return a | (d << 32);
}
};
// Time-stamp counter via RDTSCP; ECX (the processor id) is read to
// satisfy the instruction's outputs but discarded.
class tscp_ctr : public namedctr<64> {
public:
tscp_ctr() : namedctr("tscp") {}
static uint64_t sample() {
uint64_t a, d, c;
__asm __volatile("rdtscp" : "=a" (a), "=d" (d), "=c" (c));
return a | (d << 32);
}
};
// Hardware performance counter read with RDPMC.  cn is the counter
// number passed in ECX; cn == -1 means "not yet assigned" (pmc_setup
// fills it in during setup()).
template<uint64_t CounterWidth>
class pmc_ctr : public namedctr<CounterWidth> {
public:
pmc_ctr(int n) : namedctr<CounterWidth>(mkname(n)), cn(n) {}
pmc_ctr(const std::string &nm) : namedctr<CounterWidth>(nm), cn(-1) {}
uint64_t sample() const {
uint64_t a, d;
__asm __volatile("rdpmc" : "=a" (a), "=d" (d) : "c" (cn));
return a | (d << 32);
}
int cn;
private:
// Default display name: "pmc<n>".
static std::string mkname(int n) {
std::stringstream ss;
ss << "pmc" << n;
return ss.str();
}
};
// A pmc_ctr that also programs the event: setup() grabs a free counter
// slot and writes the raw event-select value pmc_v to every CPU through
// the /sys/kernel/spmc interface.
template<uint64_t CounterWidth = 64>
class pmc_setup : public pmc_ctr<CounterWidth> {
public:
pmc_setup(uint64_t v, const std::string &nm)
: pmc_ctr<CounterWidth>(nm), pmc_v(v) {}
void setup() {
// Already assigned a counter slot: nothing to do.
if (pmc_ctr<CounterWidth>::cn >= 0)
return;
/*
* XXX detect how many counters the hardware has
*/
// Claim the first free slot out of the 4 assumed counters, under a
// lock since setup() may race from multiple ctrgroup constructions.
static bool pmcuse[4];
static spinlock pmcuselock;
int n = 0;
scoped_spinlock x(&pmcuselock);
while (n < 4 && pmcuse[n])
n++;
assert(n < 4);
pmcuse[n] = true;
x.release();
// ugly but effective
std::stringstream ss;
ss << "for f in /sys/kernel/spmc/cpu*/" << n << "; do "
<< "echo " << std::hex << pmc_v << " > $f; done";
assert(0 == system(ss.str().c_str()));
pmc_ctr<CounterWidth>::cn = n;
}
private:
uint64_t pmc_v;
};
// Wall-clock "counter": gettimeofday() expressed as microseconds.
class tod_ctr : public namedctr<64> {
public:
  tod_ctr() : namedctr("tod-usec") {}

  uint64_t sample() const {
    struct timeval now;
    gettimeofday(&now, 0);
    uint64_t usec = (uint64_t) now.tv_sec * 1000000;
    usec += (uint64_t) now.tv_usec;
    return usec;
  }
};
// Constant counter: sample() always yields 0.
class zero_ctr : public namedctr<64> {
public:
  zero_ctr() : namedctr("zero") {}

  uint64_t sample() const {
    return 0;
  }
};
/*
* scoped performance-counting regions, which record samples into a perfsum.
*/
// Measurement region: samples the perfsum's counter group at construction
// and records the delta on each lap().
// NOTE(review): sched_getcpu() is declared in <sched.h>, which this header
// does not include itself -- presumably supplied by the including file;
// verify on non-glibc setups.
template<typename Enabler, typename... Counters>
class base_perf_region {
public:
base_perf_region(perfsum_ctr<Enabler, Counters...> *psarg)
: ps(psarg), enabled(ps->enabled()), cpuid(enabled ? sched_getcpu() : 0)
{
if (enabled)
ps->get_samples(s);
}
// invoke lap multiple times to precisely measure iterations
// (use same measurement for end of one & start of next round)
void lap() {
if (enabled)
ps->record(cpuid, s);
}
private:
perfsum_ctr<Enabler, Counters...> *const ps;
const bool enabled;
// CPU id captured once at region start; record() charges this slot.
const uint cpuid;
// Counter snapshot taken at region start / previous lap.
uint64_t s[ctrgroup_chain<Counters...>::nctr];
};
// Region measured once, from construction to destruction.
template<typename Enabler, typename... Counters>
class scoped_perf_region : public base_perf_region<Enabler, Counters...> {
public:
scoped_perf_region(perfsum_ctr<Enabler, Counters...> *psarg)
: base_perf_region<Enabler, Counters...>(psarg) {}
~scoped_perf_region() { base_perf_region<Enabler, Counters...>::lap(); }
};
// Like scoped_perf_region, but the measurement can be finished early
// (stop) or abandoned entirely (kill) before the destructor runs.
template<typename Enabler, typename... Counters>
class killable_perf_region : public base_perf_region<Enabler, Counters...> {
public:
killable_perf_region(perfsum_ctr<Enabler, Counters...> *psarg)
: base_perf_region<Enabler, Counters...>(psarg), active(true) {}
~killable_perf_region() { stop(); }
// perform a final measurement, if needed before destructor
void stop() {
if (active)
base_perf_region<Enabler, Counters...>::lap();
active = false;
}
// prevent destructor from performing a measurement
void kill() { active = false; }
private:
bool active;
};
// Factory helpers so callers can write "auto r = perf_region(&sum);"
// without spelling out the template arguments.
template<typename Enabler, typename... Counters>
scoped_perf_region<Enabler, Counters...>
perf_region(perfsum_ctr<Enabler, Counters...> *ps)
{
return scoped_perf_region<Enabler, Counters...>(ps);
}
template<typename Enabler, typename... Counters>
killable_perf_region<Enabler, Counters...>
killable_region(perfsum_ctr<Enabler, Counters...> *ps)
{
return killable_perf_region<Enabler, Counters...>(ps);
}
/*
* macros for the common case of putting in a scoped perf-counting region.
*/
// Token-pasting helpers to mint unique identifiers per macro expansion.
#define __PERF_CONCAT2(a, b) a ## b
#define __PERF_CONCAT(a, b) __PERF_CONCAT2(a, b)
#define __PERF_ANON __PERF_CONCAT(__anon_id_, __COUNTER__)
// Declares a function-local static perfsum (created once, lives for the
// whole run) and a region object measuring from here to end of scope.
#define __PERF_REGION(region_var, sum_var, region_type, text, group) \
static auto __PERF_CONCAT(sum_var, _sum) = scopedperf::perfsum(text, group); \
auto region_var = region_type(&__PERF_CONCAT(sum_var, _sum));
// Anonymous scoped region: measures the rest of the enclosing scope.
#define ANON_REGION(text, group) \
__PERF_REGION(__PERF_ANON, __PERF_ANON, scopedperf::perf_region, text, group)
// Named scoped region: var can be used to call lap() explicitly.
#define PERF_REGION(var, text, group) \
__PERF_REGION(var, __PERF_ANON, scopedperf::perf_region, text, group)
// Named killable region: var supports stop()/kill().
#define KILLABLE_REGION(var, text, group) \
__PERF_REGION(var, __PERF_ANON, scopedperf::killable_region, text, group)
} /* namespace scopedperf */
#include <unistd.h>
#include <signal.h>
#include <getopt.h>
#include "crange_arch.hh"
#include "gc.hh"
#include "crange.hh"
#include "atomic_util.hh"
#include "ns.hh"
#include "scopedperf.hh"
#include "intelctr.hh"
#include "arc4.hh"
#include "amd64.h"
// Counter group shared by all measurement regions in this benchmark;
// currently only the TSC (the cache-miss counters are commented out).
static auto perfgroup = ctrgroup(&intelctr::tsc
// ,&intelctr::l2_refs
// ,&intelctr::l2_miss
);
// Identity hash for the pid -> proc* namespace (xnspid below).
u64
proc_hash(const u32 &pid)
{
return pid;
}
// Per-thread slots for the current proc and its arc4 RNG state.
pthread_key_t myproc_key, arc4_key;
// Minimal user-space stand-ins for kernel globals used by crange/gc.
cpu cpus[NCPU];
u32 ncpu;
u64 ticks;
// pid -> proc* lookup table; see proc_hash above.
xns<u32, proc*, proc_hash> *xnspid;
static auto rnd_perfsum = scopedperf::perfsum("arc4 rnd", &perfgroup);
// Per-thread random value of type T from an arc4 stream; the stream is
// created lazily, seeded from rdtsc and the thread id, and stored in
// thread-local storage so threads never contend.
template<class T>
T rnd()
{
auto __PERF_ANON = scopedperf::perf_region(&rnd_perfsum);
arc4 *a = (arc4*) pthread_getspecific(arc4_key);
if (!a) {
struct seed { u64 a, b; } s = { rdtsc(), pthread_self() };
a = new arc4((u8*) &s, sizeof(s));
pthread_setspecific(arc4_key, a);
}
return a->rand<T>();
}
// pthread trampoline: installs the proc in TLS, registers it in xnspid
// under its pthread id, sets up GC for this thread, then runs the
// proc's function.
static void*
proc_start(void *arg)
{
proc *p = (proc *) arg;
pthread_setspecific(myproc_key, p);
p->pid = pthread_self();
initprocgc(p);
xnspid->insert(p->pid, p);
p->f(p->farg);
return 0;
}
// Launch p->f(p->farg) on a new pthread (entry point: proc_start).
// The thread id is not retained and the thread is never joined, so a
// failed pthread_create would previously go unnoticed and the proc
// would silently never run; fail loudly instead.
void
makeproc(proc *p)
{
  pthread_t tid;
  int r = pthread_create(&tid, 0, &proc_start, p);
  assert(r == 0);
}
// Allocate and start a named proc running fn(arg).
// NOTE(review): p->cpuid is recorded but no affinity call is visible
// here or in proc_start -- presumably the pinning happens elsewhere (or
// is a no-op in the user-space build); confirm before relying on it.
void
threadpin(void (*fn)(void*), void *arg, const char *name, int cpu)
{
proc *p = new proc();
memset(p, 0, sizeof(*p));
p->f = fn;
p->farg = arg;
snprintf(p->name, sizeof(p->name), "%s", name);
p->cpuid = cpu;
makeproc(p);
}
// Rendezvous points: populate_b gates on the initial fill, worker_b on
// all workers finishing.
static pthread_barrier_t worker_b, populate_b;
// Total operations across all workers, and initial range count.
enum { iter_total = 1000000 };
enum { crange_items = 1024 };
// Worker thread: performs this thread's share of iter_total random
// operations -- pick a key in [1, 2*crange_items], lock that span, then
// with equal probability delete it (replace with 0) or insert a fresh
// range -- and finally joins the worker barrier.
static void
worker(void *arg)
{
crange *cr = (crange*) arg;
for (u32 i = 0; i < iter_total / ncpu; i++) {
ANON_REGION("worker op", &perfgroup);
u64 k = 1 + rnd<u32>() % (crange_items * 2);
auto span = cr->search_lock(k, 1);
if (rnd<u8>() & 1) {
ANON_REGION("worker del", &perfgroup);
span.replace(0);
} else {
ANON_REGION("worker add", &perfgroup);
span.replace(new range(cr, k, 1));
}
}
pthread_barrier_wait(&worker_b);
}
// Seed the crange with crange_items length-1 ranges at odd keys
// (1, 3, 5, ...), then signal the populate barrier.
static void
populate(void *arg)
{
crange *cr = (crange*) arg;
for (u32 i = 0; i < crange_items; i++)
cr->search_lock(1 + 2*i, 1).replace(new range(cr, 1+2*i, 1));
pthread_barrier_wait(&populate_b);
}
// getopt_long option table; terminated by the all-zero sentinel entry.
static const struct option long_opts[] = {
{ "ncpu", required_argument, 0, 'n' },
{ 0, no_argument, 0, 0 }
};
// Number of significant bits in v: 0 when v == 0, floor(log2(v)) + 1
// otherwise (used to size the crange's skip-list-like levels).
static u32
l2(u64 v)
{
  u32 nbits;
  for (nbits = 0; v != 0; nbits++)
    v >>= 1;
  return nbits;
}
// Stand-alone (user-space) crange stress test: parse -n/--ncpu, fill a
// crange with crange_items entries, run ncpu workers doing random
// add/delete operations, then print the collected perf sums.
int
main(int ac, char **av)
{
  ncpu = NCPU;

  for (;;) {
    int long_idx;
    int opt = getopt_long(ac, av, "n:", long_opts, &long_idx);
    if (opt == -1)
      break;

    switch (opt) {
    case 'n':
      ncpu = atoi(optarg);
      assert(ncpu <= NCPU);
      break;

    case '?':
      printf("Options:\n");
      for (u32 i = 0; long_opts[i].name; i++)
        printf(" -%c / --%s%s\n",
               long_opts[i].val,
               long_opts[i].name,
               long_opts[i].has_arg == required_argument ? " ARG" :
               long_opts[i].has_arg == optional_argument ? " [ARG]" : "");
      exit(-1);
    }
  }

  assert(0 == pthread_key_create(&myproc_key, 0));
  assert(0 == pthread_key_create(&arc4_key, 0));
  for (u32 i = 0; i < NCPU; i++)
    cpus[i].id = i;
  xnspid = new xns<u32, proc*, proc_hash>(false);
  initgc();

  // Barrier of 2: the populate thread plus this thread.
  pthread_barrier_init(&populate_b, 0, 2);
  crange cr(l2(crange_items));
  threadpin(populate, &cr, "populate", 0);
  pthread_barrier_wait(&populate_b);

  pthread_barrier_init(&worker_b, 0, ncpu+1);
  for (u32 i = 0; i < ncpu; i++) {
    char buf[32];
    // i is a u32: the old sprintf(buf, "worker%d", i) passed an unsigned
    // argument to %d (format/type mismatch); use %u and a bounded write.
    snprintf(buf, sizeof(buf), "worker%u", i);
    threadpin(worker, &cr, buf, i);
  }
  pthread_barrier_wait(&worker_b);

  scopedperf::perfsum_base::printall();
  return 0;
}
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论