提交 d41ef66a 创建 作者: Silas Boyd-Wickizer's avatar Silas Boyd-Wickizer

Merge branch 'scale-amd64' of git+ssh://amsterdam.csail.mit.edu/home/am0/6.828/xv6 into scale-amd64

// RC4-style ("arcfour") pseudo-random byte generator.
//
// The state is the 256-entry permutation s plus the two indices i and j.
// Unlike textbook RC4 the indices are not zeroed after key setup: reset()
// starts i at 0xff and the constructor ends with j = i, so the output stream
// does not match standard RC4 test vectors.
//
// The byte type is spelled uint8_t throughout (the class previously mixed it
// with the equivalent alias u8).
//
// The object mutates its own state on every call and does no locking.
class arc4 {
 public:
  // Seeds the generator from key[0 .. nbytes).  The key is consumed in chunks
  // of at most chunk_bytes bytes, each mixed in by one addkey() pass.  Only
  // bytes inside [key, key + nbytes) are read.
  arc4(const uint8_t *key, size_t nbytes) {
    reset();
    for (size_t n = 0; n < nbytes; n += chunk_bytes) {
      // A full chunk, or whatever is left of the key for the last one.
      const size_t left = nbytes - n;
      addkey(&key[n], left > chunk_bytes ? chunk_bytes : left);
    }
    j = i;
  }

  // Returns the next byte of the stream and advances the state.
  uint8_t getbyte() {
    uint8_t si, sj;
    i = (i + 1) & 0xff;
    si = s[i];
    j = (j + si) & 0xff;
    sj = s[j];
    s[i] = sj;
    s[j] = si;
    return s[(si + sj) & 0xff];
  }

  // Returns a T whose object representation is filled with the next
  // sizeof(T) bytes of the stream, in stream order.
  template<class T> T rand() {
    T v;
    uint8_t *out = reinterpret_cast<uint8_t*>(&v);
    for (size_t n = 0; n < sizeof(v); n++)
      out[n] = getbyte();
    return v;
  }

 private:
  // Largest number of key bytes mixed in by a single addkey() pass.
  enum { chunk_bytes = 128 };

  // Puts the state into its initial form: identity permutation, i = 0xff
  // (so the first increment lands on 0), j = 0.
  void reset() {
    i = 0xff;
    j = 0;
    for (unsigned n = 0; n < 0x100; n++)
      s[n] = n;
  }

  // One 256-step key-scheduling pass over the permutation, cycling through
  // key[0 .. nbytes).  nbytes must be non-zero.
  void addkey(const uint8_t *key, size_t nbytes) {
    size_t n, keypos;
    uint8_t si;
    for (n = 0, keypos = 0; n < 256; n++, keypos++) {
      if (keypos >= nbytes)
        keypos = 0;
      i = (i + 1) & 0xff;
      si = s[i];
      j = (j + si + key[keypos]) & 0xff;
      s[i] = s[j];
      s[j] = si;
    }
  }

  uint8_t i;
  uint8_t j;
  uint8_t s[256];
};
......@@ -10,6 +10,8 @@ extern "C" {
#include <string.h>
}
// Short aliases for the fixed-width unsigned integer types.
using u8 = uint8_t;
using u16 = uint16_t;
using u32 = uint32_t;
using u64 = uint64_t;
// Unsigned integer type wide enough to hold a pointer value.
using uptr = uintptr_t;
......@@ -99,11 +101,13 @@ struct proc {
u32 cpuid;
u32 pid;
char name[32];
void (*f) (void*);
void *farg;
};
// Per-CPU record; mycpu() returns the entry of cpus[] selected by the
// calling proc's cpuid.
struct cpu {
// Index of this CPU; main() sets cpus[i].id = i.
u32 id;
// Counter incremented by the cli()-calling pushcli() and decremented by the
// matching popcli().
u32 ncli;
};
extern pthread_key_t myproc_key;
......@@ -124,22 +128,8 @@ mycpu()
return (cpu*) &cpus[myproc()->cpuid];
}
void cli();
void sti();
// Calls cli() and then increments the current CPU's ncli counter.
static inline void
pushcli()
{
cli();
mycpu()->ncli++;
}
// Decrements the current CPU's ncli counter and calls sti() when the counter
// reaches zero.
static inline void
popcli()
{
if (--mycpu()->ncli == 0)
sti();
}
// Empty-bodied pushcli()/popcli(): calling either one does nothing.
static inline void pushcli() {}
static inline void popcli() {}
void threadpin(void (*fn)(void*), void *arg, const char *name, int cpu);
// Performance-counter objects for use with scopedperf.
//
// `tsc` is a tsc_ctr; every other entry is a pmc_setup<48> constructed from a
// 32-bit constant and a short display label.
// NOTE(review): the constants look like Intel performance-event-select
// encodings (low byte = event number, next byte = unit mask, 0x0041.... =
// enable + user-mode bits) and <48> looks like the counter width in bits --
// confirm against scopedperf.hh and the CPU manual before relying on this.
namespace intelctr {
using scopedperf::tsc_ctr;
using scopedperf::pmc_setup;
static tsc_ctr tsc;
static pmc_setup<48> l2_ld_hit(0x00410124, "l2 ld hit");
static pmc_setup<48> l2_ld_miss(0x00410224, "l2 ld miss");
// rfo: request for ownership (~write)
static pmc_setup<48> l2_rfo_hit(0x00410424, "l2 rfo hit");
static pmc_setup<48> l2_rfo_miss(0x00410824, "l2 rfo miss");
static pmc_setup<48> l2_i_hit(0x00411024, "l2 i hit");
static pmc_setup<48> l2_i_miss(0x00412024, "l2 i miss");
static pmc_setup<48> l2_prefetch_hit(0x00414024, "l2 pref hit");
static pmc_setup<48> l2_prefetch_miss(0x00418024, "l2 pref miss");
static pmc_setup<48> l2_prefetch(0x0041c024, "l2 prefetch"); // ~zero
static pmc_setup<48> l2_miss(0x0041aa24, "l2 all miss");
static pmc_setup<48> l2_refs(0x0041ff24, "l2 all refs");
// ---
static pmc_setup<48> l2_ld_demand(0x00410f26, "l2 demand ld");
static pmc_setup<48> l2_ld_demand_i(0x00410126, "l2 dem ld i"); // ~l2_ld_miss
static pmc_setup<48> l2_ld_demand_s(0x00410226, "l2 dem ld s"); // ~l2_rfo_miss
static pmc_setup<48> l2_ld_demand_e(0x00410426, "l2 dem ld e");
static pmc_setup<48> l2_ld_demand_m(0x00410826, "l2 dem ld m");
static pmc_setup<48> l2_ld_prefetch(0x0041f026, "l2 prefetch ld"); // ~zero
// ---
static pmc_setup<48> l2_wr_i(0x00410127, "l2 write i");
static pmc_setup<48> l2_wr_s(0x00410227, "l2 write s");
static pmc_setup<48> l2_wr_m(0x00410827, "l2 write m");
static pmc_setup<48> l2_wr_sem(0x00410e27, "l2 write sem");
static pmc_setup<48> l2_wr(0x00410f27, "l2 write"); // l2_wr_i + l2_wr_sem
static pmc_setup<48> l2_wrlk(0x0041f027, "l2 wrlk"); // ??
// ---
// where do loads come from? interesting, but maybe inaccurate?
// doesn't add up to other l2 counters..
static pmc_setup<48> ld_l1hit(0x004101cb, "ld l1 hit");
static pmc_setup<48> ld_l2hit(0x004102cb, "ld l2 hit");
static pmc_setup<48> ld_l3hit_unsh(0x004104cb, "ld l3 unsh");
static pmc_setup<48> ld_l2other(0x004108cb, "ld l2 other");
static pmc_setup<48> ld_offdie(0x004110cb, "ld offdie");
static pmc_setup<48> ld_lfb(0x004140cb, "ld lfb");
static pmc_setup<48> ld_dtlbmiss(0x004180cb, "ld dtlb-miss");
// ---
static pmc_setup<48> uops(0x0041010e, "uops_issued");
static pmc_setup<48> mem_loads(0x0041010b, "mem load ins");
static pmc_setup<48> mem_stores(0x0041020b, "mem store ins");
static pmc_setup<48> dtlb_miss(0x00410149, "dtlb miss");
static pmc_setup<48> itlb_miss(0x00410185, "itlb miss");
}
差异被折叠。
#include <unistd.h>
#include <signal.h>
#include <getopt.h>
#include "crange_arch.hh"
#include "gc.hh"
#include "crange.hh"
#include "atomic_util.hh"
#include "ns.hh"
#include "scopedperf.hh"
#include "intelctr.hh"
#include "arc4.hh"
#include "amd64.h"
// Counter group passed to the perf regions and perf sums in this file.
// Only the TSC counter is enabled; the two L2 counters are listed but
// commented out.
static auto perfgroup = ctrgroup(&intelctr::tsc
// ,&intelctr::l2_refs
// ,&intelctr::l2_miss
);
u64
proc_hash(const u32 &pid)
......@@ -12,75 +23,165 @@ proc_hash(const u32 &pid)
return pid;
}
// Thread-specific-data key under which each thread stores its proc*.
pthread_key_t myproc_key;
// myproc_key again, together with arc4_key, the key under which rnd<T>()
// keeps the calling thread's arc4 generator.
pthread_key_t myproc_key, arc4_key;
// One record per CPU; main() fills in cpus[i].id.
cpu cpus[NCPU];
// Number of CPUs the benchmark uses; assigned in main().
u32 ncpu;
u64 ticks;
// Table of procs keyed by pid; the thread start routine inserts into it.
xns<u32, proc*, proc_hash> *xnspid;
// Start-up arguments for a new thread, as filled in by threadpin().
struct makeproc_info {
// Function the thread runs.
void (*f) (void*);
// Argument passed to f.
void *farg;
// Name copied into the thread's proc.
const char *name;
// CPU number stored as the proc's cpuid.
int cpu;
};
// Perf summary labelled "arc4 rnd", attached to perfgroup; rnd<T>() opens a
// perf_region on it for the duration of each call.
static auto rnd_perfsum = scopedperf::perfsum("arc4 rnd", &perfgroup);
// NOTE(review): the lines below carry the comment-only cli()/sti() stubs and
// the rnd<T>() helper interleaved line by line.
//
// rnd<T>() looks up the calling thread's arc4 generator under arc4_key.  If
// none is stored yet it creates one, seeded with the 16 bytes of
// { rdtsc(), pthread_self() }, and stores it under the key.  It then returns
// a->rand<T>().
void
cli()
template<class T>
T rnd()
{
/* suspend all threads with the same cpuid */
}
auto __PERF_ANON = scopedperf::perf_region(&rnd_perfsum);
void
sti()
{
/* resume all threads with the same cpuid */
arc4 *a = (arc4*) pthread_getspecific(arc4_key);
if (!a) {
struct seed { u64 a, b; } s = { rdtsc(), pthread_self() };
a = new arc4((u8*) &s, sizeof(s));
pthread_setspecific(arc4_key, a);
}
return a->rand<T>();
}
// Thread entry point handed to pthread_create() by makeproc().
//
// Stores the thread's proc under myproc_key, records pthread_self() as its
// pid, calls initprocgc() on it, inserts it into xnspid keyed by pid, and then
// runs the thread's function with its argument.  Always returns 0.
//
// NOTE(review): two variants are interleaved in this span -- makeproc_start,
// which receives a makeproc_info, allocates the proc itself and deletes the
// info, and proc_start, which receives an already-built proc.
static void*
makeproc_start(void *arg)
proc_start(void *arg)
{
makeproc_info *mpi = (makeproc_info *) arg;
proc *p = (proc *) arg;
proc *p = new proc();
pthread_setspecific(myproc_key, p);
p->pid = pthread_self();
p->cpuid = mpi->cpu;
snprintf(p->name, sizeof(p->name), "%s", mpi->name);
initprocgc(p);
xnspid->insert(p->pid, p);
mpi->f(mpi->farg);
delete mpi;
p->f(p->farg);
return 0;
}
// Starts a new pthread (attribute argument 0) running the thread start
// routine.  The pthread_t is a local that is not used afterwards, and the
// return value of pthread_create() is not checked.
//
// NOTE(review): two variants are interleaved in this span -- one takes a
// makeproc_info and hands a heap copy of it to makeproc_start, the other
// takes a proc and hands it to proc_start directly.
void
makeproc(makeproc_info *mpi)
makeproc(proc *p)
{
pthread_t tid;
makeproc_info *mcopy = new makeproc_info(*mpi);
pthread_create(&tid, 0, &makeproc_start, mcopy);
pthread_create(&tid, 0, &proc_start, p);
}
// Starts a thread that runs fn(arg).
//
// fn/arg: function and argument for the new thread.
// name:   copied (truncated to fit) into the proc's name buffer.
// cpu:    stored as the proc's cpuid.
//
// NOTE(review): despite the name, nothing in the code shown here sets a CPU
// affinity; cpu is only recorded.  Two variants are interleaved in this span:
// one builds a makeproc_info on the stack, the other allocates and
// zero-fills a proc and fills in f, farg, name and cpuid.
void
threadpin(void (*fn)(void*), void *arg, const char *name, int cpu)
{
makeproc_info mpi = { fn, arg, name, cpu };
makeproc(&mpi);
proc *p = new proc();
memset(p, 0, sizeof(*p));
p->f = fn;
p->farg = arg;
snprintf(p->name, sizeof(p->name), "%s", name);
p->cpuid = cpu;
makeproc(p);
}
// Barriers on which main() waits for the worker threads (worker_b) and for
// the populate thread (populate_b).
static pthread_barrier_t worker_b, populate_b;
// Total number of worker operations; each worker runs iter_total / ncpu.
enum { iter_total = 1000000 };
// Number of ranges populate() inserts; workers pick keys in
// [1, 2 * crange_items].
enum { crange_items = 1024 };
static void
worker(void *arg)
{
crange *cr = (crange*) arg;
for (u32 i = 0; i < iter_total / ncpu; i++) {
ANON_REGION("worker op", &perfgroup);
u64 k = 1 + rnd<u32>() % (crange_items * 2);
auto span = cr->search_lock(k, 1);
if (rnd<u8>() & 1) {
ANON_REGION("worker del", &perfgroup);
span.replace(0);
} else {
ANON_REGION("worker add", &perfgroup);
span.replace(new range(cr, k, 1));
}
}
pthread_barrier_wait(&worker_b);
}
static void
populate(void *arg)
{
crange *cr = (crange*) arg;
for (u32 i = 0; i < crange_items; i++)
cr->search_lock(1 + 2*i, 1).replace(new range(cr, 1+2*i, 1));
pthread_barrier_wait(&populate_b);
}
// Long command-line options for getopt_long() in main(): --ncpu requires an
// argument and is reported as the short option 'n'.  The all-zero entry
// terminates the table.
static const struct option long_opts[] = {
{ "ncpu", required_argument, 0, 'n' },
{ 0, no_argument, 0, 0 }
};
// Returns the number of significant bits in v: 0 for v == 0, otherwise
// floor(log2(v)) + 1 (for example 1 -> 1, 1024 -> 11).
static u32
l2(u64 v)
{
  u32 bits = 0;
  for (; v != 0; v >>= 1)
    ++bits;
  return bits;
}
int
main(int ac, char **av)
{
ncpu = NCPU;
for (;;) {
int long_idx;
int opt = getopt_long(ac, av, "n:", long_opts, &long_idx);
if (opt == -1)
break;
switch (opt) {
case 'n':
ncpu = atoi(optarg);
assert(ncpu <= NCPU);
break;
case '?':
printf("Options:\n");
for (u32 i = 0; long_opts[i].name; i++)
printf(" -%c / --%s%s\n",
long_opts[i].val,
long_opts[i].name,
long_opts[i].has_arg == required_argument ? " ARG" :
long_opts[i].has_arg == optional_argument ? " [ARG]" : "");
exit(-1);
}
}
assert(0 == pthread_key_create(&myproc_key, 0));
assert(0 == pthread_key_create(&arc4_key, 0));
for (u32 i = 0; i < NCPU; i++)
cpus[i].id = i;
ncpu = NCPU;
xnspid = new xns<u32, proc*, proc_hash>(false);
xnspid = new xns<u32, proc*, proc_hash>(false);
initgc();
printf("Hello world!\n");
sleep(100);
pthread_barrier_init(&populate_b, 0, 2);
crange cr(l2(crange_items));
threadpin(populate, &cr, "populate", 0);
pthread_barrier_wait(&populate_b);
pthread_barrier_init(&worker_b, 0, ncpu+1);
for (u32 i = 0; i < ncpu; i++) {
char buf[32];
sprintf(buf, "worker%d", i);
threadpin(worker, &cr, buf, i);
}
pthread_barrier_wait(&worker_b);
scopedperf::perfsum_base::printall();
}
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论