提交 0e6651e8 创建，作者：Nickolai Zeldovich

use scopedperf

上级 343ef749
// Performance-counter definitions for use with scopedperf: one TSC counter
// plus a set of pmc_setup<48> objects, each constructed from a raw 32-bit
// event-select value and a short display name.
//
// NOTE(review): the constants look like IA32_PERFEVTSELx encodings (event
// code in bits 0-7, unit mask in bits 8-15, 0x0041.... = USR + EN bits) for
// a Nehalem-class Intel core -- confirm against the Intel SDM before reusing
// them on a different microarchitecture.
// NOTE(review): the template argument 48 is presumably the counter width in
// bits -- confirm in scopedperf.hh.
// NOTE(review): every object is `static`, so if this file is included as a
// header each translation unit gets its own copy -- confirm that is intended.
namespace intelctr {
using scopedperf::tsc_ctr;
using scopedperf::pmc_setup;
static tsc_ctr tsc;
// L2 request counters (low byte 0x24), split by request type and hit/miss.
static pmc_setup<48> l2_ld_hit(0x00410124, "l2 ld hit");
static pmc_setup<48> l2_ld_miss(0x00410224, "l2 ld miss");
// rfo: request for ownership (~write)
static pmc_setup<48> l2_rfo_hit(0x00410424, "l2 rfo hit");
static pmc_setup<48> l2_rfo_miss(0x00410824, "l2 rfo miss");
static pmc_setup<48> l2_i_hit(0x00411024, "l2 i hit");
static pmc_setup<48> l2_i_miss(0x00412024, "l2 i miss");
static pmc_setup<48> l2_prefetch_hit(0x00414024, "l2 pref hit");
static pmc_setup<48> l2_prefetch_miss(0x00418024, "l2 pref miss");
// Mask 0xc0 is the OR of the prefetch hit (0x40) and miss (0x80) masks.
static pmc_setup<48> l2_prefetch(0x0041c024, "l2 prefetch"); // ~zero
// Mask 0xaa is the OR of the four miss masks above (0x02|0x08|0x20|0x80);
// mask 0xff selects every mask bit.
static pmc_setup<48> l2_miss(0x0041aa24, "l2 all miss");
static pmc_setup<48> l2_refs(0x0041ff24, "l2 all refs");
// ---
// L2 data-load counters (low byte 0x26). The i/s/e/m suffixes use one mask
// bit each (0x01/0x02/0x04/0x08); l2_ld_demand (0x0f) is the OR of all four.
// NOTE(review): i/s/e/m presumably name the MESI state of the line -- confirm.
static pmc_setup<48> l2_ld_demand(0x00410f26, "l2 demand ld");
static pmc_setup<48> l2_ld_demand_i(0x00410126, "l2 dem ld i"); // ~l2_ld_miss
static pmc_setup<48> l2_ld_demand_s(0x00410226, "l2 dem ld s"); // ~l2_rfo_miss
static pmc_setup<48> l2_ld_demand_e(0x00410426, "l2 dem ld e");
static pmc_setup<48> l2_ld_demand_m(0x00410826, "l2 dem ld m");
static pmc_setup<48> l2_ld_prefetch(0x0041f026, "l2 prefetch ld"); // ~zero
// ---
// L2 write counters (low byte 0x27). Mask 0x0e (l2_wr_sem) is 0x02|0x04|0x08;
// mask 0x0f (l2_wr) adds the 0x01 bit used by l2_wr_i.
static pmc_setup<48> l2_wr_i(0x00410127, "l2 write i");
static pmc_setup<48> l2_wr_s(0x00410227, "l2 write s");
static pmc_setup<48> l2_wr_m(0x00410827, "l2 write m");
static pmc_setup<48> l2_wr_sem(0x00410e27, "l2 write sem");
static pmc_setup<48> l2_wr(0x00410f27, "l2 write"); // l2_wr_i + l2_wr_sem
static pmc_setup<48> l2_wrlk(0x0041f027, "l2 wrlk"); // ?? (meaning unclear to the original author)
// ---
// Load-source counters (low byte 0xcb), one mask bit per source.
// where do loads come from? interesting, but maybe inaccurate?
// doesn't add up to other l2 counters..
static pmc_setup<48> ld_l1hit(0x004101cb, "ld l1 hit");
static pmc_setup<48> ld_l2hit(0x004102cb, "ld l2 hit");
static pmc_setup<48> ld_l3hit_unsh(0x004104cb, "ld l3 unsh");
static pmc_setup<48> ld_l2other(0x004108cb, "ld l2 other");
static pmc_setup<48> ld_offdie(0x004110cb, "ld offdie");
static pmc_setup<48> ld_lfb(0x004140cb, "ld lfb");
static pmc_setup<48> ld_dtlbmiss(0x004180cb, "ld dtlb-miss");
// ---
// Miscellaneous counters: issued uops, memory load/store instructions, and
// data/instruction TLB misses (as labelled by their display names).
static pmc_setup<48> uops(0x0041010e, "uops_issued");
static pmc_setup<48> mem_loads(0x0041010b, "mem load ins");
static pmc_setup<48> mem_stores(0x0041020b, "mem store ins");
static pmc_setup<48> dtlb_miss(0x00410149, "dtlb miss");
static pmc_setup<48> itlb_miss(0x00410185, "itlb miss");
}
差异被折叠。
......@@ -8,6 +8,8 @@
#include "atomic_util.hh"
#include "ns.hh"
#include "rnd.hh"
#include "scopedperf.hh"
#include "intelctr.hh"
u64
proc_hash(const u32 &pid)
......@@ -57,21 +59,30 @@ threadpin(void (*fn)(void*), void *arg, const char *name, int cpu)
static pthread_barrier_t worker_b, populate_b;
enum { iter_total = 10000000 };
enum { iter_total = 1000000 };
enum { crange_items = 1024 };
// Counter group passed to every ANON_REGION in worker(). Only the TSC counter
// is active; the L2 reference/miss counters are left commented out.
// NOTE(review): presumably they can be re-enabled simply by uncommenting the
// extra arguments -- confirm ctrgroup accepts additional counters.
static auto perfgroup = ctrgroup(&intelctr::tsc
// ,&intelctr::l2_refs
// ,&intelctr::l2_miss
);
static void
worker(void *arg)
{
crange *cr = (crange*) arg;
for (u32 i = 0; i < iter_total / ncpu; i++) {
ANON_REGION("worker op", &perfgroup);
u64 k = 1 + rnd() % (crange_items * 2);
auto span = cr->search_lock(k, 1);
if (rnd() & 1)
if (rnd() & 1) {
ANON_REGION("worker del", &perfgroup);
span.replace(0);
else
} else {
ANON_REGION("worker add", &perfgroup);
span.replace(new range(cr, k, 1));
}
}
pthread_barrier_wait(&worker_b);
......@@ -153,5 +164,5 @@ main(int ac, char **av)
}
pthread_barrier_wait(&worker_b);
printf("exiting\n");
scopedperf::perfsum_base::printall();
}
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论