Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
X
xv6-public
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
问题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
银宸时代
OS Lab Group
奖励实验
xv6-public
提交
0e6651e8
提交
0e6651e8
2月 20, 2012
创建
作者:
Nickolai Zeldovich
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
use scopedperf
上级
343ef749
隐藏空白字符变更
内嵌
并排
正在显示
3 个修改的文件
包含
603 行增加
和
4 行删除
+603
-4
intelctr.hh
user/intelctr.hh
+59
-0
scopedperf.hh
user/scopedperf.hh
+529
-0
umain.cc
user/umain.cc
+15
-4
没有找到文件。
user/intelctr.hh
0 → 100644
浏览文件 @
0e6651e8
/*
 * intelctr: ready-made counter objects for use with scopedperf.
 *
 * Every pmc_setup<48> below pairs a raw event value (handed to the
 * pmc_setup constructor) with the label shown for that counter; 48 is the
 * CounterWidth template argument.
 * NOTE(review): the hex values look like Intel event-select encodings for
 * one specific microarchitecture -- confirm against the CPU manual before
 * reusing them on other hardware.
 */
namespace intelctr {

using scopedperf::tsc_ctr;
using scopedperf::pmc_setup;

static tsc_ctr tsc;

static pmc_setup<48> l2_ld_hit{0x00410124, "l2 ld hit"};
static pmc_setup<48> l2_ld_miss{0x00410224, "l2 ld miss"};

// rfo: request for ownership (~write)
static pmc_setup<48> l2_rfo_hit{0x00410424, "l2 rfo hit"};
static pmc_setup<48> l2_rfo_miss{0x00410824, "l2 rfo miss"};

static pmc_setup<48> l2_i_hit{0x00411024, "l2 i hit"};
static pmc_setup<48> l2_i_miss{0x00412024, "l2 i miss"};

static pmc_setup<48> l2_prefetch_hit{0x00414024, "l2 pref hit"};
static pmc_setup<48> l2_prefetch_miss{0x00418024, "l2 pref miss"};
static pmc_setup<48> l2_prefetch{0x0041c024, "l2 prefetch"};   // author's note: ~zero

static pmc_setup<48> l2_miss{0x0041aa24, "l2 all miss"};
static pmc_setup<48> l2_refs{0x0041ff24, "l2 all refs"};

// ---

static pmc_setup<48> l2_ld_demand{0x00410f26, "l2 demand ld"};
static pmc_setup<48> l2_ld_demand_i{0x00410126, "l2 dem ld i"};   // author's note: ~l2_ld_miss
static pmc_setup<48> l2_ld_demand_s{0x00410226, "l2 dem ld s"};   // author's note: ~l2_rfo_miss
static pmc_setup<48> l2_ld_demand_e{0x00410426, "l2 dem ld e"};
static pmc_setup<48> l2_ld_demand_m{0x00410826, "l2 dem ld m"};
static pmc_setup<48> l2_ld_prefetch{0x0041f026, "l2 prefetch ld"};   // author's note: ~zero

// ---

static pmc_setup<48> l2_wr_i{0x00410127, "l2 write i"};
static pmc_setup<48> l2_wr_s{0x00410227, "l2 write s"};
static pmc_setup<48> l2_wr_m{0x00410827, "l2 write m"};
static pmc_setup<48> l2_wr_sem{0x00410e27, "l2 write sem"};
static pmc_setup<48> l2_wr{0x00410f27, "l2 write"};   // author's note: l2_wr_i + l2_wr_sem
static pmc_setup<48> l2_wrlk{0x0041f027, "l2 wrlk"};   // author's note: ??

// ---

// where do loads come from?  interesting, but maybe inaccurate?
// doesn't add up to other l2 counters..
static pmc_setup<48> ld_l1hit{0x004101cb, "ld l1 hit"};
static pmc_setup<48> ld_l2hit{0x004102cb, "ld l2 hit"};
static pmc_setup<48> ld_l3hit_unsh{0x004104cb, "ld l3 unsh"};
static pmc_setup<48> ld_l2other{0x004108cb, "ld l2 other"};
static pmc_setup<48> ld_offdie{0x004110cb, "ld offdie"};
static pmc_setup<48> ld_lfb{0x004140cb, "ld lfb"};
static pmc_setup<48> ld_dtlbmiss{0x004180cb, "ld dtlb-miss"};

// ---

static pmc_setup<48> uops{0x0041010e, "uops_issued"};
static pmc_setup<48> mem_loads{0x0041010b, "mem load ins"};
static pmc_setup<48> mem_stores{0x0041020b, "mem store ins"};
static pmc_setup<48> dtlb_miss{0x00410149, "dtlb miss"};
static pmc_setup<48> itlb_miss{0x00410185, "itlb miss"};

}
user/scopedperf.hh
0 → 100644
浏览文件 @
0e6651e8
#pragma once
/*
* Canonical location:
* git+ssh://amsterdam.csail.mit.edu/home/am1/prof/proftools.git
* under spmc/lib/scopedperf.hh
*/
#include <string>
#include <iostream>
#include <iomanip>
#include <sstream>
#include <vector>
#include <algorithm>
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
namespace
scopedperf
{
/*
* statically enable/disable most of the generated code for profiling.
*/
/*
 * Enabler policy picked when a perfsum is created without naming one
 * explicitly; it reports profiling as switched on.
 */
struct default_enabler {
  bool enabled() const { return true; }
};
/* Enabler policy that unconditionally reports profiling as switched on. */
struct always_enabled {
  bool enabled() const { return true; }
};
/* Enabler policy that unconditionally reports profiling as switched off. */
struct always_disabled {
  bool enabled() const { return false; }
};
/*
 * spinlock: a minimal busy-waiting lock, mostly to avoid pthread mutex
 * sleeping.
 *
 * The lock word is 0 when free and 1 when held.
 */
class spinlock {
 public:
  spinlock() : x(0) {}

  /* Spin until the lock word is atomically switched from 0 to 1. */
  void acquire() {
    while (!__sync_bool_compare_and_swap(&x, 0, 1))
      ;
  }

  /*
   * Free the lock.  __sync_lock_release writes 0 with release semantics,
   * so memory accesses made inside the critical section cannot be moved
   * past the unlocking store.  A plain store to a volatile gives no such
   * ordering guarantee for the surrounding non-volatile accesses.
   */
  void release() {
    __sync_lock_release(&x);
  }

 private:
  volatile unsigned int x;
};
/*
 * scoped_spinlock: guard object that acquires a spinlock on construction
 * and releases it on destruction, unless release() was called earlier.
 */
class scoped_spinlock {
 public:
  /* Acquire *larg; the pointer must stay valid for the guard's lifetime. */
  scoped_spinlock(spinlock *larg) : l(larg), held(false) {
    l->acquire();
    held = true;
  }

  /*
   * A copy would share the `held` flag value and release the same lock a
   * second time, so copying is disabled.
   */
  scoped_spinlock(const scoped_spinlock &) = delete;
  scoped_spinlock &operator=(const scoped_spinlock &) = delete;

  /* Release the lock early; calling this more than once is harmless. */
  void release() {
    if (held)
      l->release();
    held = false;
  }

  ~scoped_spinlock() { release(); }

 private:
  spinlock * const l;
  bool held;   // true while this guard owns the lock
};
/*
* ctrgroup: a group of performance counters.
*/
template<typename... Counters>
class ctrgroup_chain;

/*
 * End of the recursion: a group holding no counters.  It reports zero
 * counters, leaves the sample/delta buffers untouched and has no names.
 */
template<>
class ctrgroup_chain<> {
 public:
  ctrgroup_chain() {}

  static const uint nctr = 0;

  /* Nothing to sample. */
  void get_samples(uint64_t *) const {}

  /* Nothing to compute. */
  void get_delta(uint64_t *, uint64_t *) const {}

  /* Returns an empty list. */
  std::vector<std::string> get_names() const {
    return std::vector<std::string>();
  }
};
/*
 * Recursive case: this link owns a pointer to one counter of type One and
 * inherits the links for the remaining counters.  Buffers passed to
 * get_samples/get_delta must have room for nctr entries; entry 0 belongs
 * to this link and the rest to the inherited links.
 */
template<typename One, typename... Others>
class ctrgroup_chain<One, Others...> : ctrgroup_chain<Others...> {
 public:
  /* Stores the counter pointers and calls setup() on the first one. */
  ctrgroup_chain(One *x, Others*... y)
    : ctrgroup_chain<Others...>(y...), ctr(x)
  {
    x->setup();
  }

  static const uint nctr = 1 + ctrgroup_chain<Others...>::nctr;

  /* Write the current value of every counter into v[0..nctr). */
  void get_samples(uint64_t *v) const {
    *v = ctr->sample();
    ctrgroup_chain<Others...>::get_samples(&v[1]);
  }

  /*
   * For every counter: store in delta[] how far it advanced since the
   * value saved in prev[] (truncated with the counter's mask), then save
   * the new reading in prev[].
   */
  void get_delta(uint64_t *delta, uint64_t *prev) const {
    const uint64_t now = ctr->sample();
    delta[0] = (now - prev[0]) & ctr->mask;
    prev[0] = now;
    ctrgroup_chain<Others...>::get_delta(&delta[1], &prev[1]);
  }

  /* Counter names, in the same order as the sample buffers. */
  std::vector<std::string> get_names() const {
    std::vector<std::string> names(1, ctr->name);
    const std::vector<std::string> rest =
      ctrgroup_chain<Others...>::get_names();
    names.insert(names.end(), rest.begin(), rest.end());
    return names;
  }

 private:
  const One * const ctr;
};
/*
 * Convenience factory: builds a ctrgroup_chain whose counter types are
 * deduced from the pointers passed in.
 */
template<typename... Counters>
ctrgroup_chain<Counters...>
ctrgroup(Counters*... args)
{
  typedef ctrgroup_chain<Counters...> chain_type;
  return chain_type(args...);
}
/*
* perfsum: aggregating counter deltas across multiple CPUs.
*/
/*
 * perfsum_base: type-independent part of a performance sum.
 *
 * Every live perfsum_base is listed in a process-wide registry guarded by
 * a spinlock, so printall() and resetall() can reach all of them.
 * Objects add themselves on construction (copies included) and remove
 * themselves on destruction, so the registry never holds a pointer to a
 * dead object.
 */
class perfsum_base {
 public:
  enum display_opt { show, hide };

  /* n is the row label used by printall(); d selects show/hide. */
  perfsum_base(const std::string &n, display_opt d)
    : name(n), disp(d)
  {
    enroll();
  }

  /*
   * A copy is a separate object and must be registered itself; otherwise
   * a sum returned by value without copy elision would leave only the
   * destroyed temporary in the registry.
   */
  perfsum_base(const perfsum_base &o)
    : name(o.name), disp(o.disp)
  {
    enroll();
  }

  /* Drop this object from the registry. */
  virtual ~perfsum_base() {
    scoped_spinlock x(get_sums_lock());
    std::vector<perfsum_base*> *sums = get_sums();
    sums->erase(std::remove(sums->begin(), sums->end(), this), sums->end());
  }

  /*
   * Print every registered sum that is shown and enabled to std::cout,
   * sorted by name.  Each sum produces four rows: counter names, avg
   * (second/first of each stats pair), total (second) and count (first).
   * w0 is the width of the label column, w the width of the other columns.
   */
  static void printall(int w0 = 17, int w = 13) {
    scoped_spinlock x(get_sums_lock());
    auto sums = get_sums();
    std::sort(sums->begin(), sums->end(),
              [](perfsum_base *a, perfsum_base *b) { return a->name < b->name; });
    for (perfsum_base *ps: *sums) {
      if (ps->disp == hide || !ps->get_enabled())
        continue;
      auto p = ps->get_stats();
      print_row(ps->name, ps->get_names(), w0, w,
                [](const std::string &name) { return name; });
      print_row(" avg", p, w0, w,
                [](const std::pair<uint64_t, uint64_t> &e)
                { return ((double) e.second) / (double) e.first; });
      print_row(" total", p, w0, w,
                [](const std::pair<uint64_t, uint64_t> &e)
                { return e.second; });
      print_row(" count", p, w0, w,
                [](const std::pair<uint64_t, uint64_t> &e)
                { return e.first; });
    }
  }

  /* Call reset() on every registered sum. */
  static void resetall() {
    scoped_spinlock x(get_sums_lock());
    for (perfsum_base *ps: *get_sums())
      ps->reset();
  }

  /* One (first, second) pair per counter; see printall() for their use. */
  virtual std::vector<std::pair<uint64_t, uint64_t> > get_stats() const = 0;
  /* Column headings, one per counter. */
  virtual std::vector<std::string> get_names() const = 0;
  /* Whether this sum should be printed at all. */
  virtual bool get_enabled() const = 0;
  /* Discard everything accumulated so far. */
  virtual void reset() = 0;

 private:
  /* Add this object to the registry. */
  void enroll() {
    scoped_spinlock x(get_sums_lock());
    get_sums()->push_back(this);
  }

  /*
   * Print one table row: the label, then f(elem) for each element of r,
   * all left-aligned, followed by a newline.
   */
  template<class Row, class Callback>
  static void print_row(const std::string &rowname, const Row &r,
                        int w0, int w, Callback f)
  {
    std::cout << std::left << std::setw(w0) << rowname;
    for (const auto &elem: r)
      std::cout << std::left << std::setw(w) << f(elem) << " ";
    std::cout << std::endl;
  }

  /* The registry; a function-local static so it exists on first use. */
  static std::vector<perfsum_base*> *get_sums() {
    static std::vector<perfsum_base*> v;
    return &v;
  }

  /* Lock protecting the registry. */
  static spinlock *get_sums_lock() {
    static spinlock l;
    return &l;
  }

  const std::string name;
  const display_opt disp;
};
/*
 * Empty asm statement with a "memory" clobber: keeps the compiler from
 * reordering memory accesses across performance counter reads.
 */
static inline void
compiler_barrier()
{
  __asm__ __volatile__("" : : : "memory");
}
template
<
typename
Enabler
,
typename
...
Counters
>
class
perfsum_ctr
:
public
perfsum_base
,
public
Enabler
{
public
:
perfsum_ctr
(
const
ctrgroup_chain
<
Counters
...
>
*
c
,
const
std
::
string
&
n
,
display_opt
d
)
:
perfsum_base
(
n
,
d
),
cg
(
c
),
base
(
0
)
{
reset
();
}
perfsum_ctr
(
const
std
::
string
&
n
,
const
perfsum_ctr
<
Enabler
,
Counters
...
>
*
basesum
,
display_opt
d
)
:
perfsum_base
(
n
,
d
),
cg
(
basesum
->
cg
),
base
(
basesum
)
{
reset
();
}
void
get_samples
(
uint64_t
*
s
)
const
{
compiler_barrier
();
cg
->
get_samples
(
s
);
compiler_barrier
();
}
void
record
(
uint
cpuid
,
uint64_t
*
s
)
{
uint64_t
delta
[
cg
->
nctr
];
compiler_barrier
();
cg
->
get_delta
(
delta
,
s
);
compiler_barrier
();
for
(
uint
i
=
0
;
i
<
cg
->
nctr
;
i
++
)
stat
[
cpuid
].
sum
[
i
]
+=
delta
[
i
];
stat
[
cpuid
].
count
++
;
}
std
::
vector
<
std
::
pair
<
uint64_t
,
uint64_t
>
>
get_stats
()
const
/* override */
{
std
::
vector
<
std
::
pair
<
uint64_t
,
uint64_t
>
>
v
;
for
(
uint
i
=
0
;
i
<
cg
->
nctr
;
i
++
)
{
uint64_t
b
=
base
?
base
->
addcpus
([
i
](
const
stats
*
s
)
{
return
s
->
sum
[
i
];
})
:
addcpus
([](
const
stats
*
s
)
{
return
s
->
count
;
});
v
.
push_back
(
std
::
make_pair
(
b
,
addcpus
([
i
](
const
stats
*
s
)
{
return
s
->
sum
[
i
];
})));
}
return
v
;
}
std
::
vector
<
std
::
string
>
get_names
()
const
/* override */
{
return
cg
->
get_names
();
}
bool
get_enabled
()
const
/* override */
{
return
Enabler
::
enabled
();
}
void
reset
()
/* override */
{
memset
(
stat
,
0
,
sizeof
(
stat
));
}
private
:
enum
{
maxcpu
=
256
};
struct
stats
{
uint64_t
count
;
uint64_t
sum
[
ctrgroup_chain
<
Counters
...
>::
nctr
];
}
__attribute__
((
aligned
(
128
)));
struct
stats
stat
[
maxcpu
];
const
struct
ctrgroup_chain
<
Counters
...
>
*
const
cg
;
const
struct
perfsum_ctr
<
Enabler
,
Counters
...
>
*
const
base
;
template
<
class
T
>
uint64_t
addcpus
(
T
f
)
const
{
uint64_t
tot
=
0
;
for
(
uint
i
=
0
;
i
<
maxcpu
;
i
++
)
tot
+=
f
(
&
stat
[
i
]);
return
tot
;
}
};
/*
 * perfsum_ctr_inlinegroup: a perfsum_ctr that carries its own counter
 * group as a base class instead of pointing at a separate one.  The group
 * base is listed (and therefore constructed) first, and its address is
 * then handed to the perfsum_ctr base.
 */
template<typename Enabler, typename... Counters>
class perfsum_ctr_inlinegroup
  : public ctrgroup_chain<Counters...>,
    public perfsum_ctr<Enabler, Counters...>
{
 public:
  perfsum_ctr_inlinegroup(const std::string &n,
                          perfsum_base::display_opt d,
                          Counters*... ctrs)
    : ctrgroup_chain<Counters...>(ctrs...),
      perfsum_ctr<Enabler, Counters...>(
        static_cast<const ctrgroup_chain<Counters...> *>(this), n, d)
  {
  }
};
/*
 * Factory for a perfsum_ctr over an existing counter group.  The counter
 * types are deduced from *c; the Enabler defaults to default_enabler and
 * the sum is shown by printall() unless d says otherwise.
 */
template<typename Enabler = default_enabler, typename... Counters>
perfsum_ctr<Enabler, Counters...>
perfsum(const std::string &name, const ctrgroup_chain<Counters...> *c,
        const perfsum_base::display_opt d = perfsum_base::show)
{
  typedef perfsum_ctr<Enabler, Counters...> sum_type;
  return sum_type(c, name, d);
}
/*
 * Factory for a sum that embeds its own counter group, built from the
 * counter pointers c...; the result is always created with
 * perfsum_base::show.
 */
template<typename Enabler = default_enabler, typename... Counters>
perfsum_ctr_inlinegroup<Enabler, Counters...>
perfsum_group(const std::string &name, Counters*... c)
{
  typedef perfsum_ctr_inlinegroup<Enabler, Counters...> group_sum_type;
  return group_sum_type(name, perfsum_base::show, c...);
}
/*
 * Factory for a sum that is reported relative to another sum: it uses the
 * perfsum_ctr constructor taking `base`, and is always created with
 * perfsum_base::show.
 */
template<typename Enabler, typename... Counters>
perfsum_ctr<Enabler, Counters...>
perfsum_frac(const std::string &name,
             const perfsum_ctr<Enabler, Counters...> *base)
{
  typedef perfsum_ctr<Enabler, Counters...> sum_type;
  return sum_type(name, base, perfsum_base::show);
}
/*
* namedctr &c: actual counter implementations.
*/
/*
 * namedctr: common base of all counters.  It carries the display name and
 * a mask with the low CounterWidth bits set, which is used to truncate
 * counter differences.
 */
template<uint64_t CounterWidth>
class namedctr {
 public:
  namedctr(const std::string &label) : name(label) {}

  /* Default: a counter needs no preparation. */
  void setup() {}

  const std::string name;

  /*
   * Built in two shifts so that CounterWidth == 64 never shifts by the
   * full width of the type.
   */
  static const uint64_t mask =
    (((1ULL << (CounterWidth - 1)) - 1) << 1) | 1;
};
/*
 * tsc_ctr: 64-bit counter named "tsc" whose value comes from the x86
 * rdtsc instruction.
 */
class tsc_ctr : public namedctr<64> {
 public:
  tsc_ctr() : namedctr<64>("tsc") {}

  /* rdtsc returns the low half in eax and the high half in edx. */
  static uint64_t sample() {
    uint64_t lo, hi;
    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
    return (hi << 32) | lo;
  }
};
/*
 * tscp_ctr: 64-bit counter named "tscp" whose value comes from the x86
 * rdtscp instruction.
 */
class tscp_ctr : public namedctr<64> {
 public:
  tscp_ctr() : namedctr<64>("tscp") {}

  /*
   * rdtscp returns the low half in eax and the high half in edx; it also
   * writes ecx, which is declared as an output and then ignored.
   */
  static uint64_t sample() {
    uint64_t lo, hi, aux;
    __asm__ __volatile__("rdtscp" : "=a" (lo), "=d" (hi), "=c" (aux));
    return (hi << 32) | lo;
  }
};
/*
 * pmc_ctr: counter read with the x86 rdpmc instruction.  `cn` is the
 * counter number loaded into ecx; it is -1 when the object was created
 * from a name only and no counter number has been assigned yet.
 */
template<uint64_t CounterWidth>
class pmc_ctr : public namedctr<CounterWidth> {
 public:
  /* Counter number n, named "pmc<n>". */
  pmc_ctr(int n)
    : namedctr<CounterWidth>(mkname(n)), cn(n)
  {
  }

  /* Named counter without a counter number yet. */
  pmc_ctr(const std::string &nm)
    : namedctr<CounterWidth>(nm), cn(-1)
  {
  }

  /* rdpmc returns the low half in eax and the high half in edx. */
  uint64_t sample() const {
    uint64_t lo, hi;
    __asm__ __volatile__("rdpmc" : "=a" (lo), "=d" (hi) : "c" (cn));
    return (hi << 32) | lo;
  }

  int cn;

 private:
  /* Builds the default name "pmc<n>". */
  static std::string mkname(int n) {
    std::ostringstream label;
    label << "pmc";
    label << n;
    return label.str();
  }
};
template
<
uint64_t
CounterWidth
=
64
>
class
pmc_setup
:
public
pmc_ctr
<
CounterWidth
>
{
public
:
pmc_setup
(
uint64_t
v
,
const
std
::
string
&
nm
)
:
pmc_ctr
<
CounterWidth
>
(
nm
),
pmc_v
(
v
)
{}
void
setup
()
{
if
(
pmc_ctr
<
CounterWidth
>::
cn
>=
0
)
return
;
/*
* XXX detect how many counters the hardware has
*/
static
bool
pmcuse
[
4
];
static
spinlock
pmcuselock
;
int
n
=
0
;
scoped_spinlock
x
(
&
pmcuselock
);
while
(
n
<
4
&&
pmcuse
[
n
])
n
++
;
assert
(
n
<
4
);
pmcuse
[
n
]
=
true
;
x
.
release
();
// ugly but effective
std
::
stringstream
ss
;
ss
<<
"for f in /sys/kernel/spmc/cpu*/"
<<
n
<<
"; do "
<<
"echo "
<<
std
::
hex
<<
pmc_v
<<
" > $f; done"
;
assert
(
0
==
system
(
ss
.
str
().
c_str
()));
pmc_ctr
<
CounterWidth
>::
cn
=
n
;
}
private
:
uint64_t
pmc_v
;
};
/*
 * tod_ctr: 64-bit counter named "tod-usec" that reports the time of day
 * from gettimeofday() as microseconds.
 */
class tod_ctr : public namedctr<64> {
 public:
  tod_ctr() : namedctr<64>("tod-usec") {}

  uint64_t sample() const {
    struct timeval now;
    gettimeofday(&now, 0);
    const uint64_t secs = (uint64_t) now.tv_sec;
    const uint64_t usecs = (uint64_t) now.tv_usec;
    return usecs + secs * 1000000;
  }
};
/* zero_ctr: 64-bit counter named "zero" whose reading is always 0. */
class zero_ctr : public namedctr<64> {
 public:
  zero_ctr() : namedctr<64>("zero") {}

  uint64_t sample() const { return 0; }
};
/*
* scoped performance-counting regions, which record samples into a perfsum.
*/
/*
 * base_perf_region: shared machinery of the measured regions.
 *
 * On construction it asks the sum whether profiling is enabled; if so it
 * remembers the CPU number from sched_getcpu() and takes the starting
 * counter readings.  Each lap() records the advance since the previous
 * readings into the sum.  A disabled region does nothing.
 */
template<typename Enabler, typename... Counters>
class base_perf_region {
 public:
  base_perf_region(perfsum_ctr<Enabler, Counters...> *psarg)
    : ps(psarg),
      enabled(ps->enabled()),
      cpuid(enabled ? sched_getcpu() : 0)
  {
    if (!enabled)
      return;
    ps->get_samples(s);
  }

  // invoke lap multiple times to precisely measure iterations
  // (use same measurement for end of one & start of next round)
  void lap() {
    if (!enabled)
      return;
    ps->record(cpuid, s);
  }

 private:
  perfsum_ctr<Enabler, Counters...> * const ps;
  const bool enabled;
  const uint cpuid;
  uint64_t s[ctrgroup_chain<Counters...>::nctr];   // readings at the last lap/start
};
/*
 * scoped_perf_region: a region that records one final lap when it goes
 * out of scope.
 */
template<typename Enabler, typename... Counters>
class scoped_perf_region : public base_perf_region<Enabler, Counters...> {
 public:
  scoped_perf_region(perfsum_ctr<Enabler, Counters...> *psarg)
    : base_perf_region<Enabler, Counters...>(psarg)
  {
  }

  ~scoped_perf_region() {
    this->lap();
  }
};
/*
 * killable_perf_region: like a scoped region, but the final measurement
 * can be taken early with stop() or suppressed altogether with kill().
 */
template<typename Enabler, typename... Counters>
class killable_perf_region : public base_perf_region<Enabler, Counters...> {
 public:
  killable_perf_region(perfsum_ctr<Enabler, Counters...> *psarg)
    : base_perf_region<Enabler, Counters...>(psarg), active(true)
  {
  }

  ~killable_perf_region() {
    stop();
  }

  // perform a final measurement, if needed before destructor
  void stop() {
    if (active) {
      this->lap();
      active = false;
    }
  }

  // prevent destructor from performing a measurement
  void kill() {
    active = false;
  }

 private:
  bool active;   // true until stop() or kill() has been called
};
/*
 * Factory for a scoped_perf_region recording into *ps; the template
 * arguments are deduced from the sum's type.
 */
template<typename Enabler, typename... Counters>
scoped_perf_region<Enabler, Counters...>
perf_region(perfsum_ctr<Enabler, Counters...> *ps)
{
  typedef scoped_perf_region<Enabler, Counters...> region_type;
  return region_type(ps);
}
/*
 * Factory for a killable_perf_region recording into *ps; the template
 * arguments are deduced from the sum's type.
 */
template<typename Enabler, typename... Counters>
killable_perf_region<Enabler, Counters...>
killable_region(perfsum_ctr<Enabler, Counters...> *ps)
{
  typedef killable_perf_region<Enabler, Counters...> region_type;
  return region_type(ps);
}
/*
 * macros for the common case of putting in a scoped perf-counting region.
 *
 * Each of ANON_REGION / PERF_REGION / KILLABLE_REGION expands to two
 * declarations: a function-local static perfsum named <id>_sum, created
 * from (text, group), and a local region object recording into it.
 * `text` is the label of the sum and `group` a pointer to a counter group.
 */
/*
 * Two-level paste: going through __PERF_CONCAT lets the arguments be
 * macro-expanded (e.g. __COUNTER__) before ## joins them.
 */
#define __PERF_CONCAT2(a, b) a ## b
#define __PERF_CONCAT(a, b) __PERF_CONCAT2(a, b)
/* A fresh identifier on every expansion, built from __COUNTER__. */
#define __PERF_ANON __PERF_CONCAT(__anon_id_, __COUNTER__)
/*
 * Common body.  Note that the expansion already ends in ';', and that it
 * consists of two statements, so it cannot serve as the unbraced body of
 * an if/for.
 */
#define __PERF_REGION(region_var, sum_var, region_type, text, group) \
static auto __PERF_CONCAT(sum_var, _sum) = scopedperf::perfsum(text, group); \
auto region_var = region_type(&__PERF_CONCAT(sum_var, _sum));
/* Scoped region whose variable name is generated. */
#define ANON_REGION(text, group) \
__PERF_REGION(__PERF_ANON, __PERF_ANON, scopedperf::perf_region, text, group)
/* Scoped region bound to the caller-chosen variable `var`. */
#define PERF_REGION(var, text, group) \
__PERF_REGION(var, __PERF_ANON, scopedperf::perf_region, text, group)
/* Killable region bound to the caller-chosen variable `var`. */
#define KILLABLE_REGION(var, text, group) \
__PERF_REGION(var, __PERF_ANON, scopedperf::killable_region, text, group)
}
/* namespace scopedperf */
user/umain.cc
浏览文件 @
0e6651e8
...
...
@@ -8,6 +8,8 @@
#include "atomic_util.hh"
#include "ns.hh"
#include "rnd.hh"
#include "scopedperf.hh"
#include "intelctr.hh"
u64
proc_hash
(
const
u32
&
pid
)
...
...
@@ -57,21 +59,30 @@ threadpin(void (*fn)(void*), void *arg, const char *name, int cpu)
static
pthread_barrier_t
worker_b
,
populate_b
;
enum
{
iter_total
=
1000000
0
};
enum
{
iter_total
=
1000000
};
enum
{
crange_items
=
1024
};
static
auto
perfgroup
=
ctrgroup
(
&
intelctr
::
tsc
// ,&intelctr::l2_refs
// ,&intelctr::l2_miss
);
static
void
worker
(
void
*
arg
)
{
crange
*
cr
=
(
crange
*
)
arg
;
for
(
u32
i
=
0
;
i
<
iter_total
/
ncpu
;
i
++
)
{
ANON_REGION
(
"worker op"
,
&
perfgroup
);
u64
k
=
1
+
rnd
()
%
(
crange_items
*
2
);
auto
span
=
cr
->
search_lock
(
k
,
1
);
if
(
rnd
()
&
1
)
if
(
rnd
()
&
1
)
{
ANON_REGION
(
"worker del"
,
&
perfgroup
);
span
.
replace
(
0
);
else
}
else
{
ANON_REGION
(
"worker add"
,
&
perfgroup
);
span
.
replace
(
new
range
(
cr
,
k
,
1
));
}
}
pthread_barrier_wait
(
&
worker_b
);
...
...
@@ -153,5 +164,5 @@ main(int ac, char **av)
}
pthread_barrier_wait
(
&
worker_b
);
printf
(
"exiting
\n
"
);
scopedperf
::
perfsum_base
::
printall
(
);
}
编写
预览
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论