Skip to content
项目
群组
代码片段
帮助
当前项目
正在载入...
登录 / 注册
切换导航面板
X
xv6-public
项目
项目
详情
活动
周期分析
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
统计图
问题
0
问题
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
CI / CD
CI / CD
流水线
作业
日程
统计图
Wiki
Wiki
代码片段
代码片段
成员
成员
折叠边栏
关闭边栏
活动
图像
聊天
创建新问题
作业
提交
问题看板
Open sidebar
银宸时代
OS Lab Group
奖励实验
xv6-public
提交
ae76c346
提交
ae76c346
2月 09, 2012
创建
作者:
Silas Boyd-Wickizer
浏览文件
操作
浏览文件
下载
差异文件
Merge conflicts
上级
364cfa6a
ae573cc0
隐藏空白字符变更
内嵌
并排
正在显示
17 个修改的文件
包含
513 行增加
和
222 行删除
+513
-222
Makefile
Makefile
+3
-1
amd64.h
amd64.h
+0
-6
cilk.c
cilk.c
+311
-0
compiler.h
compiler.h
+6
-0
console.c
console.c
+12
-4
cpu.h
cpu.h
+3
-2
exec.c
exec.c
+5
-5
kernel.h
kernel.h
+19
-12
lockstat.h
lockstat.h
+1
-0
main.c
main.c
+4
-2
mapbench.c
mapbench.c
+2
-2
param.h
param.h
+3
-4
proc.c
proc.c
+1
-1
proc.h
proc.h
+2
-2
trap.c
trap.c
+14
-1
wq.c
wq.c
+121
-180
wq.h
wq.h
+6
-0
没有找到文件。
Makefile
浏览文件 @
ae76c346
...
...
@@ -20,7 +20,8 @@ NM = $(TOOLPREFIX)nm
OBJCOPY
=
$(TOOLPREFIX)
objcopy
CFLAGS
=
-fno-pic
-static
-fno-builtin
-fno-strict-aliasing
-O2
-Wall
-MD
-ggdb
\
-m64
-Werror
-std
=
c99
-fms-extensions
-mno-sse
-mcmodel
=
large
-I
$(QEMUSRC)
\
-m64
-Werror
-std
=
c99
-fms-extensions
-mno-sse
-mcmodel
=
large
-mno-red-zone
\
-I
$(QEMUSRC)
\
-fno-omit-frame-pointer
-DHW_
$(HW)
-include
param.h
-include
compiler.h
CFLAGS
+=
$(
shell
$(CC)
-fno-stack-protector
-E
-x
c /dev/null
>
/dev/null 2>&1
&&
echo
-fno-stack-protector
)
ASFLAGS
=
-m64
-gdwarf-2
-MD
...
...
@@ -29,6 +30,7 @@ LDFLAGS += -m elf_x86_64
OBJS
=
\
bio.o
\
cga.o
\
cilk.o
\
condvar.o
\
console.o
\
crange.o
\
...
...
amd64.h
浏览文件 @
ae76c346
...
...
@@ -115,12 +115,6 @@ rep_nop(void)
}
static
inline
void
barrier
(
void
)
{
__asm
volatile
(
""
:::
"memory"
);
}
static
inline
void
lidt
(
void
*
p
)
{
__asm
volatile
(
"lidt (%0)"
:
:
"r"
(
p
)
:
"memory"
);
...
...
cilk.c
0 → 100644
浏览文件 @
ae76c346
// cilk style run queue
// A work queue is built from NCPU per-core wqueues.
// A core pushes work to the head of its per-core wqueue.
// A core pops work from the head of its per-core wqueue.
// A core pops work from the tail of another core's per-core wqueue.
//
// Usage:
// void goo(uptr a0, uptr a1) {
// char *arg = (char*) a0;
// cprintf("goo\n");
// arg[1] = 'g';
// }
// void foo(uptr a0, uptr a1) {
// char *arg = (char*) a0;
// cilk_push(goo, a0, 0);
// arg[0] = 'f';
// cprintf("foo\n");
// }
// void example(void) {
// char arg[2];
// cilk_start();
// cilk_push(foo, (uptr)arg, 0);
// cprintf("example\n");
// cilk_end();
// cprintf("%c %c\n", arg[0], arg[1]);
// }
#if CILKENABLE
#include "types.h"
#include "kernel.h"
#include "amd64.h"
#include "cpu.h"
#include "bits.h"
#include "spinlock.h"
#include "condvar.h"
#include "queue.h"
#include "proc.h"
#include "mtrace.h"
#include "qlock.h"
#define NSLOTS (1 << CILKSHIFT)
// Per-core circular queue of pending cilkthreads.
// head and tail grow monotonically; an index is mapped into the ring
// with (i & (NSLOTS-1)), so (head - tail) is the current occupancy.
struct cilkqueue {
  struct cilkthread *thread[NSLOTS]; // ring buffer of pending work
  volatile int head __mpalign__;     // next slot to fill; pushes/pops happen here

  qlock_t lock;                      // serializes __cilk_pop against __cilk_steal
  volatile int tail;                 // oldest entry; thieves consume from here
  __padout__;
} __mpalign__;
// One unit of deferred work: a function pointer plus two arguments,
// and the frame whose reference count tracks this thread's completion.
struct cilkthread {
  u64 rip;                  // function to call, cast to void (*)(uptr, uptr)
  u64 arg0;
  u64 arg1;
  struct cilkframe *frame;  // parent cilkframe; ref is decremented in __cilk_run
  __padout__;
} __mpalign__;
// Per-core event counters, printed by cilk_dump().
struct cilkstat {
  u64 push;   // successful local pushes
  u64 full;   // pushes rejected because the ring was full
  u64 pop;    // local pops from the head
  u64 steal;  // threads taken from another core's tail
  __padout__;
} __mpalign__;
// One queue and one stat record per CPU, cacheline-aligned to avoid
// false sharing between cores.
struct cilkqueue queue[NCPU] __mpalign__;
struct cilkstat stat[NCPU] __mpalign__;
// Return this core's run queue.
static struct cilkqueue *
cilk_cur(void)
{
  struct cilkqueue *cq = &queue[mycpu()->id];
  return cq;
}
// Return the cilkframe currently installed on this core
// (set by cilk_start(), cleared by cilk_end()).
static struct cilkframe *
cilk_frame(void)
{
  struct cilkframe *frame = mycpu()->cilkframe;
  return frame;
}
// Return this core's statistics record.
static struct cilkstat *
cilk_stat(void)
{
  struct cilkstat *cs = &stat[mycpu()->id];
  return cs;
}
// Enqueue t at the head of q.  Returns 0 on success, -1 if the ring
// is full (the caller then runs the work inline).
// NOTE(review): this takes no lock — it appears to rely on pushes only
// coming from the owning core (via cilk_cur()) while thieves touch only
// tail under q->lock; confirm the store to thread[i] is ordered before
// the head++ that publishes it.
static int
__cilk_push(struct cilkqueue *q, struct cilkthread *t)
{
  int i;

  i = q->head;
  // (head - tail) == NSLOTS means every slot is occupied.
  if ((i - q->tail) == NSLOTS) {
    cilk_stat()->full++;
    return -1;
  }

  i = i & (NSLOTS - 1);  // map the monotonic index into the ring
  q->thread[i] = t;
  q->head++;             // publish: thieves may now see this entry
  cilk_stat()->push++;
  return 0;
}
// Take the most recently pushed thread (LIFO) from q's head, or NULL
// if the queue is empty.  Holds q->lock to serialize against
// __cilk_steal() racing for the last remaining entry.
static struct cilkthread *
__cilk_pop(struct cilkqueue *q)
{
  struct qnode qn;
  int i;

  ql_lock(&q->lock, &qn);
  i = q->head;
  if ((i - q->tail) == 0) {
    // empty
    ql_unlock(&q->lock, &qn);
    return NULL;
  }
  i = (i - 1) & (NSLOTS - 1);  // slot of the newest entry
  q->head--;                   // claim it while holding the lock
  ql_unlock(&q->lock, &qn);

  cilk_stat()->pop++;
  // Safe to read after unlock: the slot was claimed above, and only the
  // owning core pushes new entries into this queue.
  return q->thread[i];
}
// Take the oldest thread (FIFO) from q's tail, or NULL if empty.
// Called on queues belonging to other cores; q->lock serializes
// concurrent thieves and the owner's __cilk_pop().
static struct cilkthread *
__cilk_steal(struct cilkqueue *q)
{
  struct qnode qn;
  int i;

  ql_lock(&q->lock, &qn);
  i = q->tail;
  if ((i - q->head) == 0) {
    // empty
    ql_unlock(&q->lock, &qn);
    return NULL;
  }
  i = i & (NSLOTS - 1);  // slot of the oldest entry
  q->tail++;             // claim it while holding the lock
  ql_unlock(&q->lock, &qn);

  cilk_stat()->steal++;
  // The slot was claimed above, so the read after unlock is stable.
  return q->thread[i];
}
// Execute one cilkthread on the current core: install its parent frame,
// call the function, restore the previous frame, then drop the frame's
// reference and free the thread.  The subfetch is what eventually lets
// cilk_end() observe ref == 0 and return.
static void
__cilk_run(struct cilkthread *th)
{
  void (*fn)(uptr arg0, uptr arg1) = (void *) th->rip;
  struct cilkframe *old = mycpu()->cilkframe;

  // Run under the pushing core's frame so nested cilk_push() calls
  // account against the right reference count.
  mycpu()->cilkframe = th->frame;
  fn(th->arg0, th->arg1);
  mycpu()->cilkframe = old;

  subfetch(&th->frame->ref, 1);
  kfree(th);
}
// Add the (rip, arg0, arg1) work to the local work queue.
// Guarantees some core will at some point execute the work.
// The current core might execute the work immediately (when allocation
// fails or the local ring is full, the function runs inline instead).
void
cilk_push(void *rip, u64 arg0, u64 arg1)
{
  void (*fn)(uptr, uptr) = rip;
  struct cilkthread *th;

  th = (struct cilkthread *) kalloc();
  if (th == NULL) {
    // No memory for a deferred thread: run synchronously.
    fn(arg0, arg1);
    return;
  }

  th->rip = (uptr) rip;
  th->arg0 = arg0;
  th->arg1 = arg1;
  th->frame = cilk_frame();

  // Take the frame reference BEFORE publishing th.  Once __cilk_push
  // succeeds the thread is visible to thieves, and __cilk_run()
  // decrements ref when it finishes; incrementing only after the push
  // races with that decrement, transiently underflowing ref and letting
  // cilk_end() observe ref == 0 while this work is still outstanding.
  fetchadd(&cilk_frame()->ref, 1);
  if (__cilk_push(cilk_cur(), th)) {
    // Queue full: undo the reference, free the thread, run inline.
    subfetch(&cilk_frame()->ref, 1);
    kfree(th);
    fn(arg0, arg1);
  }
}
// Try to execute one cilkthread.
// Check local queue then steal from other queues.
// Returns 1 if a thread was run, 0 if no work was found anywhere.
// Runs with interrupts disabled (pushcli/popcli) so mycpu() stays stable.
int
cilk_trywork(void)
{
  struct cilkthread *th;
  int i;

  pushcli();
  // Prefer our own queue (LIFO pop, no cross-core traffic).
  th = __cilk_pop(cilk_cur());
  if (th != NULL) {
    __cilk_run(th);
    popcli();
    return 1;
  }

  // XXX(sbw) should be random
  for (i = 0; i < NCPU; i++) {
    if (i == mycpu()->id)
      continue;  // already checked our own queue above
    th = __cilk_steal(&queue[i]);
    if (th != NULL) {
      __cilk_run(th);
      popcli();
      return 1;
    }
  }

  popcli();
  return 0;
}
// Start a new work queue frame.
// We don't allow nested work queue frames.
// NOTE(review): the pushcli() here is deliberately unbalanced — the
// matching popcli() is in cilk_end(); interrupts stay disabled for the
// whole frame.  Confirm every cilk_start() is paired with cilk_end().
void
cilk_start(void)
{
  pushcli();
  // A nonzero ref means a frame is already active for this process.
  if (myproc()->cilkframe.ref != 0)
    panic("cilk_start");
  mycpu()->cilkframe = &myproc()->cilkframe;
}
// End of the current work queue frame.
// The core works while the reference count of the current
// work queue frame is not 0: it drains its own queue, then steals one
// thread at a time, re-checking ref between rounds.  Re-enables
// interrupts (popcli) disabled by cilk_start().
void
cilk_end(void)
{
  while (cilk_frame()->ref != 0) {
    struct cilkthread *th;
    int i;

    // Drain the local queue first.
    while ((th = __cilk_pop(cilk_cur())) != NULL)
      __cilk_run(th);

    // Then try to steal one thread from any queue (including our own,
    // which is harmlessly empty at this point).
    for (i = 0; i < NCPU; i++) {
      th = __cilk_steal(&queue[i]);
      if (th != NULL) {
        __cilk_run(th);
        break;  // re-check ref before stealing more
      }
    }
  }
  mycpu()->cilkframe = NULL;
  popcli();
}
void
cilk_dump
(
void
)
{
int
i
;
for
(
i
=
0
;
i
<
NCPU
;
i
++
)
cprintf
(
"push %lu full %lu pop %lu steal %lu
\n
"
,
stat
[
i
].
push
,
stat
[
i
].
full
,
stat
[
i
].
pop
,
stat
[
i
].
steal
);
}
// Deliberately empty work item used by testcilk() to measure pure
// queueing overhead.
static void
__test_stub(uptr a0, uptr a1)
{
  //cprintf("%lu, %lu\n", a0, a1);
}
// Microbenchmark: CPU 0 pushes `iters` no-op threads and reports the
// average cycles per push/run round trip; all other CPUs spin in
// cilk_trywork() as workers until CPU 0 clears `running`.
// Intended to be entered on every CPU.
void
testcilk(void)
{
  enum { iters = 1000 };
  static volatile int running = 1;  // cross-CPU stop flag, cleared by CPU 0
  u64 e, s;
  int i;

  pushcli();
  if (mycpu()->id == 0) {
    microdelay(1);  // give the other CPUs a moment to reach the worker loop
    s = rdtsc();
    cilk_start();
    for (i = 0; i < iters; i++)
      cilk_push(__test_stub, i, i);
    cilk_end();
    e = rdtsc();
    cprintf("testcilk: %lu\n", (e - s) / iters);
    cilk_dump();
    running = 0;  // release the worker CPUs
  } else {
    while (running)
      cilk_trywork();
  }
  popcli();
}
// Zero a cilkframe (ref = 0) before its process first uses cilk_start().
void
initcilkframe(struct cilkframe *cilk)
{
  memset(cilk, 0, sizeof(*cilk));
}
// One-time boot initialization: set up each per-core queue's lock.
void
initcilk(void)
{
  int c;

  for (c = 0; c < NCPU; c++)
    ql_init(&queue[c].lock, "queue lock");
}
#endif // CILKENABLE
compiler.h
浏览文件 @
ae76c346
#define __padout__ char __padout[0] __attribute__((aligned(CACHELINE)))
#define __mpalign__ __attribute__((aligned(CACHELINE)))
#define __noret__ __attribute__((noreturn))
static
inline
void
barrier
(
void
)
{
__asm
volatile
(
""
:::
"memory"
);
}
console.c
浏览文件 @
ae76c346
...
...
@@ -162,10 +162,10 @@ kerneltrap(struct trapframe *tf)
kstack
=
myproc
()
->
kstack
;
}
__cprintf
(
"kernel trap %u cpu %u
\n
"
" tf: rip %p rsp %p
cr2
%p
\n
"
" tf: rip %p rsp %p
rbp %p cr2 %p cs
%p
\n
"
" proc: name %s pid %u kstack %p
\n
"
,
tf
->
trapno
,
mycpu
()
->
id
,
tf
->
rip
,
tf
->
rsp
,
rcr2
()
,
tf
->
rip
,
tf
->
rsp
,
tf
->
rbp
,
rcr2
(),
tf
->
cs
,
name
,
pid
,
kstack
);
printtrace
(
tf
->
rbp
);
...
...
@@ -235,6 +235,14 @@ consoleintr(int (*getc)(void))
case
C
(
'P'
):
// Process listing.
procdumpall
();
break
;
case
C
(
'E'
):
// Print user-space PCs.
for
(
u32
i
=
0
;
i
<
NCPU
;
i
++
)
cpus
[
i
].
timer_printpc
=
1
;
break
;
case
C
(
'T'
):
// Print user-space PCs and stack traces.
for
(
u32
i
=
0
;
i
<
NCPU
;
i
++
)
cpus
[
i
].
timer_printpc
=
2
;
break
;
case
C
(
'U'
):
// Kill line.
while
(
input
.
e
!=
input
.
w
&&
input
.
buf
[(
input
.
e
-
1
)
%
INPUT_BUF
]
!=
'\n'
){
...
...
@@ -248,8 +256,8 @@ consoleintr(int (*getc)(void))
consputc
(
BACKSPACE
);
}
break
;
case
C
(
'
W'
):
// Work queue
stats
wq
_dump
();
case
C
(
'
C'
):
// cilk
stats
cilk
_dump
();
break
;
case
C
(
'L'
):
// Prof stats
profdump
();
...
...
cpu.h
浏览文件 @
ae76c346
#include "mmu.h"
struct
wq
frame
;
struct
cilk
frame
;
// Per-CPU state
struct
cpu
{
...
...
@@ -10,7 +10,8 @@ struct cpu {
struct
segdesc
gdt
[
NSEGS
];
// x86 global descriptor table
struct
taskstate
ts
;
// Used by x86 to find stack for interrupt
struct
context
*
scheduler
;
// swtch() here to enter scheduler
struct
wqframe
*
wqframe
;
struct
cilkframe
*
cilkframe
;
int
timer_printpc
;
// Cpu-local storage variables; see below
struct
cpu
*
cpu
;
...
...
exec.c
浏览文件 @
ae76c346
...
...
@@ -190,7 +190,7 @@ exec(char *path, char **argv)
args
.
path
=
path
;
args
.
argv
=
argv
;
wq
_start
();
cilk
_start
();
for
(
i
=
0
,
off
=
elf
.
phoff
;
i
<
elf
.
phnum
;
i
++
,
off
+=
sizeof
(
ph
)){
Elf64_Word
type
;
if
(
readi
(
ip
,
(
char
*
)
&
type
,
...
...
@@ -199,7 +199,7 @@ exec(char *path, char **argv)
goto
bad
;
if
(
type
!=
ELF_PROG_LOAD
)
continue
;
wq
_push
(
dosegment
,
(
uptr
)
&
args
,
(
uptr
)
off
);
cilk
_push
(
dosegment
,
(
uptr
)
&
args
,
(
uptr
)
off
);
}
if
(
odp
)
{
...
...
@@ -210,14 +210,14 @@ exec(char *path, char **argv)
ip
=
0
;
}
wq
_push
(
doheap
,
(
uptr
)
&
args
,
(
uptr
)
0
);
cilk
_push
(
doheap
,
(
uptr
)
&
args
,
(
uptr
)
0
);
// dostack reads from the user address space. The wq
// stuff doesn't switch to the user address space.
//
wq
_push(dostack, (uptr)&args, (uptr)0);
//
cilk
_push(dostack, (uptr)&args, (uptr)0);
dostack
((
uptr
)
&
args
,
(
uptr
)
0
);
wq
_end
();
cilk
_end
();
// Commit to the user image.
oldvmap
=
myproc
()
->
vmap
;
...
...
kernel.h
浏览文件 @
ae76c346
...
...
@@ -12,9 +12,9 @@ static inline uptr v2p(void *a) { return (uptr) a - KBASE; }
static
inline
void
*
p2v
(
uptr
a
)
{
return
(
void
*
)
a
+
KBASE
;
}
struct
trapframe
;
struct
cilkframe
;
struct
spinlock
;
struct
condvar
;
struct
wqframe
;
struct
context
;
struct
vmnode
;
struct
inode
;
...
...
@@ -328,20 +328,27 @@ struct vmap * vmap_copy(struct vmap *, int);
// wq.c
#if WQENABLE
void
wq_push
(
void
*
rip
,
u64
arg0
,
u64
arg1
);
void
wq_start
(
void
);
void
wq_end
(
void
);
void
wq_dump
(
void
);
int
wq_trywork
(
void
);
void
initwqframe
(
struct
wqframe
*
wq
);
#else
#define wq_push(rip, arg0, arg1) do { \
#define wq_trywork() 0
#endif
// cilk.c
#if CILKENABLE
void
cilk_push
(
void
*
rip
,
u64
arg0
,
u64
arg1
);
void
cilk_start
(
void
);
void
cilk_end
(
void
);
void
cilk_dump
(
void
);
int
cilk_trywork
(
void
);
void
initcilkframe
(
struct
cilkframe
*
wq
);
#else
#define cilk_push(rip, arg0, arg1) do { \
void (*fn)(uptr, uptr) = rip; \
fn(arg0, arg1); \
} while(0)
#define
wq
_start() do { } while(0)
#define
wq
_end() do { } while(0)
#define
wq
_dump() do { } while(0)
#define
wq
_trywork() 0
#define init
wq
frame(x) do { } while (0)
#define
cilk
_start() do { } while(0)
#define
cilk
_end() do { } while(0)
#define
cilk
_dump() do { } while(0)
#define
cilk
_trywork() 0
#define init
cilk
frame(x) do { } while (0)
#endif
lockstat.h
浏览文件 @
ae76c346
...
...
@@ -42,3 +42,4 @@ struct klockstat {
#define LOCKSTAT_PROC 1
#define LOCKSTAT_SCHED 1
#define LOCKSTAT_VM 1
#define LOCKSTAT_WQ 1
main.c
浏览文件 @
ae76c346
...
...
@@ -24,6 +24,7 @@ extern void initdisk(void);
extern
void
inituser
(
void
);
extern
void
inithz
(
void
);
extern
void
initwq
(
void
);
extern
void
initcilk
(
void
);
extern
void
initsamp
(
void
);
extern
void
initpci
(
void
);
extern
void
initnet
(
void
);
...
...
@@ -103,8 +104,9 @@ cmain(u64 mbmagic, u64 mbaddr)
initbio
();
// buffer cache
initinode
();
// inode cache
initdisk
();
// disk
#if WQENABLE
initwq
();
// work queues
initwq
();
#if CILKENABLE
initcilk
();
#endif
initsamp
();
initlockstat
();
...
...
mapbench.c
浏览文件 @
ae76c346
...
...
@@ -51,7 +51,7 @@ main(int ac, char **av)
int
nthread
=
atoi
(
av
[
1
]);
acquire
(
&
l
);
printf
(
1
,
"mapbench[%d]: start esp %x, nthread=%d
\n
"
,
getpid
(),
rrsp
(),
nthread
);
//
printf(1, "mapbench[%d]: start esp %x, nthread=%d\n", getpid(), rrsp(), nthread);
for
(
int
i
=
0
;
i
<
nthread
;
i
++
)
{
sbrk
(
8192
);
...
...
@@ -72,7 +72,7 @@ main(int ac, char **av)
acquire
(
&
l
);
}
release
(
&
l
);
printf
(
1
,
"mapbench[%d]: done
\n
"
,
getpid
());
//
printf(1, "mapbench[%d]: done\n", getpid());
for
(
int
i
=
0
;
i
<
nthread
;
i
++
)
wait
();
...
...
param.h
浏览文件 @
ae76c346
...
...
@@ -13,7 +13,7 @@
#define CACHELINE 64 // cache line size
#define CPUKSTACKS (NPROC + NCPU)
#define QUANTUM 10 // scheduling time quantum and tick length (in msec)
#define
WQSHIFT
4 // 2^WORKSHIFT work queue slots
#define
CILKSHIFT
4 // 2^WORKSHIFT work queue slots
#define VICTIMAGE 1000000 // cycles a proc executes before an eligible victim
#define VERBOSE 0 // print kernel diagnostics
#define SPINLOCK_DEBUG 1 // Debug spin locks
...
...
@@ -21,20 +21,19 @@
#define VERIFYFREE LOCKSTAT
#define ALLOC_MEMSET 1
#define KSHAREDSIZE (32 << 10)
#define WQENABLE 1
#define WQSHIFT 4
#if defined(HW_josmp)
#define NCPU 16 // maximum number of CPUs
#define MTRACE 0
#define WQENABLE 0 // Enable work queue
#define PERFSIZE (1<<30ull)
#elif defined(HW_qemu)
#define NCPU 4 // maximum number of CPUs
#define MTRACE 0
#define WQENABLE 1 // Enable work queue
#define PERFSIZE (16<<20ull)
#elif defined(HW_ud0)
#define NCPU 4 // maximum number of CPUs
#define MTRACE 0
#define WQENABLE 0 // Enable work queue
#define PERFSIZE (512<<20ull)
#else
#error "Unknown HW"
...
...
proc.c
浏览文件 @
ae76c346
...
...
@@ -206,7 +206,7 @@ allocproc(void)
snprintf
(
p
->
lockname
,
sizeof
(
p
->
lockname
),
"cv:proc:%d"
,
p
->
pid
);
initlock
(
&
p
->
lock
,
p
->
lockname
+
3
,
LOCKSTAT_PROC
);
initcondvar
(
&
p
->
cv
,
p
->
lockname
);
init
wqframe
(
&
p
->
wq
frame
);
init
cilkframe
(
&
p
->
cilk
frame
);
if
(
ns_insert
(
nspid
,
KI
(
p
->
pid
),
(
void
*
)
p
)
<
0
)
panic
(
"allocproc: ns_insert"
);
...
...
proc.h
浏览文件 @
ae76c346
...
...
@@ -13,7 +13,7 @@ struct context {
}
__attribute__
((
packed
));
// Work queue frame
struct
wq
frame
{
struct
cilk
frame
{
volatile
u64
ref
;
};
...
...
@@ -63,7 +63,7 @@ struct proc {
struct
mtrace_stacks
mtrace_stacks
;
#endif
struct
runq
*
runq
;
struct
wqframe
wq
frame
;
struct
cilkframe
cilk
frame
;
STAILQ_ENTRY
(
proc
)
runqlink
;
struct
condvar
*
oncv
;
// Where it is sleeping, for kill()
...
...
trap.c
浏览文件 @
ae76c346
...
...
@@ -71,8 +71,21 @@ trap(struct trapframe *tf)
switch
(
tf
->
trapno
){
case
T_IRQ0
+
IRQ_TIMER
:
if
(
mycpu
()
->
timer_printpc
)
{
cprintf
(
"cpu%d: proc %s rip %lx rsp %lx cs %x
\n
"
,
mycpu
()
->
id
,
myproc
()
?
myproc
()
->
name
:
"(none)"
,
tf
->
rip
,
tf
->
rsp
,
tf
->
cs
);
if
(
mycpu
()
->
timer_printpc
==
2
&&
tf
->
rbp
>
KBASE
)
{
uptr
pc
[
10
];
getcallerpcs
((
void
*
)
tf
->
rbp
,
pc
,
NELEM
(
pc
));
for
(
int
i
=
0
;
i
<
10
&&
pc
[
i
];
i
++
)
cprintf
(
"cpu%d: %lx
\n
"
,
mycpu
()
->
id
,
pc
[
i
]);
}
mycpu
()
->
timer_printpc
=
0
;
}
if
(
mycpu
()
->
id
==
0
)
cv_tick
();
cv_tick
();
lapiceoi
();
break
;
case
T_IRQ0
+
IRQ_IDE
:
...
...
wq.c
浏览文件 @
ae76c346
// cilk style run queue
// A work queue is built from NCPU per-core wqueues.
// A core pushes work to the head of its per-core wqueue.
// A core pops work from the head of its per-core wqueue.
// A core pops work from the tail of another core's per-core wqueue.
//
// Usage:
// void goo(uptr a0, uptr a1) {
// char *arg = (char*) a0;
// cprintf("goo\n");
// arg[1] = 'g';
// }
// void foo(uptr a0, uptr a1) {
// char *arg = (char*) a0;
// wq_push(goo, a0, 0);
// arg[0] = 'f';
// cprintf("foo\n");
// }
// void example(void) {
// char arg[2];
// wq_start();
// wq_push(foo, (uptr)arg, 0);
// cprintf("example\n");
// wq_end();
// cprintf("%c %c\n", arg[0], arg[1]);
// }
#if WQENABLE
#include "types.h"
#include "kernel.h"
#include "spinlock.h"
#include "amd64.h"
#include "cpu.h"
#include "bits.h"
#include "spinlock.h"
#include "condvar.h"
#include "queue.h"
#include "proc.h"
#include "mtrace.h"
#include "qlock.h"
#include "wq.h"
#define NSLOTS (1 << WQSHIFT)
struct
wqueue
{
struct
wqthread
*
thread
[
NSLOTS
];
struct
work
*
w
[
NSLOTS
];
volatile
int
head
__mpalign__
;
qlock_t
lock
;
volatile
int
tail
;
struct
spinlock
lock
;
__padout__
;
}
__mpalign__
;
struct
wqthread
{
u64
rip
;
u64
arg0
;
u64
arg1
;
struct
wqframe
*
frame
;
// parent wqframe
__padout__
;
}
__mpalign__
;
}
__mpalign__
;;
struct
wqstat
{
u64
push
;
...
...
@@ -68,145 +28,163 @@ struct wqstat {
struct
wqueue
queue
[
NCPU
]
__mpalign__
;
struct
wqstat
stat
[
NCPU
]
__mpalign__
;
static
struct
wqueue
*
wq_cur
(
void
)
static
inline
struct
wqueue
*
getwq
(
void
)
{
return
&
queue
[
mycpu
()
->
id
];
pushcli
();
return
&
queue
[
cpunum
()];
}
static
struct
wqframe
*
wq_frame
(
void
)
static
inline
void
putwq
(
struct
wqueue
*
wq
)
{
return
mycpu
()
->
wqframe
;
popcli
()
;
}
static
struct
wqstat
*
static
inline
struct
wqstat
*
wq_stat
(
void
)
{
return
&
stat
[
mycpu
()
->
id
];
return
&
stat
[
cpunum
()];
}
static
struct
work
*
allocwork
(
void
)
{
return
(
struct
work
*
)
kalloc
();
}
static
void
freework
(
struct
work
*
w
)
{
kfree
(
w
);
}
static
int
__wq_push
(
struct
wqueue
*
q
,
struct
wqthread
*
t
)
int
wq_push
(
struct
work
*
w
)
{
int
i
;
i
=
q
->
head
;
if
((
i
-
q
->
tail
)
==
NSLOTS
)
{
struct
wqueue
*
wq
=
getwq
();
i
=
wq
->
head
;
if
((
i
-
wq
->
tail
)
==
NSLOTS
)
{
wq_stat
()
->
full
++
;
return
-
1
;
}
i
=
i
&
(
NSLOTS
-
1
);
q
->
thread
[
i
]
=
t
;
q
->
head
++
;
wq
->
w
[
i
]
=
w
;
barrier
()
;
wq
->
head
++
;
wq_stat
()
->
push
++
;
putwq
(
wq
);
return
0
;
}
static
struct
wqthread
*
__wq_pop
(
struct
wqueue
*
q
)
int
wq_push1
(
void
(
*
fn
)(
struct
work
*
w
,
void
*
a0
),
void
*
a0
)
{
struct
qnode
qn
;
int
i
;
struct
work
*
w
=
allocwork
();
if
(
w
==
NULL
)
return
-
1
;
w
->
rip
=
fn
;
w
->
arg0
=
a0
;
if
(
wq_push
(
w
)
<
0
)
{
freework
(
w
);
return
-
1
;
}
return
0
;
}
int
wq_push2
(
void
(
*
fn
)(
struct
work
*
,
void
*
,
void
*
),
void
*
a0
,
void
*
a1
)
{
struct
work
*
w
=
allocwork
();
if
(
w
==
NULL
)
return
-
1
;
w
->
rip
=
fn
;
w
->
arg0
=
a0
;
w
->
arg1
=
a1
;
if
(
wq_push
(
w
)
<
0
)
{
freework
(
w
);
return
-
1
;
}
return
0
;
}
ql_lock
(
&
q
->
lock
,
&
qn
);
i
=
q
->
head
;
if
((
i
-
q
->
tail
)
==
0
)
{
ql_unlock
(
&
q
->
lock
,
&
qn
);
static
struct
work
*
__wq_pop
(
int
c
)
{
// Called with cli
struct
wqueue
*
wq
=
&
queue
[
c
];
struct
work
*
w
;
int
i
;
acquire
(
&
wq
->
lock
);
i
=
wq
->
head
;
if
((
i
-
wq
->
tail
)
==
0
)
{
release
(
&
wq
->
lock
);
return
NULL
;
}
i
=
(
i
-
1
)
&
(
NSLOTS
-
1
);
q
->
head
--
;
ql_unlock
(
&
q
->
lock
,
&
qn
);
w
=
wq
->
w
[
i
];
wq
->
head
--
;
release
(
&
wq
->
lock
);
wq_stat
()
->
pop
++
;
return
q
->
thread
[
i
]
;
return
w
;
}
static
struct
w
qthread
*
__wq_steal
(
struct
wqueue
*
q
)
static
struct
w
ork
*
__wq_steal
(
int
c
)
{
struct
qnode
qn
;
// Called with cli
struct
wqueue
*
wq
=
&
queue
[
c
];
struct
work
*
w
;
int
i
;
ql_lock
(
&
q
->
lock
,
&
qn
);
i
=
q
->
tail
;
if
((
i
-
q
->
head
)
==
0
)
{
ql_unlock
(
&
q
->
lock
,
&
qn
);
acquire
(
&
wq
->
lock
);
i
=
w
q
->
tail
;
if
((
i
-
w
q
->
head
)
==
0
)
{
release
(
&
wq
->
lock
);
return
NULL
;
}
i
=
i
&
(
NSLOTS
-
1
);
q
->
tail
++
;
ql_unlock
(
&
q
->
lock
,
&
qn
);
w
=
wq
->
w
[
i
];
wq
->
tail
++
;
release
(
&
wq
->
lock
);
wq_stat
()
->
steal
++
;
return
q
->
thread
[
i
]
;
return
w
;
}
static
void
__wq_run
(
struct
w
qthread
*
th
)
__wq_run
(
struct
w
ork
*
w
)
{
void
(
*
fn
)(
uptr
arg0
,
uptr
arg1
)
=
(
void
*
)
th
->
rip
;
struct
wqframe
*
old
=
mycpu
()
->
wqframe
;
mycpu
()
->
wqframe
=
th
->
frame
;
fn
(
th
->
arg0
,
th
->
arg1
);
mycpu
()
->
wqframe
=
old
;
subfetch
(
&
th
->
frame
->
ref
,
1
);
kfree
(
th
);
void
(
*
fn
)(
struct
work
*
,
void
*
,
void
*
)
=
w
->
rip
;
fn
(
w
,
w
->
arg0
,
w
->
arg1
);
freework
(
w
);
}
// Add the (rip, arg0, arg1) work to the local work queue.
// Guarantees some core will at some point execute the work.
// The current core might execute the work immediately.
void
wq_push
(
void
*
rip
,
u64
arg0
,
u64
arg1
)
{
void
(
*
fn
)(
uptr
,
uptr
)
=
rip
;
struct
wqthread
*
th
;
th
=
(
struct
wqthread
*
)
kalloc
();
if
(
th
==
NULL
)
{
fn
(
arg0
,
arg1
);
return
;
}
th
->
rip
=
(
uptr
)
rip
;
th
->
arg0
=
arg0
;
th
->
arg1
=
arg1
;
th
->
frame
=
wq_frame
();
if
(
__wq_push
(
wq_cur
(),
th
))
{
kfree
(
th
);
fn
(
arg0
,
arg1
);
}
else
fetchadd
(
&
wq_frame
()
->
ref
,
1
);
}
// Try to execute one wqthread.
// Check local queue then steal from other queues.
int
wq_trywork
(
void
)
{
struct
w
qthread
*
th
;
struct
w
ork
*
w
;
int
i
;
pushcli
();
th
=
__wq_pop
(
wq_cur
()
);
if
(
th
!=
NULL
)
{
__wq_run
(
th
);
w
=
__wq_pop
(
mycpu
()
->
id
);
if
(
w
!=
NULL
)
{
__wq_run
(
w
);
popcli
();
return
1
;
}
// XXX(sbw) should be random
for
(
i
=
0
;
i
<
NCPU
;
i
++
)
{
if
(
i
==
mycpu
()
->
id
)
continue
;
th
=
__wq_steal
(
&
queue
[
i
]
);
if
(
th
!=
NULL
)
{
__wq_run
(
th
);
w
=
__wq_steal
(
i
);
if
(
w
!=
NULL
)
{
__wq_run
(
w
);
popcli
();
return
1
;
}
...
...
@@ -216,42 +194,6 @@ wq_trywork(void)
return
0
;
}
// Start a new work queue frame.
// We don't allow nested work queue frames.
void
wq_start
(
void
)
{
pushcli
();
if
(
myproc
()
->
wqframe
.
ref
!=
0
)
panic
(
"wq_start"
);
mycpu
()
->
wqframe
=
&
myproc
()
->
wqframe
;
}
// End of the current work queue frame.
// The core works while the reference count of the current
// work queue frame is not 0.
void
wq_end
(
void
)
{
while
(
wq_frame
()
->
ref
!=
0
)
{
struct
wqthread
*
th
;
int
i
;
while
((
th
=
__wq_pop
(
wq_cur
()))
!=
NULL
)
__wq_run
(
th
);
for
(
i
=
0
;
i
<
NCPU
;
i
++
)
{
th
=
__wq_steal
(
&
queue
[
i
]);
if
(
th
!=
NULL
)
{
__wq_run
(
th
);
break
;
}
}
}
mycpu
()
->
wqframe
=
NULL
;
popcli
();
}
void
wq_dump
(
void
)
{
...
...
@@ -262,31 +204,35 @@ wq_dump(void)
}
static
void
__test_stub
(
uptr
a0
,
uptr
a1
)
__test_stub
(
struct
work
*
w
,
void
*
a0
,
void
*
a1
)
{
//cprintf("%lu, %lu\n", a0, a1);
//long i = (long)a0;
//cprintf("%u: %lu\n", cpunum(), i);
volatile
int
*
running
=
a1
;
subfetch
(
running
,
1
);
}
void
testwq
(
void
)
{
enum
{
iters
=
10
00
};
static
volatile
int
running
=
1
;
enum
{
iters
=
10
};
static
volatile
int
running
=
iters
;
u64
e
,
s
;
int
i
;
long
i
;
pushcli
();
if
(
mycpu
()
->
id
==
0
)
{
microdelay
(
1
);
s
=
rdtsc
();
wq_start
();
for
(
i
=
0
;
i
<
iters
;
i
++
)
wq_push
(
__test_stub
,
i
,
i
);
wq_end
();
for
(
i
=
0
;
i
<
iters
;
i
++
)
{
if
(
wq_push2
(
__test_stub
,
(
void
*
)
i
,
(
void
*
)
&
running
)
<
0
)
panic
(
"testwq: oops"
);
}
e
=
rdtsc
();
cprintf
(
"testwq: %lu
\n
"
,
(
e
-
s
)
/
iters
);
while
(
running
)
nop_pause
();
wq_dump
();
running
=
0
;
}
else
{
while
(
running
)
wq_trywork
();
...
...
@@ -295,17 +241,12 @@ testwq(void)
}
void
initwqframe
(
struct
wqframe
*
wq
)
{
memset
(
wq
,
0
,
sizeof
(
*
wq
));
}
void
initwq
(
void
)
{
int
i
;
for
(
i
=
0
;
i
<
NCPU
;
i
++
)
ql_init
(
&
queue
[
i
].
lock
,
"queue lock"
);
initlock
(
&
queue
[
i
].
lock
,
"wq lock"
,
LOCKSTAT_WQ
);
}
#endif // WQENABLE
wq.h
0 → 100644
浏览文件 @
ae76c346
struct
work
{
void
*
rip
;
void
*
arg0
;
void
*
arg1
;
char
data
[];
};
编写
预览
您添加了
0
人
到此讨论。请谨慎行事。
请先完成此评论的编辑!
取消
请
注册
或者
登录
后发表评论