Commit 844f2375 authored by David Benjamin

Merge branch 'scale-amd64' of git+ssh://pdos.csail.mit.edu/home/am0/6.828/xv6 into scale-amd64

......@@ -8,6 +8,7 @@ QEMUSRC ?= ../mtrace
MTRACE ?= $(QEMU)
HW ?= qemu
EXCEPTIONS ?= y
RUN ?= $(empty)
O = o.$(HW)
ifdef USE_CLANG
......@@ -97,7 +98,8 @@ $(O)/fs.img: $(O)/tools/mkfs README $(UPROGS)
##
QEMUOPTS = -smp $(QEMUSMP) -m 512 -serial mon:stdio -nographic \
-net user -net nic,model=e1000 \
-redir tcp:2323::23 -redir tcp:8080::80
-redir tcp:2323::23 -redir tcp:8080::80 \
$(if $(RUN),-append "\$$ $(RUN)",)
qemu: $(KERN)
$(QEMU) $(QEMUOPTS) -kernel $(KERN)
......
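Together with the boot and init changes below, the new RUN variable threads a command all the way from the make invocation into a shell in the guest: make appends it to the kernel command line after a `$` marker, the boot stub saves the Multiboot cmdline before the bss is zeroed (savecmdline below), cmdline.cc exposes it as /dev/cmdline, and init's runcmdline() hands everything after the `$` to sh. An illustrative invocation (assuming the default qemu target):

  make HW=qemu RUN=usertests qemu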
......@@ -6,6 +6,7 @@ UPROGS= \
cat \
du \
echo \
exechack \
init \
forkexectree \
forkexecbench \
......@@ -30,7 +31,7 @@ UPROGS= \
wqsh \
cp \
perf \
pmc \
xtime \
asharing \
xls \
xdu \
......
......@@ -6,6 +6,8 @@
#include "mtrace.h"
#include "pthread.h"
#include <sys/mman.h>
static int cpu;
static pthread_barrier_t bar;
enum { ncore = 8 };
......@@ -27,11 +29,11 @@ vmsharing(void* arg)
u64 i = (u64) arg;
volatile char *p = (char*)(0x40000UL + i * 4096);
if (map((void *) p, 4096) < 0)
die("map failed");
if (mmap((void *) p, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) < 0)
die("mmap failed");
if (unmap((void *) p, 4096) < 0)
die("unmap failed");
if (munmap((void *) p, 4096) < 0)
die("munmap failed");
return 0;
}
......
#include "types.h"
#include "stat.h"
#include "user.h"
#include "mtrace.h"
#include "amd64.h"
#define NITERS 100
#define PROCMAX NCPU
#define PROCMIN (NCPU-4)
static void
worker0(void)
{
const char* av[] = { "exechack", "w", 0 };
exec(av[0], av);
die("worker exec");
}
static void
worker1(void)
{
exit();
}
static void
master(void)
{
u64 pcount = 0;
u64 i = 0;
u64 t0 = rdtsc();
while (i < NITERS) {
while (pcount < PROCMAX) {
int pid;
pid = fork(0);
if (pid < 0)
die("master fork");
if (pid == 0)
worker0();
pcount++;
}
while (pcount > PROCMIN) {
if (wait() < 0)
die("master wait");
pcount--;
i++;
}
}
while (pcount) {
wait();
pcount--;
}
u64 t1 = rdtsc();
printf("%lu\n", (t1-t0)/i);
}
int
main(int ac, char **av)
{
if (ac > 1 && av[1][0] == 'w')
worker1();
master();
}
......@@ -37,7 +37,7 @@ int
main(int ac, char **av)
{
if (ac == 2)
exit();
exit();
execbench();
exit();
}
......@@ -5,6 +5,7 @@
#include "user.h"
#include "fcntl.h"
#include "lib.h"
#include "major.h"
static const char *sh_argv[] = { "sh", 0 };
static const char *app_argv[][MAXARG] = {
......@@ -14,6 +15,17 @@ static const char *app_argv[][MAXARG] = {
#endif
};
static struct {
const char* name;
int major;
} dev[] = {
{ "/dev/netif", MAJ_NETIF },
{ "/dev/sampler", MAJ_SAMPLER },
{ "/dev/lockstat", MAJ_LOCKSTAT },
{ "/dev/stat", MAJ_STAT },
{ "/dev/cmdline", MAJ_CMDLINE},
};
static int
startone(const char **argv)
{
......@@ -32,6 +44,30 @@ startone(const char **argv)
return pid;
}
static void
runcmdline(void)
{
const char* argv[3] = { "sh", 0, 0 };
char buf[256];
char* b;
long r;
int fd;
fd = open("/dev/cmdline", O_RDONLY);
if (fd < 0)
return;
r = read(fd, buf, sizeof(buf)-1);
if (r < 0)
return;
buf[r] = 0;
if ((b = strchr(buf, '$'))) {
argv[1] = b+1;
startone(argv);
}
}
int
main(void)
{
......@@ -45,16 +81,15 @@ main(void)
dup(0); // stderr
mkdir("dev");
if (mknod("/dev/netif", 2, 1) < 0)
fprintf(2, "init: mknod netif failed\n");
if (mknod("/dev/sampler", 3, 1) < 0)
fprintf(2, "init: mknod sampler failed\n");
if (mknod("/dev/lockstat", 4, 1) < 0)
fprintf(2, "init: mknod lockstat failed\n");
for (int i = 0; i < NELEM(dev); i++)
if (mknod(dev[i].name, dev[i].major, 1) < 0)
fprintf(2, "init: mknod %s failed\n", dev[i].name);
for (u32 i = 0; i < NELEM(app_argv); i++)
startone(app_argv[i]);
runcmdline();
for(;;){
pid = startone(sh_argv);
while((wpid=wait()) >= 0 && wpid != pid)
......
......@@ -6,6 +6,8 @@
#include "mtrace.h"
#include "pthread.h"
#include <sys/mman.h>
enum { readaccess = 1 };
enum { verbose = 0 };
enum { npg = 1 };
......@@ -33,7 +35,8 @@ thr(void *arg)
fprintf(1, "%d: %d ops\n", tid, i);
volatile char *p = (char*) (0x100000000UL + tid * npg * 0x100000);
if (map((void *) p, npg * 4096) < 0) {
if (mmap((void *) p, npg * 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) == MAP_FAILED) {
fprintf(1, "%d: map failed\n", tid);
exit();
}
......@@ -43,7 +46,7 @@ thr(void *arg)
p[j] = '\0';
}
if (unmap((void *) p, npg * 4096) < 0) {
if (munmap((void *) p, npg * 4096) < 0) {
fprintf(1, "%d: unmap failed\n", tid);
exit();
}
......
......@@ -6,6 +6,8 @@
#include "uspinlock.h"
#include "pthread.h"
#include <sys/mman.h>
static volatile char *p;
static struct uspinlock l;
static volatile int state;
......@@ -50,7 +52,8 @@ int
main(void)
{
p = (char *) 0x80000;
if (map((void *) p, 8192) < 0) {
if (mmap((void *) p, 8192, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) < 0) {
fprintf(1, "map failed\n");
exit();
}
......@@ -73,7 +76,7 @@ main(void)
fprintf(1, "shm ok\n");
if (unmap((void *) p, 8192) < 0) {
if (munmap((void *) p, 8192) < 0) {
fprintf(1, "unmap failed\n");
exit();
}
......
......@@ -5,11 +5,13 @@
#include "futex.h"
#include "errno.h"
#include "atomic.hh"
#include "mtrace.h"
static volatile std::atomic<u64> waiting;
static volatile std::atomic<u64> waking;
static int iters;
static int nworkers;
static volatile int go;
static struct {
u64 mem;
......@@ -24,6 +26,11 @@ void* worker0(void* x)
u64* f = &(ftx[id>>1].mem);
long r;
setaffinity(id);
while (go == 0)
yield();
if (id & 0x1) {
for (u64 i = 0; i < iters; i++) {
r = futex(f, FUTEX_WAIT, (u64)(i<<1), 0);
......@@ -50,6 +57,7 @@ void* worker0(void* x)
static
void master0(void)
{
go = 1;
for (int i = 0; i < nworkers; i++)
wait();
}
......@@ -75,9 +83,11 @@ main(int ac, char** av)
}
nsleep(1000*1000);
mtenable("xv6-schedbench");
u64 t0 = rdtsc();
master0();
u64 t1 = rdtsc();
mtdisable("xv6-schedbench");
printf("%lu\n", (t1-t0)/iters);
}
......
......@@ -142,7 +142,7 @@ getcmd(char *buf, int nbuf)
}
int
main(void)
main(int ac, char** av)
{
static char buf[100];
int fd;
......@@ -154,7 +154,30 @@ main(void)
break;
}
}
// If args, concatenate them and parse as a command.
if (ac > 1) {
char* b = buf;
char* e = b+sizeof(buf);
for (int i = 1; i < ac; i++) {
int n;
n = strlen(av[i]);
if (b+n+1 > e)
die("sh: too long");
strcpy(b, av[i]);
b += n;
if (b+1+1 > e)
die("sh: too long");
strcpy(b, " ");
b++;
}
if(fork1() == 0)
runcmd(parsecmd(buf));
wait();
exit();
}
// Read and run input commands.
while(getcmd(buf, sizeof(buf)) >= 0){
if(buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' '){
......
......@@ -6,6 +6,8 @@
#include "traps.h"
#include "pthread.h"
#include <sys/mman.h>
char buf[2048];
char name[3];
const char *echoargv[] = { "echo", "ALL", "TESTS", "PASSED", 0 };
......@@ -1712,24 +1714,26 @@ unmappedtest(void)
printf("unmappedtest\n");
for (int i = 1; i <= 8; i++) {
int r = map((void*)off, i*4096);
if (r < 0)
void *p = mmap((void*)off, i*4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
if (p == MAP_FAILED)
die("unmappedtest: map failed");
off += (i*2*4096);
}
for (int i = 8; i >= 1; i--) {
long r = map(0, i*4096);
if (r < 0)
void *p = mmap(0, i*4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (p == MAP_FAILED)
die("unmappedtest: map failed");
r = unmap((void*)r, i*4096);
int r = munmap(p, i*4096);
if (r < 0)
die("unmappedtest: unmap failed");
}
off = 0x1000;
for (int i = 1; i <= 8; i++) {
int r = unmap((void*)off, i*4096);
int r = munmap((void*)off, i*4096);
if (r < 0)
die("unmappedtest: unmap failed");
off += (i*2*4096);
......@@ -1737,6 +1741,24 @@ unmappedtest(void)
printf("unmappedtest ok\n");
}
static int nenabled;
static char **enabled;
void
run_test(const char *name, void (*test)())
{
if (!nenabled) {
test();
} else {
for (int i = 0; i < nenabled; i++) {
if (strcmp(name, enabled[i]) == 0) {
test();
break;
}
}
}
}
int
main(int argc, char *argv[])
{
......@@ -1748,47 +1770,52 @@ main(int argc, char *argv[])
}
close(open("usertests.ran", O_CREATE));
unopentest();
bigargtest();
bsstest();
sbrktest();
nenabled = argc - 1;
enabled = argv + 1;
#define TEST(name) run_test(#name, name)
TEST(unopentest);
TEST(bigargtest);
TEST(bsstest);
TEST(sbrktest);
// we should be able to grow a user process to consume all phys mem
unmappedtest();
validatetest();
opentest();
writetest();
writetest1();
createtest();
preads();
// mem();
pipe1();
preempt();
exitwait();
rmdot();
thirteen();
longname();
bigfile();
subdir();
concreate();
linktest();
unlinkread();
createdelete();
twofiles();
sharedfd();
dirfile();
iref();
forktest();
bigdir(); // slow
tls_test();
thrtest();
ftabletest();
exectest();
TEST(unmappedtest);
TEST(validatetest);
TEST(opentest);
TEST(writetest);
TEST(writetest1);
TEST(createtest);
TEST(preads);
// TEST(mem);
TEST(pipe1);
TEST(preempt);
TEST(exitwait);
TEST(rmdot);
TEST(thirteen);
TEST(longname);
TEST(bigfile);
TEST(subdir);
TEST(concreate);
TEST(linktest);
TEST(unlinkread);
TEST(createdelete);
TEST(twofiles);
TEST(sharedfd);
TEST(dirfile);
TEST(iref);
TEST(forktest);
TEST(bigdir); // slow
TEST(tls_test);
TEST(thrtest);
TEST(ftabletest);
TEST(exectest);
exit();
}
......@@ -17,6 +17,8 @@ static struct selector pmc_selector[] = {
{ "not halted", CMN|0x76 },
{ "remote probes", CMN|(0x4|0x8)<<8|0xec },
{ "L2 misses", CMN|(0x2|0x8)<<8|0x7e },
{ "MAB requests", CMN|(0x1)<<8|0x68 },
{ "MAB cycles", CMN|(0x1)<<8|0x69 },
};
static const char*
......@@ -48,28 +50,31 @@ main(int ac, const char *av[])
xav = &xav[1];
}
sys_stat* s0 = sys_stat::read();
pmc_count::config(pmc_selector[pmci].sel);
pmc_count pmc0 = pmc_count::read(0);
u64 t0 = rdtsc();
int pid = fork(0);
if (pid < 0) {
fprintf(1, "pmc: fork failed\n");
exit();
}
if (pid < 0)
die("xtime: fork failed");
if (pid == 0) {
exec(xav[0], xav);
fprintf(1, "pmc: exec failed\n");
exit();
die("xtime: exec failed");
}
wait();
sys_stat* s1 = sys_stat::read();
pmc_count pmc1 = pmc_count::read(0);
u64 t1 = rdtsc();
sys_stat* s2 = s1->delta(s0);
fprintf(1, "%s cycles\n", valstr(t1-t0));
fprintf(1, "%s %s\n", valstr(pmc1.delta(pmc0).sum()),
pmc_selector[pmci].name);
u64 tot = s2->busy() + s2->idle();
fprintf(1, ".%lu idle\n", (s2->idle()*100)/tot);
exit();
}
......@@ -251,6 +251,18 @@ rcr2(void)
return val;
}
static inline void
prefetchw(void *a)
{
__asm volatile("prefetchw (%0)" : : "r" (a));
}
static inline void
prefetch(void *a)
{
__asm volatile("prefetch (%0)" : : "r" (a));
}
// Layout of the trap frame built on the stack by the
// hardware and by trapasm.S, and passed to trap().
// Also used by sysentry (but sparsely populated).
......
#if defined(HW_ben)
#define lapicstartap x2apicstartap
#define lapiceoi x2apiceoi
#define lapic_tlbflush x2apic_tlbflush
#define lapic_sampconf x2apic_sampconf
#define lapicpc x2apicpc
#define lapicid x2apicid
#define initlapic initx2apic
#else
#define lapicstartap xapicstartap
#define lapiceoi xapiceoi
#define lapic_tlbflush xapic_tlbflush
#define lapic_sampconf xapic_sampconf
#define lapicpc xapicpc
#define lapicid xapicid
#define initlapic initxapic
#endif
// xapic.cc
void xapicstartap(hwid_t, u32 addr);
void xapiceoi(void);
void xapic_tlbflush(hwid_t);
void xapic_sampconf(hwid_t);
void xapicpc(char mask);
hwid_t xapicid(void);
void initxapic(void);
// x2apic.cc
void x2apicstartap(hwid_t, u32 addr);
void x2apiceoi(void);
void x2apic_tlbflush(hwid_t);
void x2apic_sampconf(hwid_t);
void x2apicpc(char mask);
hwid_t x2apicid(void);
void initx2apic(void);
......@@ -81,7 +81,12 @@
// CPUID function 0x00000001
#define CPUID_FEATURES 0x00000001
#define FEATURE_ECX_MWAIT (1 << 3)
#define FEATURE_EBX_APIC(x) (((x) >> 24) & 0xff)
// CPUID function 0x00000005
#define CPUID_MWAIT 0x00000005
// APIC Base Address Register MSR
#define MSR_APIC_BAR 0x0000001b
#define APIC_BAR_XAPIC_EN (1 << 11)
#define APIC_BAR_X2APIC_EN (1 << 10)
#pragma once
template<class A, class B>
class pair {
public:
A _a;
B _b;
#include <type_traits>
#include <utility>
pair(const A &a, const B &b) : _a(a), _b(b) {}
bool operator==(const pair<A, B> &other) {
return _a == other._a && _b == other._b;
}
};
using std::pair;
using std::make_pair;
template<int N>
class strbuf {
......@@ -27,13 +20,6 @@ class strbuf {
}
};
template<class A, class B>
pair<A, B>
mkpair(const A &a, const B &b)
{
return pair<A, B>(a, b);
}
class scoped_acquire {
private:
spinlock *_l;
......@@ -48,25 +34,6 @@ class scoped_acquire {
class retryable {};
namespace std {
template<class T>
struct remove_reference
{ typedef T type; };
template<class T>
struct remove_reference<T&>
{ typedef T type; };
template<class T>
struct remove_reference<T&&>
{ typedef T type; };
template<class T>
typename remove_reference<T>::type&&
move(T&& a)
{
return static_cast<typename remove_reference<T>::type&&>(a);
}
struct ostream { int next_width; };
extern ostream cout;
......
......@@ -7,6 +7,8 @@ using std::atomic;
extern atomic<u64> tlbflush_req;
class uwq;
// Per-CPU state
struct cpu {
cpuid_t id; // Index into cpus[] below
......@@ -20,12 +22,14 @@ struct cpu {
atomic<u64> tlbflush_done; // last tlb flush req done on this cpu
struct proc *prev; // The previously-running process
uwq* uwq __mpalign__; // Current userspace wq (might be nullptr)
hwid_t hwid __mpalign__; // Local APIC ID, accessed by other CPUs
// Cpu-local storage variables; see below
struct cpu *cpu;
struct proc *proc; // The currently-running process.
struct kmem *kmem; // The per-core memory table
u64 syscallno; // Temporary used by sysentry
} __mpalign__;
extern struct cpu cpus[NCPU];
......
......@@ -54,35 +54,3 @@ struct elfnote {
#define ELF_PROG_FLAG_EXEC 1
#define ELF_PROG_FLAG_WRITE 2
#define ELF_PROG_FLAG_READ 4
// All known .note types
#define ELF_NOTE_XV6_ADDR 1
// xv6-specific address note
struct xv6_addrdesc {
Elf64_Word id;
Elf64_Addr vaddr;
};
struct xv6_addrnote {
struct elfnote elfnote;
// name is 0 bytes
struct xv6_addrdesc desc;
};
// All xv6-specific IDs for notes about addresses
#define XV6_ADDR_ID_WQ 1
#define DEFINE_XV6_ADDRNOTE(xname, xid, xvaddr) \
const struct xv6_addrnote xname PROG_NOTE_ATTRIBUTE = { \
elfnote: { \
namesz: 0, \
descsz: sizeof(((xv6_addrnote *)nullptr)->desc), \
type: ELF_NOTE_XV6_ADDR \
}, \
desc: { \
id: (xid), \
vaddr: (Elf64_Addr)(xvaddr) } \
}
#define PROG_NOTE_ATTRIBUTE __attribute__ ((section(".note"), used))
......@@ -80,8 +80,3 @@ struct devsw {
};
extern struct devsw devsw[];
#define CONSOLE 1
#define NETIF 2
#define SAMPLER 3
#define DEVLOCKSTAT 4
......@@ -26,4 +26,4 @@ void threadhelper(void (*fn)(void *), void *arg);
struct trapframe;
void trap(struct trapframe *tf);
void sysentry(void);
u64 sysentry_c(u64 a0, u64 a1, u64 a2, u64 a3, u64 a4, u64 num);
u64 sysentry_c(u64 a0, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 num);
......@@ -141,13 +141,6 @@ void kmemprint(void);
// kbd.c
void kbdintr(void);
// lapic.c
void lapicstartap(hwid_t, u32 addr);
void lapiceoi(void);
void lapic_tlbflush(hwid_t);
void lapic_sampconf(hwid_t);
void lapicpc(char mask);
// main.c
void halt(void) __attribute__((noreturn));
......@@ -175,6 +168,7 @@ int pipewrite(struct pipe*, const char*, int);
// proc.c
struct proc* copyproc(struct proc*);
void finishproc(struct proc*, bool removepid = true);
void execswitch(proc* p);
void exit(void);
int fork(int);
int growproc(int);
......@@ -217,7 +211,11 @@ int fetchint64(uptr, u64*);
int fetchstr(char*, const char*, u64);
int fetchmem(void*, const void*, u64);
int putmem(void*, const void*, u64);
u64 syscall(u64 a0, u64 a1, u64 a2, u64 a3, u64 a4, u64 num);
u64 syscall(u64 a0, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 num);
// sysfile.cc
int doexec(const char* upath,
userptr<userptr<const char> > uargv);
// string.c
extern "C" int memcmp(const void*, const void*, size_t);
......
......@@ -64,12 +64,18 @@ static inline void mtresume(struct proc *p)
#define mtrec() mtrace_call_set(1, ~0ull)
#define mtign() mtrace_call_set(0, ~0ull)
static inline void mtreadavar(const char *fmt, ...)
__attribute__((format(printf, 1, 2)));
static inline void mtwriteavar(const char *fmt, ...)
__attribute__((format(printf, 1, 2)));
class mt_ascope
{
char name[64];
bool active;
public:
explicit mt_ascope(const char *fmt, ...)
__attribute__((format(printf, 2, 3)))
{
va_list ap;
......@@ -96,6 +102,7 @@ public:
{
vsnprintf(name, sizeof(name) - 1, fmt, ap);
mtrace_ascope_register(0, name);
mtwriteavar("kstack:%p", myproc()->kstack);
active = true;
}
......
#define MAJ_CONSOLE 1
#define MAJ_NETIF 2
#define MAJ_SAMPLER 3
#define MAJ_LOCKSTAT 4
#define MAJ_STAT 5
#define MAJ_CMDLINE 6
#define KBASE 0xFFFFFF0000000000ull
#define KBASEEND 0xFFFFFF2000000000ull // 128GB
#define KBASEEND 0xFFFFFF5000000000ull // 320GB
#define KCODE 0xFFFFFFFFC0000000ull
#define KSHARED 0xFFFFF00000000000ull
#define USERWQ 0xFFFFF00100000000ull
......
......@@ -121,10 +121,10 @@ struct taskstate
} __attribute__ ((packed, aligned(16)));
typedef struct hwid {
u8 num;
u32 num;
} hwid_t;
#define HWID(xnum) (struct hwid){ num: (u8)(xnum) }
#define HWID(xnum) (struct hwid){ num: (u32)(xnum) }
#endif
#define INT_P (1<<7) /* interrupt descriptor present */
......
......@@ -15,6 +15,7 @@ struct pci_func {
u32 reg_base[6];
u32 reg_size[6];
u8 irq_line;
u8 msi_capreg;
};
struct pci_bus {
......@@ -24,3 +25,4 @@ struct pci_bus {
int pci_init(void);
void pci_func_enable(struct pci_func *f);
void pci_msi_enable(struct pci_func *f, u8 irqnum);
......@@ -443,6 +443,9 @@ typedef u8 pci_revision_t;
#define PCI_VPD_DATAREG(ofs) ((ofs) + 4)
#define PCI_VPD_OPFLAG 0x80000000
#define PCI_MSI_MCR_MMC(cr) (((cr) >> 17) & 0x7)
#define PCI_MSI_MCR_64BIT 0x00800000
/*
* Power Management Capability; access via capability pointer.
*/
......
#include "sched.hh"
struct pmc_count {
static void config(u64 sel) {
perf_start(sel, 0);
}
static pmc_count read(unsigned int ctr) {
static pmc_count read(unsigned int ctr, bool* map = nullptr) {
pmc_count n;
// XXX(sbw) we lose our original affinity
for (int i = 0; i < NCPU; i++) {
setaffinity(i);
// XXX(sbw) qemu doesn't seem to support CR4_PCE
if (map == nullptr || map[i]) {
setaffinity(i);
// XXX(sbw) qemu doesn't seem to support CR4_PCE
#if defined(HW_qemu)
n.count_[i] = 0;
n.count_[i] = 0;
#else
n.count_[i] = rdpmc(ctr);
n.count_[i] = rdpmc(ctr);
#endif
}
}
setaffinity(-1);
return n;
......@@ -39,3 +43,61 @@ struct pmc_count {
u64 count_[NCPU];
};
struct sys_stat {
static sys_stat* read() {
sys_stat* that;
int fd;
long r;
that = new sys_stat();
assert(that != nullptr);
fd = open("/dev/stat", O_RDONLY);
assert(fd != -1);
r = ::read(fd, that->stats, sizeof(that->stats));
assert(r == sizeof(that->stats));
return that;
}
sys_stat* delta(const sys_stat* o) const {
sys_stat* that;
that = new sys_stat();
for (int i = 0; i < NCPU; i++) {
that->stats[i].enqs = stats[i].enqs - o->stats[i].enqs;
that->stats[i].deqs = stats[i].deqs - o->stats[i].deqs;
that->stats[i].steals = stats[i].steals - o->stats[i].steals;
that->stats[i].misses = stats[i].misses - o->stats[i].misses;
that->stats[i].idle = stats[i].idle - o->stats[i].idle;
that->stats[i].busy = stats[i].busy - o->stats[i].busy;
}
return that;
}
u64 busy() const {
u64 tot = 0;
for (int i = 0; i < NCPU; i++)
tot += stats[i].busy;
return tot;
}
u64 idle() const {
u64 tot = 0;
for (int i = 0; i < NCPU; i++)
tot += stats[i].idle;
return tot;
}
static void* operator new(unsigned long nbytes) {
assert(nbytes == sizeof(sys_stat));
return malloc(nbytes);
}
static void operator delete(void* p) {
free(p);
}
sched_stat stats[NCPU];
};
......@@ -84,6 +84,9 @@ struct proc : public rcu_freed, public sched_link {
int run_cpuid_;
int in_exec_;
int uaccess_;
const char *upath;
userptr<userptr<const char> > uargv;
u8 __cxa_eh_global[16];
std::atomic<int> exception_inuse;
......
......@@ -4,3 +4,13 @@ struct sched_link
sched_link* next;
};
struct sched_stat
{
u64 enqs;
u64 deqs;
u64 steals;
u64 misses;
u64 idle;
u64 busy;
u64 schedstart;
};
......@@ -35,19 +35,17 @@ struct uwq_worker {
struct uwq : public referenced, public rcu_freed {
friend struct uwq_worker;
static uwq* alloc(vmap* vmap, filetable *ftable);
static uwq* alloc(vmap* vmap, filetable *ftable, uptr uentry);
bool haswork() const;
bool tryworker();
void setuentry(uptr uentry);
virtual void do_gc(void) { delete this; }
protected:
virtual void onzero() const;
private:
uwq(vmap* vmap, filetable* ftable, uwq_ipcbuf *ipc);
uwq(vmap* vmap, filetable* ftable, uwq_ipcbuf *ipc, uptr uentry);
~uwq();
uwq& operator=(const uwq&);
uwq(const uwq& x);
......
......@@ -72,11 +72,11 @@ struct vma
// The elements of e[] are not ordered by address.
struct vmap {
#if VM_CRANGE
struct crange cr;
struct crange vmas;
#endif
#if VM_RADIX
struct radix rx;
struct radix vmas;
#endif
static vmap* alloc();
......
......@@ -2,11 +2,11 @@
#include "uspinlock.h"
#include "amd64.h"
#include "user.h"
#include "memlayout.h"
#include "uwq.hh"
#include "wqtypes.hh"
int mycpuid(void);
uwq_ipcbuf* allocipc(void);
static inline void*
allocwq(unsigned long nbytes)
......@@ -20,18 +20,6 @@ freewq(void* p)
free(p);
}
static inline uwq_ipcbuf*
allocipc(void)
{
static bool alloced;
if (alloced)
die("allocklen: allocing more than once");
if (sizeof(uwq_ipcbuf) > USERWQSIZE)
die("allocipc: too large");
alloced = true;
return (uwq_ipcbuf*)USERWQ;
}
static inline void
wqlock_acquire(wqlock_t *lock)
{
......
OBJS = \
acpi.o \
async.o \
bio.o \
bootdata.o \
cga.o \
cmdline.o \
condvar.o \
console.o \
cpprt.o \
......@@ -15,7 +17,6 @@ OBJS = \
futex.o \
idle.o \
ioapic.o \
lapic.o \
hwvm.o \
hz.o \
kalloc.o \
......@@ -51,6 +52,8 @@ OBJS = \
wqkern.o \
wqlib.o \
script.o \
xapic.o \
x2apic.o \
zalloc.o \
incbin.o \
sysvectors.o \
......
#include "types.h"
#include "amd64.h"
#include "kernel.hh"
#include "cpu.hh"
#include "apic.hh"
struct rsdp {
u8 signature[8];
u8 checksum;
u8 oemid[6];
u8 revision;
u32 rsdtaddr;
u32 length;
u64 xsdtaddr;
u8 extchecksum;
u8 reserved[3];
};
struct header {
u8 signature[4];
u32 length;
u8 revision;
u8 checksum;
u8 oemid[6];
u8 oemtableid[8];
u32 oemrevision;
u32 creatorid;
u32 creatorrevision;
};
struct rsdt {
struct header hdr;
u32 entry[];
};
struct xsdt {
struct header hdr;
u64 entry[];
} __attribute__((packed));
struct madt {
struct header hdr;
u32 localaddr;
u32 flags;
u8 entry[];
};
struct madt_apic {
u8 type;
u8 length;
u8 procid;
u8 apicid;
u32 flags;
} __attribute__((packed));
struct madt_x2apic {
u8 type;
u8 length;
u8 reserved[2];
u32 apicid;
u32 flags;
u32 procuid;
} __attribute__((packed));
#define CPU_ENABLED 0x1
static u8
sum(u8 *a, u32 length)
{
u8 s = 0;
for (u32 i = 0; i < length; i++)
s += a[i];
return s;
}
static struct rsdp *
rsdp_search1(paddr pa, int len)
{
u8 *start = (u8 *)p2v(pa);
for (u8 *p = start; p < (start + len); p += 16) {
if ((memcmp(p, "RSD PTR ", 8) == 0) && (sum(p, 20) == 0))
return (struct rsdp *)p;
}
return 0;
}
static struct rsdp *
rsdp_search(void)
{
struct rsdp *ret;
u8 *bda;
paddr pa;
bda = (u8 *)p2v(0x400);
if ((pa = ((bda[0x0F] << 8) | bda[0x0E]) << 4)) {
if ((ret = rsdp_search1(pa, 1024)))
return ret;
}
return rsdp_search1(0xE0000, 0x20000);
}
static void
scan_madt(struct madt* madt)
{
struct madt_x2apic* mx2;
struct madt_apic* ma;
u8* type;
u8* end;
u32 c;
end = ((u8*)madt) + madt->hdr.length;
type = ((u8*)madt) + sizeof(*madt);
c = 0 == myid() ? 1 : 0;
while (type < end) {
s64 id = -1;
switch (type[0]) {
case 0: // Processor Local APIC
ma = (struct madt_apic*) type;
if (ma->flags & CPU_ENABLED)
id = ma->apicid;
break;
case 9: // Processor Local x2APIC
mx2 = (struct madt_x2apic*) type;
if (mx2->flags & CPU_ENABLED)
id = mx2->apicid;
break;
}
if (id != -1 && id != lapicid().num) {
assert(c < NCPU);
if (VERBOSE)
cprintf("%u from %u to %ld\n", c, cpus[c].hwid.num, id);
cpus[c].hwid.num = id;
c = c+1 == myid() ? c+2 : c+1;
}
type = type + type[1];
}
}
void
initacpi(void)
{
struct rsdp* rsdp = rsdp_search();
struct madt* madt = nullptr;
if (!rsdp)
return;
if (rsdp->xsdtaddr) {
struct xsdt* xsdt = (struct xsdt*) p2v(rsdp->xsdtaddr);
if (sum((u8 *)xsdt, xsdt->hdr.length)) {
cprintf("initacpi: bad xsdt checksum\n");
return;
}
u32 n = xsdt->hdr.length > sizeof(*xsdt) ?
(xsdt->hdr.length - sizeof(*xsdt)) / 8 : 0;
for (u32 i = 0; i < n; i++) {
struct header* h = (struct header*) p2v(xsdt->entry[i]);
if (memcmp(h->signature, "APIC", 4) == 0) {
madt = (struct madt*) h;
break;
}
}
} else {
struct rsdt* rsdt = (struct rsdt*) p2v(rsdp->rsdtaddr);
if (sum((u8 *)rsdt, rsdt->hdr.length)) {
cprintf("initacpi: bad rsdt checksum\n");
return;
}
u32 n = rsdt->hdr.length > sizeof(*rsdt) ?
(rsdt->hdr.length - sizeof(*rsdt)) / 8 : 0;
for (u32 i = 0; i < n; i++) {
struct header* h = (struct header*) p2v(rsdt->entry[i]);
if (memcmp(h->signature, "APIC", 4) == 0) {
madt = (struct madt*) h;
break;
}
}
}
if (madt != nullptr)
scan_madt(madt);
}
......@@ -32,7 +32,7 @@
u64
bio_hash(const pair<u32, u64> &p)
{
return p._a ^ p._b;
return p.first ^ p.second;
}
static xns<pair<u32, u64>, buf*, bio_hash> *bufns;
......@@ -51,7 +51,7 @@ bget(u32 dev, u64 sector, int *writer)
loop:
// Try for cached block.
// XXX ignore dev
b = bufns->lookup(mkpair(dev, sector));
b = bufns->lookup(make_pair(dev, sector));
if (b) {
if (b->dev != dev || b->sector != sector)
panic("block mismatch");
......@@ -76,7 +76,7 @@ bget(u32 dev, u64 sector, int *writer)
b = new buf(dev, sector);
b->flags = B_BUSY;
*writer = 1;
if (bufns->insert(mkpair(b->dev, b->sector), b) < 0) {
if (bufns->insert(make_pair(b->dev, b->sector), b) < 0) {
gc_delayed(b);
goto loop;
}
......
......@@ -59,7 +59,11 @@ start:
movl %eax, %edi
movl %ebx, %esi
# Initialize stack.
# Save command line passed to the kernel. QEMU puts this
# in the bss, so we need to save it before we zero bss.
call savecmdline
# Initialize stack.
movl $PADDR(stack+STACK), %esp
# Zero bss. QEMU's MultiBoot seems not to.
......@@ -162,7 +166,40 @@ pdt:
// Filled in below.
.space 4096
.global cmdline
cmdline:
.space 256
.code32
savecmdline:
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
// Check for cmdline in flags
movl (%esi), %eax
and $(1<<2), %eax
jz 2f
// Save null-terminated cmdline
movl 0x10(%esi), %eax
movl $PADDR(cmdline), %ebx
movl $255, %ecx
1: movb (%eax), %dl
movb %dl, (%ebx)
cmp $0, %dl
je 2f
inc %eax
inc %ebx
loop 1b
2: popl %edx
popl %ecx
popl %ebx
popl %eax
ret
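For reference, the Multiboot v1 info fields savecmdline relies on, as a C sketch from the spec (not code in this commit): bit 2 of flags announces a valid cmdline pointer, which lives at offset 0x10 — exactly the two offsets tested and loaded above.

  struct mbinfo {        // partial Multiboot v1 info block (sketch)
    u32 flags;           // +0x00: bit 2 set => cmdline field is valid
    u32 mem_lower;       // +0x04
    u32 mem_upper;       // +0x08
    u32 boot_device;     // +0x0c
    u32 cmdline;         // +0x10: physical address of a NUL-terminated string
  };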
initpagetables:
pushl %edi
pushl %ecx
......
#include "types.h"
#include "kernel.hh"
#include "mmu.h"
#include "amd64.h"
#include "spinlock.h"
#include "condvar.h"
#include "fs.h"
#include "file.hh"
#include "major.h"
extern char cmdline[];
static int
cmdlineread(struct inode *inode, char *dst, u32 off, u32 n)
{
u32 cc;
if (off >= strlen(cmdline))
return 0;
cc = MIN(n, strlen(cmdline)-off);
memcpy(dst, &cmdline[off], cc);
return cc;
}
void
initcmdline(void)
{
if (VERBOSE)
cprintf("cmdline: %s\n", cmdline);
devsw[MAJ_CMDLINE].write = nullptr;
devsw[MAJ_CMDLINE].read = cmdlineread;
}
......@@ -18,6 +18,7 @@
#include "fmt.hh"
#include "sperf.hh"
#include "wq.hh"
#include "major.h"
#define BACKSPACE 0x100
......@@ -341,8 +342,8 @@ initconsole(void)
initlock(&cons.lock, "console", LOCKSTAT_CONSOLE);
cons.locking = 1;
devsw[CONSOLE].write = consolewrite;
devsw[CONSOLE].read = consoleread;
devsw[MAJ_CONSOLE].write = consolewrite;
devsw[MAJ_CONSOLE].read = consoleread;
picenable(IRQ_KBD);
ioapicenable(IRQ_KBD, 0);
......
......@@ -309,8 +309,12 @@ e1000attach(struct pci_func *pcif)
e1000.pcidevid = PCI_PRODUCT(pcif->dev_id);
e1000irq = pcif->irq_line;
#if defined(HW_ben) || defined(HW_tom)
// XXX(sbw) do something..
#else
picenable(e1000irq);
ioapicenable(e1000irq, 0);
#endif
e1000reset();
......
......@@ -19,36 +19,6 @@
#define BRK (USERTOP >> 1)
static int
donotes(struct inode *ip, uwq *uwq, u64 off)
{
struct proghdr ph;
struct elfnote note;
if (readi(ip, (char*)&ph, off, sizeof(ph)) != sizeof(ph))
return -1;
if (readi(ip, (char*)&note, ph.offset, sizeof(note)) != sizeof(note))
return -1;
if (note.type == ELF_NOTE_XV6_ADDR) {
struct xv6_addrdesc desc;
if (note.descsz != sizeof(desc))
return -1;
if (readi(ip, (char*)&desc,
ph.offset+__offsetof(struct xv6_addrnote, desc),
sizeof(desc)) != sizeof(desc))
return -1;
if (desc.id == XV6_ADDR_ID_WQ) {
uwq->setuentry(desc.vaddr);
return 0;
}
}
return -1;
}
static int
dosegment(inode* ip, vmap* vmp, u64 off)
{
struct proghdr ph;
......@@ -177,7 +147,6 @@ exec(const char *path, char **argv, void *ascopev)
ANON_REGION(__func__, &perfgroup);
struct inode *ip = nullptr;
struct vmap *vmp = nullptr;
uwq* newuwq = nullptr;
const char *s, *last;
struct elfhdr elf;
struct proghdr ph;
......@@ -188,17 +157,6 @@ exec(const char *path, char **argv, void *ascopev)
cwork* w;
long sp;
myproc()->exec_cpuid_ = mycpuid();
mt_ascope *ascope = (mt_ascope*) ascopev;
ascope->close();
myproc()->in_exec_ = 1;
yield();
myproc()->in_exec_ = 0;
ascope->open("sys_exec2(%s)", path);
myproc()->run_cpuid_ = mycpuid();
if((ip = namei(myproc()->cwd, path)) == 0)
return -1;
......@@ -218,9 +176,6 @@ exec(const char *path, char **argv, void *ascopev)
if((vmp = vmap::alloc()) == 0)
goto bad;
if((newuwq = uwq::alloc(vmp, myproc()->ftable)) == 0)
goto bad;
for(i=0, off=elf.phoff; i<elf.phnum; i++, off+=sizeof(ph)){
Elf64_Word type;
if(readi(ip, (char*)&type,
......@@ -229,10 +184,6 @@ exec(const char *path, char **argv, void *ascopev)
goto bad;
switch (type) {
case ELF_PROG_NOTE:
if (donotes(ip, newuwq, off) < 0)
goto bad;
break;
case ELF_PROG_LOAD:
if (dosegment(ip, vmp, off) < 0)
goto bad;
......@@ -254,9 +205,9 @@ exec(const char *path, char **argv, void *ascopev)
oldvmap = myproc()->vmap;
olduwq = myproc()->uwq;
myproc()->vmap = vmp;
myproc()->uwq = newuwq;
myproc()->tf->rip = elf.entry;
myproc()->tf->rsp = sp;
myproc()->run_cpuid_ = myid();
for(last=s=path; *s; s++)
if(*s == '/')
......@@ -279,8 +230,6 @@ exec(const char *path, char **argv, void *ascopev)
cprintf("exec failed\n");
if(vmp)
vmp->decref();
if(newuwq)
newuwq->dec();
gc_end_epoch();
return 0;
}
......@@ -152,7 +152,7 @@ bfree(int dev, u64 x)
u64
ino_hash(const pair<u32, u32> &p)
{
return p._a ^ p._b;
return p.first ^ p.second;
}
static xns<pair<u32, u32>, inode*, ino_hash> *ins;
......@@ -268,7 +268,7 @@ igetnoref(u32 dev, u32 inum)
// Try for cached inode.
{
scoped_gc_epoch e;
struct inode *ip = ins->lookup(mkpair(dev, inum));
struct inode *ip = ins->lookup(make_pair(dev, inum));
if (ip) {
if (!(ip->flags & I_VALID)) {
acquire(&ip->lock);
......@@ -290,7 +290,7 @@ igetnoref(u32 dev, u32 inum)
snprintf(ip->lockname, sizeof(ip->lockname), "cv:ino:%d", ip->inum);
initlock(&ip->lock, ip->lockname+3, LOCKSTAT_FS);
initcondvar(&ip->cv, ip->lockname);
if (ins->insert(mkpair(ip->dev, ip->inum), ip) < 0) {
if (ins->insert(make_pair(ip->dev, ip->inum), ip) < 0) {
gc_delayed(ip);
goto retry;
}
......@@ -399,7 +399,7 @@ iput(struct inode *ip)
ip->gen += 1;
iupdate(ip);
ins->remove(mkpair(ip->dev, ip->inum), &ip);
ins->remove(make_pair(ip->dev, ip->inum), &ip);
gc_delayed(ip);
icache_free[mycpu()->id].x++;
return;
......
......@@ -9,6 +9,7 @@
#include "cpu.hh"
#include "percpu.hh"
#include "kmtrace.hh"
#include "sperf.hh"
//
// futexkey
......@@ -170,9 +171,11 @@ futexaddr::onzero(void) const
long
futexwait(futexkey_t key, u64 val, u64 timer)
{
ANON_REGION(__func__, &perfgroup);
futexaddr* fa;
mtreadavar("futex:ns:%lx", key);
mtreadavar("futex:ns:%p", key);
{
scoped_gc_epoch gc;
again:
......@@ -187,7 +190,7 @@ futexwait(futexkey_t key, u64 val, u64 timer)
fa->dec();
goto again;
}
mtwriteavar("futex:ns:%lx", key);
mtwriteavar("futex:ns:%p", key);
fa->inserted_ = true;
} else {
if (!fa->tryinc()) {
......@@ -196,7 +199,7 @@ futexwait(futexkey_t key, u64 val, u64 timer)
}
}
assert(fa->key_ == key);
mtwriteavar("futex:%lx.%p", key, fa);
mtwriteavar("futex:%p.%p", key, fa);
acquire(&myproc()->futex_lock);
auto cleanup = scoped_cleanup([&fa](){
......@@ -226,13 +229,15 @@ futexwait(futexkey_t key, u64 val, u64 timer)
long
futexwake(futexkey_t key, u64 nwake)
{
ANON_REGION(__func__, &perfgroup);
futexaddr* fa;
u64 nwoke = 0;
if (nwake == 0)
return -1;
mtreadavar("futex:ns:%lx", key);
mtreadavar("futex:ns:%p", key);
{
scoped_gc_epoch gc;
fa = nsfutex->lookup(key);
......@@ -243,7 +248,7 @@ futexwake(futexkey_t key, u64 nwake)
auto cleanup = scoped_cleanup([&fa](){
fa->dec();
});
mtwriteavar("futex:%lx.%p", key, fa);
mtwriteavar("futex:%p.%p", key, fa);
fa->nspid_->enumerate([&nwoke, &nwake](u32 pid, proc* p) {
acquire(&p->futex_lock);
......
......@@ -11,6 +11,7 @@
#include "proc.hh"
#include "vm.hh"
#include "wq.hh"
#include "apic.hh"
using namespace std;
......@@ -213,8 +214,6 @@ freevm(pgmap *pml4)
void
inittls(void)
{
extern hwid_t lapicid(void);
struct cpu *c;
cpuid_t id = -1;
......
......@@ -8,6 +8,7 @@
#include "hwvm.hh"
#include "condvar.h"
#include "proc.hh"
#include "apic.hh"
void initpic(void);
void initioapic(void);
......@@ -16,7 +17,6 @@ void initcga(void);
void initconsole(void);
void initpg(void);
void initmp(void);
void initlapic(void);
void inittls(void);
void initnmi(void);
void inittrap(void);
......@@ -39,6 +39,7 @@ void initsperf(void);
void initidle(void);
void initcpprt(void);
void initfutex(void);
void initcmdline(void);
void idleloop(void);
static volatile int bstate;
......@@ -109,6 +110,7 @@ cmain(u64 mbmagic, u64 mbaddr)
initsperf();
inittrap();
initlapic();
initcmdline();
initkalloc(mbaddr);
initz();
initproc(); // process table
......
......@@ -7,6 +7,7 @@
#include "fs.h"
#include "file.hh"
#include "net.hh"
#include "major.h"
#ifdef LWIP
extern "C" {
......@@ -249,8 +250,8 @@ initnet(void)
{
struct proc *t;
devsw[NETIF].write = nullptr;
devsw[NETIF].read = netifread;
devsw[MAJ_NETIF].write = nullptr;
devsw[MAJ_NETIF].read = netifread;
t = threadalloc(initnet_worker, nullptr);
if (t == nullptr)
......
......@@ -3,6 +3,7 @@
#include "kernel.hh"
#include "pci.hh"
#include "pcireg.hh"
#include "traps.h"
extern int e1000attach(struct pci_func *pcif);
extern int e1000eattach(struct pci_func *pcif);
......@@ -134,6 +135,69 @@ pci_attach(struct pci_func *f)
&pci_attach_vendor[0], f);
}
static void
pci_scan_caplist(struct pci_func* f)
{
u32 cap_ptr = PCI_CAPLIST_PTR(pci_conf_read(f, PCI_CAPLISTPTR_REG));
for (int i = 0; i < 10 && cap_ptr != 0; i++) {
u32 cap_entry = pci_conf_read(f, cap_ptr);
switch (PCI_CAPLIST_CAP(cap_entry)) {
case PCI_CAP_MSI:
f->msi_capreg = cap_ptr;
break;
default:
break;
}
cap_ptr = PCI_CAPLIST_NEXT(cap_entry);
}
}
void
pci_msi_enable(struct pci_func *f, u8 irqnum)
{
// PCI System Architecture, Fourth Edition
assert(f->msi_capreg != 0);
u32 cap_entry = pci_conf_read(f, f->msi_capreg);
if (!(cap_entry & PCI_MSI_MCR_64BIT))
panic("pci_msi_enable only handles 64-bit address capable devices");
if (PCI_MSI_MCR_MMC(cap_entry) != 0)
panic("pci_msi_enable only handles 1 requested message");
// [PCI SA pg 253]
// Step 4. Assign a dword-aligned memory address to the device's
// Message Address Register.
// (The Message Address Register format is mandated by the x86
// architecture. See 9.11.1 in the Vol. 3 of the Intel architecture
// manual.)
pci_conf_write(f, f->msi_capreg + 4*1,
(0x0fee << 20) | // magic constant for northbridge
(0 << 12) | // destination ID
(1 << 3) | // redirection hint
(0 << 2)); // destination mode
pci_conf_write(f, f->msi_capreg + 4*2, 0);
// Step 5 and 6. Allocate messages for the device. Since we
// support only one message and that is the default value in
// the message control register, we do nothing.
// Step 7. Write base message data pattern into the device's
// Message Data Register.
// (The Message Data Register format is mandated by the x86
// architecture. See 9.11.2 in the Vol. 3 of the Intel architecture
// manual.)
pci_conf_write(f, f->msi_capreg + 4*3,
(0 << 15) | // trigger mode (edge)
//(0 << 14) | // level for trigger mode (don't care)
(0 << 8) | // delivery mode (fixed)
(irqnum+T_IRQ0)); // vector
// Step 8. Set the MSI enable bit in the device's Message
// control register.
pci_conf_write(f, f->msi_capreg, cap_entry | (1 << 16));
}
static int
pci_scan_bus(struct pci_bus *bus)
{
......@@ -160,7 +224,11 @@ pci_scan_bus(struct pci_bus *bus)
u32 intr = pci_conf_read(&af, PCI_INTERRUPT_REG);
af.irq_line = PCI_INTERRUPT_LINE(intr);
u32 cmd_status = pci_conf_read(&af, PCI_COMMAND_STATUS_REG);
if (cmd_status & PCI_STATUS_CAPLIST_SUPPORT)
pci_scan_caplist(&af);
af.dev_class = pci_conf_read(&af, PCI_CLASS_REG);
if (pci_show_devs)
pci_print_func(&af);
......
......@@ -42,6 +42,7 @@ proc::proc(int npid) :
ftable(0), cwd(0), tsc(0), curcycles(0), cpuid(0), epoch(0),
cpu_pin(0), oncv(0), cv_wakeup(0),
user_fs_(0), unmap_tlbreq_(0), in_exec_(0), uaccess_(0),
upath(0), uargv(userptr<const char>(nullptr)),
exception_inuse(0), magic(PROC_MAGIC), state_(EMBRYO)
{
snprintf(lockname, sizeof(lockname), "cv:proc:%d", pid);
......@@ -204,6 +205,83 @@ freeproc(struct proc *p)
gc_delayed(p);
}
void
execstub(void)
{
userptr<userptr<const char> > uargv;
const char* upath;
upath = myproc()->upath;
uargv = myproc()->uargv;
barrier();
myproc()->upath = nullptr;
post_swtch();
long r = doexec(upath, uargv);
myproc()->tf->rax = r;
// This stuff would have been called in syscall and sysentry_c
// if we returned from the previous kstack
mtstop(myproc());
mtign();
if (myproc()->killed) {
mtstart(trap, myproc());
exit();
}
}
static void
kstackfree(void* kstack)
{
ksfree(slab_stack, kstack);
}
void
execswitch(proc* p)
{
// Alloc a new kernel stack, set it up, and free the old one
context* cntxt;
trapframe* tf;
char* kstack;
char* sp;
if ((kstack = (char*) ksalloc(slab_stack)) == 0)
panic("execswitch: ksalloc");
sp = kstack + KSTACKSIZE;
sp -= sizeof(*p->tf);
tf = (trapframe*)sp;
// XXX(sbw) we only need the whole tf if exec fails
*tf = *p->tf;
sp -= 8;
// XXX(sbw) we could use the sysret return path
*(u64*)sp = (u64)trapret;
sp -= sizeof(*p->context);
cntxt = (context*)sp;
memset(cntxt, 0, sizeof(*cntxt));
cntxt->rip = (uptr)execstub;
cwork* w = new cwork();
if (w != nullptr) {
w->rip = (void*) kstackfree;
w->arg0 = p->kstack;
if (wqcrit_push(w, myproc()->exec_cpuid_) < 0) {
ksfree(slab_stack, p->kstack);
delete w;
}
} else {
ksfree(slab_stack, p->kstack);
}
p->kstack = kstack;
p->context = cntxt;
p->tf = tf;
}
proc*
proc::alloc(void)
{
......
......@@ -10,6 +10,8 @@
#include "amd64.h"
#include "cpu.hh"
#include "sampler.h"
#include "major.h"
#include "apic.hh"
#define LOGHEADER_SZ (sizeof(struct logheader) + \
sizeof(((struct logheader*)0)->cpu[0])*NCPU)
......@@ -273,7 +275,7 @@ initsamp(void)
pmulog[myid()].event = (pmuevent*) p;
pmulog[myid()].capacity = PERFSIZE / sizeof(struct pmuevent);
devsw[SAMPLER].write = sampwrite;
devsw[SAMPLER].read = sampread;
devsw[SAMPLER].stat = sampstat;
devsw[MAJ_SAMPLER].write = sampwrite;
devsw[MAJ_SAMPLER].read = sampread;
devsw[MAJ_SAMPLER].stat = sampstat;
}
......@@ -13,6 +13,8 @@
#include "wq.hh"
#include "percpu.hh"
#include "sperf.hh"
#include "major.h"
#include "rnd.hh"
enum { sched_debug = 0 };
enum { steal_nonexec = 1 };
......@@ -30,21 +32,16 @@ public:
assert(nbytes == sizeof(schedule));
return buf;
}
sched_stat stats_;
u64 ncansteal_;
private:
struct spinlock lock_;
sched_link head_;
void sanity(void);
struct {
std::atomic<u64> enqs;
std::atomic<u64> deqs;
std::atomic<u64> steals;
std::atomic<u64> misses;
} stats_;
volatile u64 ncansteal_ __mpalign__;
struct spinlock lock_ __mpalign__;
sched_link head_;
volatile bool cansteal_ __mpalign__;
};
percpu<schedule> schedule_;
......@@ -60,6 +57,9 @@ schedule::schedule(void)
stats_.deqs = 0;
stats_.steals = 0;
stats_.misses = 0;
stats_.idle = 0;
stats_.busy = 0;
stats_.schedstart = 0;
}
void
......@@ -73,7 +73,8 @@ schedule::enq(proc* p)
head_.prev->next = entry;
head_.prev = entry;
if (cansteal((proc*)entry, true))
ncansteal_++;
if (ncansteal_++ == 0)
cansteal_ = true;
sanity();
stats_.enqs++;
}
......@@ -81,6 +82,10 @@ schedule::enq(proc* p)
proc*
schedule::deq(void)
{
if (head_.next == &head_)
return nullptr;
ANON_REGION(__func__, &perfgroup);
// Remove from head
scoped_acquire x(&lock_);
sched_link* entry = head_.next;
......@@ -90,7 +95,8 @@ schedule::deq(void)
entry->next->prev = entry->prev;
entry->prev->next = entry->next;
if (cansteal((proc*)entry, true))
--ncansteal_;
if (--ncansteal_ == 0)
cansteal_ = false;
sanity();
stats_.deqs++;
return (proc*)entry;
......@@ -99,14 +105,16 @@ schedule::deq(void)
proc*
schedule::steal(bool nonexec)
{
if (ncansteal_ == 0 || !tryacquire(&lock_))
if (!cansteal_ || !tryacquire(&lock_))
return nullptr;
ANON_REGION(__func__, &perfgroup);
for (sched_link* ptr = head_.next; ptr != &head_; ptr = ptr->next)
if (cansteal((proc*)ptr, nonexec)) {
ptr->next->prev = ptr->prev;
ptr->prev->next = ptr->next;
--ncansteal_;
if (--ncansteal_ == 0)
cansteal_ = false;
sanity();
++stats_.steals;
release(&lock_);
......@@ -121,10 +129,10 @@ void
schedule::dump(void)
{
cprintf("%8lu %8lu %8lu %8lu\n",
stats_.enqs.load(),
stats_.deqs.load(),
stats_.steals.load(),
stats_.misses.load());
stats_.enqs,
stats_.deqs,
stats_.steals,
stats_.misses);
stats_.enqs = 0;
stats_.deqs = 0;
......@@ -170,12 +178,17 @@ steal(void)
{
struct proc *steal;
int r = 0;
u64 s = rnd();
pushcli();
for (int nonexec = 0; nonexec < (steal_nonexec ? 2 : 1); nonexec++) {
for (int i = 0; i < NCPU; i++) {
steal = schedule_[i].steal(nonexec);
for (u64 i = 0; i < NCPU; i++) {
u64 k = (s+i) % NCPU;
if (k == myid())
continue;
steal = schedule_[k].steal(nonexec);
if (steal != nullptr) {
acquire(&steal->lock);
if (steal->get_state() == RUNNABLE && !steal->cpu_pin &&
......@@ -214,6 +227,9 @@ scheddump(void)
void
addrun(struct proc* p)
{
if (p->upath)
execswitch(p);
p->set_state(RUNNABLE);
schedule_[p->cpuid].enq(p);
}
......@@ -250,6 +266,13 @@ sched(void)
// Interrupts are disabled
next = schedule_->deq();
u64 t = rdtsc();
if (myproc() == idleproc())
schedule_->stats_.idle += t - schedule_->stats_.schedstart;
else
schedule_->stats_.busy += t - schedule_->stats_.schedstart;
schedule_->stats_.schedstart = t;
if (next == nullptr) {
if (myproc()->get_state() != RUNNABLE ||
// proc changed its CPU pin?
......@@ -270,6 +293,9 @@ sched(void)
mycpu()->proc = next;
mycpu()->prev = prev;
if (mycpu()->uwq != nullptr || next->uwq != nullptr)
mycpu()->uwq = next->uwq;
if (prev->get_state() == ZOMBIE)
mtstop(prev);
else
......@@ -293,9 +319,29 @@ sched(void)
post_swtch();
}
static int
statread(struct inode *inode, char *dst, u32 off, u32 n)
{
// Sort of like a binary /proc/stat
size_t sz = NCPU*sizeof(sched_stat);
if (n != sz)
return -1;
for (int i = 0; i < NCPU; i++) {
memcpy(&dst[i*sizeof(sched_stat)], &schedule_[i].stats_,
sizeof(schedule_[i].stats_));
}
return n;
}
void
initsched(void)
{
for (int i = 0; i < NCPU; i++)
new (&schedule_[i]) schedule();
devsw[MAJ_STAT].write = nullptr;
devsw[MAJ_STAT].read = statread;
}
......@@ -10,6 +10,7 @@
#include "condvar.h"
#include "fs.h"
#include "file.hh"
#include "major.h"
#if LOCKSTAT
static int lockstat_enable;
......@@ -241,8 +242,8 @@ lockstat_write(struct inode *ip, const char *buf, u32 off, u32 n)
void
initlockstat(void)
{
devsw[DEVLOCKSTAT].write = lockstat_write;
devsw[DEVLOCKSTAT].read = lockstat_read;
devsw[MAJ_LOCKSTAT].write = lockstat_write;
devsw[MAJ_LOCKSTAT].read = lockstat_read;
}
#else
void
......
......@@ -79,14 +79,12 @@ argcheckptr(const void *p, int size)
return 0;
}
extern u64 (*syscalls[])(u64, u64, u64, u64, u64);
extern u64 (*syscalls[])(u64, u64, u64, u64, u64, u64);
extern const int nsyscalls;
u64
syscall(u64 a0, u64 a1, u64 a2, u64 a3, u64 a4, u64 num)
syscall(u64 a0, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 num)
{
mt_ascope ascope("syscall(%lx,%lx,%lx,%lx,%lx,%lx)", num, a0, a1, a2, a3, a4);
for (;;) {
#if EXCEPTIONS
try {
......@@ -94,7 +92,7 @@ syscall(u64 a0, u64 a1, u64 a2, u64 a3, u64 a4, u64 num)
if(num < nsyscalls && syscalls[num]) {
mtstart(syscalls[num], myproc());
mtrec();
u64 r = syscalls[num](a0, a1, a2, a3, a4);
u64 r = syscalls[num](a0, a1, a2, a3, a4, a5);
mtstop(myproc());
mtign();
return r;
......
......@@ -299,7 +299,6 @@ sys_openat(int dirfd, const char *path, int omode)
// Reads the dirfd FD, dirfd's inode, the inodes of all files in
// path; writes the returned FD
mt_ascope ascope("%s(%d,%s,%d)", __func__, dirfd, path, omode);
mtwriteavar("thread:%x", myproc()->pid);
mtreadavar("inode:%x.%x", cwd->dev, cwd->inum);
if(omode & O_CREATE){
......@@ -431,12 +430,10 @@ sys_chdir(const char *path)
return 0;
}
//SYSCALL
int
sys_exec(const char *upath, userptr<userptr<const char> > uargv)
doexec(const char* upath, userptr<userptr<const char> > uargv)
{
ANON_REGION(__func__, &perfgroup);
static const int len = 32;
char *argv[MAXARG];
char path[DIRSIZ+1];
long r = -1;
......@@ -457,20 +454,38 @@ sys_exec(const char *upath, userptr<userptr<const char> > uargv)
if(uarg == 0)
break;
argv[i] = (char*) kmalloc(len, "execbuf");
if (argv[i]==nullptr || fetchstr(argv[i], (char*)uarg, len)<0)
argv[i] = (char*) kmalloc(MAXARGLEN, "execbuf");
if (argv[i]==nullptr || fetchstr(argv[i], (char*)uarg, MAXARGLEN)<0)
goto clean;
}
argv[i] = 0;
r = exec(path, argv, &ascope);
clean:
for (i=i-i; i >= 0; i--)
kmfree(argv[i], len);
for (i=i-1; i >= 0; i--)
kmfree(argv[i], MAXARGLEN);
return r;
}
//SYSCALL
int
sys_exec(const char *upath, userptr<userptr<const char> > uargv)
{
myproc()->exec_cpuid_ = myid();
#if EXECSWITCH
myproc()->exec_cpuid_ = mycpuid();
myproc()->uargv = uargv;
barrier();
// upath serves as a flag to the scheduler
myproc()->upath = upath;
yield();
myproc()->upath = nullptr;
#endif
return doexec(upath, uargv);
}
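Pieced together from the proc.cc and sched.cc hunks above, the EXECSWITCH handoff looks roughly like this (a sketch of the control flow, not verbatim code):

  // sys_exec:    p->upath = upath; yield();       // park the request on the proc
  // addrun:      if (p->upath) execswitch(p);     // runs when p is next scheduled
  // execswitch:  build a fresh kstack whose saved context resumes in execstub,
  //              and queue the old kstack for freeing on exec_cpuid_
  // execstub:    doexec(p->upath, p->uargv);      // the real exec, on the new stack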
//SYSCALL
int
sys_pipe(int *fd)
{
struct file *rf, *wf;
......
......@@ -12,6 +12,8 @@
#include "kmtrace.hh"
#include "futex.h"
#include <uk/mman.h>
//SYSCALL
int
sys_fork(int flags)
......@@ -98,42 +100,64 @@ sys_uptime(void)
}
//SYSCALL
int
sys_map(userptr<void> addr, size_t len)
void *
sys_mmap(userptr<void> addr, size_t len, int prot, int flags, int fd,
off_t offset)
{
ANON_REGION(__func__, &perfgroup);
mt_ascope ascope("%s(%p,%lu,%#x,%#x,%d,%#lx)",
__func__, addr.unsafe_get(), len, prot, flags, fd, offset);
if (!(prot & (PROT_READ | PROT_WRITE))) {
cprintf("not implemented: !(prot & (PROT_READ | PROT_WRITE))\n");
return MAP_FAILED;
}
if (flags & MAP_SHARED) {
cprintf("not implemented: (flags & MAP_SHARED)\n");
return MAP_FAILED;
}
if (!(flags & MAP_ANONYMOUS)) {
cprintf("not implemented: !(flags & MAP_ANONYMOUS)\n");
return MAP_FAILED;
}
uptr start = PGROUNDDOWN(addr);
uptr end = PGROUNDUP(addr + len);
if ((flags & MAP_FIXED) && start != addr)
return MAP_FAILED;
#if MTRACE
mt_ascope ascope("%s(%p,%lx)", __func__, addr, len);
mtwriteavar("thread:%x", myproc()->pid);
for (uptr i = PGROUNDDOWN(addr); i < PGROUNDUP(addr + len); i += PGSIZE)
mtwriteavar("page:%016x", i);
if (addr != 0) {
for (uptr i = start / PGSIZE; i < end / PGSIZE; i++)
mtwriteavar("pte:%p.%#lx", myproc()->vmap, i);
}
#endif
vmnode *vmn = new vmnode(PGROUNDUP(len) / PGSIZE);
vmnode *vmn = new vmnode((end - start) / PGSIZE);
if (vmn == 0)
return -1;
return MAP_FAILED;
long r = myproc()->vmap->insert(vmn, PGROUNDDOWN(addr), 1);
uptr r = myproc()->vmap->insert(vmn, start, 1);
if (r < 0) {
delete vmn;
return -1;
return MAP_FAILED;
}
return r;
return (void*)r;
}
//SYSCALL
int
sys_unmap(userptr<void> addr, size_t len)
sys_munmap(userptr<void> addr, size_t len)
{
ANON_REGION(__func__, &perfgroup);
#if MTRACE
mt_ascope ascope("%s(%p,%lx)", __func__, addr, len);
mtwriteavar("thread:%x", myproc()->pid);
for (uptr i = PGROUNDDOWN(addr); i < PGROUNDUP(addr + len); i += PGSIZE)
mtwriteavar("page:%016x", i);
mt_ascope ascope("%s(%p,%#lx)", __func__, addr.unsafe_get(), len);
for (uptr i = addr / PGSIZE; i < PGROUNDUP(addr + len) / PGSIZE; i++)
mtwriteavar("pte:%p.%#lx", myproc()->vmap, i);
#endif
uptr align_addr = PGROUNDDOWN(addr);
......@@ -179,7 +203,6 @@ sys_futex(const u64* addr, int op, u64 val, u64 timer)
return -1;
mt_ascope ascope("%s(%p,%d,%lu,%lu)", __func__, addr, op, val, timer);
mtwriteavar("thread:%x", myproc()->pid);
switch(op) {
case FUTEX_WAIT:
......@@ -190,3 +213,11 @@ sys_futex(const u64* addr, int op, u64 val, u64 timer)
return -1;
}
}
//SYSCALL
long
sys_yield(void)
{
yield();
return 0;
}
......@@ -11,6 +11,7 @@
#include "kmtrace.hh"
#include "bits.hh"
#include "kalloc.hh"
#include "apic.hh"
extern "C" void __uaccess_end(void);
......@@ -20,7 +21,7 @@ struct intdesc idt[256] __attribute__((aligned(16)));
extern u64 trapentry[];
u64
sysentry_c(u64 a0, u64 a1, u64 a2, u64 a3, u64 a4, u64 num)
sysentry_c(u64 a0, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 num)
{
sti();
......@@ -31,7 +32,7 @@ sysentry_c(u64 a0, u64 a1, u64 a2, u64 a3, u64 a4, u64 num)
trapframe *tf = (trapframe*) (myproc()->kstack + KSTACKSIZE - sizeof(*tf));
myproc()->tf = tf;
u64 r = syscall(a0, a1, a2, a3, a4, num);
u64 r = syscall(a0, a1, a2, a3, a4, a5, num);
if(myproc()->killed) {
mtstart(trap, myproc());
......
......@@ -18,6 +18,21 @@
#define TRAP(x) _TRAP(x, NOEC)
#define TRAPCODE(x) _TRAP(x, EC)
/* Calling convention:
*
* Syscall #: %rax
* Arguments: %rdi, %rsi, %rdx, %r10 (*), %r8, %r9
* Return RIP: %rcx (from syscall instruction)
* RFLAGS: %r11 (from syscall instruction)
*
* None of the above registers are preserved across function calls in
* the AMD64 ABI. This means user space doesn't need to save any
* registers across a syscall and we're free to clobber them.
*
* (*) This argument register differs from the regular AMD64 ABI.
* Normally, the fourth argument is in %rcx, but this is clobbered by
* syscall. %r10 is cheap to use because it is caller-save.
*/
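The matching user-side stub — a sketch of the usys.S convention implied by the "saved by usys.S" comment below, not verbatim from this commit — only has to stash the ABI's fourth argument before the instruction clobbers %rcx:

  #define SYSCALL(name)          \
    .globl name;                 \
    name:                        \
      movq $SYS_##name, %rax;    /* syscall number */ \
      movq %rcx, %r10;           /* 4th arg: syscall clobbers %rcx */ \
      syscall;                   \
      ret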
.code64
.globl sysentry
.align 8
......@@ -25,23 +40,21 @@ sysentry:
// can syscall/sysret be used safely in the presence of NMIs?
// we are executing with cpl=0 but without a valid stack.
// blow away %r9: syscalls can take at most 5 args
swapgs
movq %gs:8, %r9 // myproc()
movq %rax, %gs:24 // save %rax so we can use it
movq %gs:8, %rax // myproc()
movq %ss:PROC_KSTACK_OFFSET(%r9), %r9
addq $(KSTACKSIZE-TRAPFRAME_SIZE), %r9
// syscall number: %rax
// function arguments: %rdi, %rsi, %rdx, %rcx, %r8, %r9 (killed)
movq %ss:PROC_KSTACK_OFFSET(%rax), %rax
addq $(KSTACKSIZE-TRAPFRAME_SIZE), %rax
// save all registers we're not allowed to clobber
// skip padding3, ds
movq %r15, %ss:0x10(%r9)
movq %r14, %ss:0x18(%r9)
movq %r13, %ss:0x20(%r9)
movq %r12, %ss:0x28(%r9)
movq %rbp, %ss:0x30(%r9)
movq %rbx, %ss:0x38(%r9)
movq %r15, %ss:0x10(%rax)
movq %r14, %ss:0x18(%rax)
movq %r13, %ss:0x20(%rax)
movq %r12, %ss:0x28(%rax)
movq %rbp, %ss:0x30(%rax)
movq %rbx, %ss:0x38(%rax)
// skip r11 (0x40)
// skip r10 (0x48)
// skip r9 (0x50)
......@@ -53,20 +66,21 @@ sysentry:
// skip rdi (0x80)
// skip trapno (0x88)
// skip err, padding2 (0x90)
movq %rcx, %ss:0x98(%r9) // rip saved by syscall
movq %rcx, %ss:0x98(%rax) // rip saved by syscall
// skip cs, padding (0xa0)
movq %r11, %ss:0xa8(%r9) // eflags saved by syscall
movq %rsp, %ss:0xb0(%r9)
movq %r11, %ss:0xa8(%rax) // eflags saved by syscall
movq %rsp, %ss:0xb0(%rax)
movw $KDSEG, %cx
movw %cx, %ds
movw %cx, %es
movq %r9, %rsp
movq %rax, %rsp
movq %r10, %rcx // saved by usys.S
movq %rax, %r9 // syscall# from usys.S
pushq %gs:24 // syscall# saved from %rax
call sysentry_c
popq %r11
// return using SYSRET
cli
......
......@@ -51,10 +51,15 @@ inituart(void)
// Try COM2 (aka ttyS1) first, because it usually does SOL for IPMI.
{ COM2, IRQ_COM2 },
// Still try COM1 (aka ttyS0), because it is what QEMU emulates.
{ COM1, IRQ_COM1 }
{ COM1, IRQ_COM1 },
};
int i;
#if defined(HW_ben)
int baud = 115200;
#else
int baud = 19200;
#endif
for (i = 0; i < 2; i++) {
com = conf[i].com;
irq_com = conf[i].irq;
......@@ -63,7 +68,7 @@ inituart(void)
outb(com+2, 0);
// 19200 baud
outb(com+3, 0x80); // Unlock divisor
outb(com+0, 115200/19200);
outb(com+0, 115200/baud);
outb(com+1, 0);
// 8 bits, one stop bit, no parity
outb(com+3, 0x03); // Lock divisor, 8 data bits.
......
......@@ -11,6 +11,7 @@
#include "vm.hh"
#include "kalloc.hh"
#include "bits.hh"
#include "rnd.hh"
extern "C" {
#include "kern_c.h"
}
......@@ -23,7 +24,7 @@ uwq_trywork(void)
u64 i, k;
// A "random" victim CPU
k = rdtsc();
k = rnd();
for (i = 0; i < NCPU; i++) {
u64 j = (i+k) % NCPU;
......@@ -31,14 +32,11 @@ uwq_trywork(void)
continue;
struct cpu *c = &cpus[j];
// The gc_epoch is for p and uwq
// The gc_epoch is for uwq
scoped_gc_epoch xgc();
barrier();
struct proc *p = c->proc;
if (p == nullptr)
continue;
uwq* uwq = p->uwq;
uwq* uwq = c->uwq;
if (uwq == nullptr)
continue;
......@@ -54,6 +52,23 @@ uwq_trywork(void)
//SYSCALL
int
sys_wqinit(uptr uentry)
{
uwq* uwq;
if (myproc()->uwq != nullptr)
return -1;
uwq = uwq::alloc(myproc()->vmap, myproc()->ftable, uentry);
if (uwq == nullptr)
return -1;
myproc()->uwq = uwq;
return 0;
}
//SYSCALL
int
sys_wqwait(void)
{
uwq_worker* w = myproc()->worker;
......@@ -103,7 +118,7 @@ uwq_worker::wait(void)
// uwq
//
uwq*
uwq::alloc(vmap* vmap, filetable *ftable)
uwq::alloc(vmap* vmap, filetable *ftable, uptr uentry)
{
uwq_ipcbuf* ipc;
uwq* u;
......@@ -115,7 +130,7 @@ uwq::alloc(vmap* vmap, filetable *ftable)
ftable->incref();
vmap->incref();
u = new uwq(vmap, ftable, ipc);
u = new uwq(vmap, ftable, ipc, uentry);
if (u == nullptr) {
ftable->decref();
vmap->decref();
......@@ -131,10 +146,10 @@ uwq::alloc(vmap* vmap, filetable *ftable)
return u;
}
uwq::uwq(vmap* vmap, filetable* ftable, uwq_ipcbuf* ipc)
uwq::uwq(vmap* vmap, filetable* ftable, uwq_ipcbuf* ipc, uptr uentry)
: rcu_freed("uwq"),
vmap_(vmap), ftable_(ftable), ipc_(ipc),
uentry_(0), ustack_(UWQSTACK), uref_(0)
uentry_(uentry), ustack_(UWQSTACK), uref_(0)
{
for (int i = 0; i < NELEM(ipc_->len); i++)
ipc_->len[i].v_ = 0;
......@@ -254,12 +269,6 @@ uwq::onzero() const
u->finish();
}
void
uwq::setuentry(uptr uentry)
{
uentry_ = uentry;
}
proc*
uwq::allocworker(void)
{
......
......@@ -130,7 +130,7 @@ vmnode::loadpg(off_t off)
{
#ifdef MTRACE
mtreadavar("inode:%x.%x", ip->dev, ip->inum);
mtwriteavar("vmnode:%016x", this);
mtwriteavar("vmnode:%p", this);
#endif
assert(off <= sz);
......@@ -168,7 +168,7 @@ vmnode::loadall()
vma::vma(vmap *vmap, uptr start, uptr end, enum vmatype vtype, vmnode *vmn) :
#if VM_CRANGE
range(&vmap->cr, start, end-start),
range(&vmap->vmas, start, end-start),
#endif
vma_start(start), vma_end(end), va_type(vtype), n(vmn)
{
......@@ -194,10 +194,10 @@ vmap::alloc(void)
vmap::vmap() :
#if VM_CRANGE
cr(10),
vmas(10),
#endif
#if VM_RADIX
rx(PGSHIFT),
vmas(PGSHIFT),
#endif
ref(1), pml4(setupkvm()), kshared((char*) ksalloc(slab_kshared)),
brk_(0)
......@@ -253,12 +253,7 @@ vmap::incref()
bool
vmap::replace_vma(vma *a, vma *b)
{
#if VM_CRANGE
auto span = cr.search_lock(a->vma_start, a->vma_end - a->vma_start);
#endif
#if VM_RADIX
auto span = rx.search_lock(a->vma_start, a->vma_end - a->vma_start);
#endif
auto span = vmas.search_lock(a->vma_start, a->vma_end - a->vma_start);
if (a->deleted())
return false;
for (auto e: span)
......@@ -279,12 +274,11 @@ vmap::copy(int share)
{
vmap *nm = new vmap();
#if VM_CRANGE
for (auto r: cr) {
#endif
#if VM_RADIX
void *last = 0;
for (auto r: rx) {
#endif
for (auto r: vmas) {
#if VM_RADIX
if (!r || r == last)
continue;
last = r;
......@@ -318,12 +312,7 @@ vmap::copy(int share)
ne = new vma(nm, e->vma_start, e->vma_end, PRIVATE, e->n->copy());
}
#if VM_CRANGE
auto span = nm->cr.search_lock(ne->vma_start, ne->vma_end - ne->vma_start);
#endif
#if VM_RADIX
auto span = nm->rx.search_lock(ne->vma_start, ne->vma_end - ne->vma_start);
#endif
auto span = nm->vmas.search_lock(ne->vma_start, ne->vma_end - ne->vma_start);
for (auto x: span) {
#if VM_RADIX
if (!x)
......@@ -367,11 +356,11 @@ vmap::lookup(uptr start, uptr len)
panic("vmap::lookup bad len");
#if VM_CRANGE
auto r = cr.search(start, len);
auto r = vmas.search(start, len);
#endif
#if VM_RADIX
assert(len <= PGSIZE);
auto r = rx.search(start);
auto r = vmas.search(start);
#endif
if (r != 0) {
vma *e = (vma *) r;
......@@ -405,12 +394,7 @@ again:
{
// new scope to release the search lock before tlbflush
u64 len = n->npages * PGSIZE;
#if VM_CRANGE
auto span = cr.search_lock(vma_start, len);
#endif
#if VM_RADIX
auto span = rx.search_lock(vma_start, len);
#endif
auto span = vmas.search_lock(vma_start, len);
for (auto r: span) {
#if VM_RADIX
if (!r)
......@@ -474,12 +458,7 @@ vmap::remove(uptr vma_start, uptr len)
// new scope to release the search lock before tlbflush
uptr vma_end = vma_start + len;
#if VM_CRANGE
auto span = cr.search_lock(vma_start, len);
#endif
#if VM_RADIX
auto span = rx.search_lock(vma_start, len);
#endif
auto span = vmas.search_lock(vma_start, len);
for (auto r: span) {
vma *rvma = (vma*) r;
if (rvma->vma_start < vma_start || rvma->vma_end > vma_end) {
......@@ -627,7 +606,7 @@ vmap::pagefault(uptr va, u32 err)
*pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_W;
}
mtreadavar("vmnode:%016x", m->n);
mtreadavar("vmnode:%p", m->n);
return 1;
}
......@@ -636,9 +615,8 @@ int
pagefault(vmap *vmap, uptr va, u32 err)
{
#if MTRACE
mt_ascope ascope("%s(%p)", __func__, va);
mtwriteavar("thread:%x", myproc()->pid);
mtwriteavar("page:%p.%016x", vmap, PGROUNDDOWN(va));
mt_ascope ascope("%s(%#lx)", __func__, va);
mtwriteavar("pte:%p.%#lx", vmap, va / PGSIZE);
#endif
for (;;) {
......@@ -679,7 +657,7 @@ vmap::pagelookup(uptr va)
throw_bad_alloc();
char* kptr = (char*)(m->n->page[npg]);
mtreadavar("vmnode:%016x", m->n);
mtreadavar("vmnode:%p", m->n);
return &kptr[va & (PGSIZE-1)];
}
......@@ -687,9 +665,8 @@ void*
pagelookup(vmap* vmap, uptr va)
{
#if MTRACE
mt_ascope ascope("%s(%p)", __func__, va);
mtwriteavar("thread:%x", myproc()->pid);
mtwriteavar("page:%p.%016x", vmap, PGROUNDDOWN(va));
mt_ascope ascope("%s(%#lx)", __func__, va);
mtwriteavar("pte:%p.%#lx", vmap, va / PGSIZE);
#endif
for (;;) {
......@@ -758,17 +735,13 @@ vmap::sbrk(ssize_t n, uptr *addr)
s64 newn = PGROUNDUP(n + curbrk - newstart);
#if VM_CRANGE
range *prev = 0;
auto span = cr.search_lock(newstart, newn + PGSIZE);
#endif
#if VM_RADIX
auto span = rx.search_lock(newstart, newn + PGSIZE);
void *last = 0;
#endif
#if VM_CRANGE
auto span = vmas.search_lock(newstart, newn + PGSIZE);
for (auto r: span) {
#endif
#if VM_RADIX
void *last = 0;
for (auto r: span) {
if (!r || r == last)
continue;
last = r;
......@@ -826,7 +799,7 @@ vmap::unmapped_area(size_t npages)
while (addr < USERTOP) {
#if VM_CRANGE
auto x = cr.search(addr, n);
auto x = vmas.search(addr, n);
if (x == nullptr)
return addr;
vma* a = (vma*) x;
......@@ -836,7 +809,7 @@ vmap::unmapped_area(size_t npages)
#if VM_RADIX
bool overlap = false;
for (uptr ax = addr; ax < addr+n; ax += PGSIZE) {
auto x = rx.search(ax);
auto x = vmas.search(ax);
if (x != nullptr) {
overlap = true;
vma* a = (vma*) x;
......
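Taken together, these vm.cc hunks retire the twin names cr (crange) and rx (radix) in favor of a single vmas member, so each call site needs one search/search_lock line instead of a pair of #if blocks. A minimal sketch of the pattern, assuming crange and radix expose compatible constructors and search interfaces:

  // Sketch only: one member name, two possible types, chosen at build time.
  class vmap_sketch {
  #if VM_CRANGE
    crange vmas;        // interval tree over [start, start+len)
  public:
    vmap_sketch() : vmas(10) {}
  #endif
  #if VM_RADIX
    radix vmas;         // radix tree keyed by page index
  public:
    vmap_sketch() : vmas(PGSHIFT) {}
  #endif
  };
  // Callers now write vmas.search_lock(start, len) unconditionally; only
  // genuinely divergent logic (e.g. the per-page loop under VM_RADIX)
  // keeps its #if.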
#include "types.h"
#include "amd64.h"
#include "bits.hh"
#include "kernel.hh"
#include "traps.h"
#define ID 0x802 // ID
#define VER 0x803 // Version
#define TPR 0x808 // Task Priority
#define EOI 0x80b // EOI
#define SVR 0x80f // Spurious Interrupt Vector
#define ENABLE 0x00000100 // Unit Enable
#define ESR 0x828 // Error Status
#define ICR 0x830 // Interrupt Command
#define INIT 0x00000500 // INIT/RESET
#define STARTUP 0x00000600 // Startup IPI
#define BCAST 0x00080000 // Send to all APICs, including self.
#define LEVEL 0x00008000 // Level triggered
// #define DELIVS 0x00001000 // Delivery status
#define ASSERT 0x00004000 // Assert interrupt (vs deassert)
#define TIMER 0x832 // Local Vector Table 0 (TIMER)
#define X1 0x0000000B // divide counts by 1
#define PERIODIC 0x00020000 // Periodic
#define PCINT 0x834 // Performance Counter LVT
#define LINT0 0x835 // Local Vector Table 1 (LINT0)
#define LINT1 0x836 // Local Vector Table 2 (LINT1)
#define ERROR 0x837 // Local Vector Table 3 (ERROR)
#define MASKED 0x00010000 // Interrupt masked
#define MT_NMI 0x00000400 // NMI message type
#define MT_FIX 0x00000000 // Fixed message type
#define TICR 0x838 // Timer Initial Count
#define TCCR 0x839 // Timer Current Count
#define TDCR 0x83e // Timer Divide Configuration
#define IO_RTC 0x70
static u64 x2apichz;
void
x2apicstartap(hwid_t id, u32 addr)
{
int i;
volatile u16 *wrv;
// "The BSP must initialize CMOS shutdown code to 0AH
// and the warm reset vector (DWORD based at 40:67) to point at
// the AP startup code prior to the [universal startup algorithm]."
outb(IO_RTC, 0xF); // offset 0xF is shutdown code
outb(IO_RTC+1, 0x0A);
wrv = (u16*)(0x40<<4 | 0x67); // Warm reset vector
wrv[0] = 0;
wrv[1] = addr >> 4;
// "Universal startup algorithm."
// Send INIT (level-triggered) interrupt to reset other CPU.
writemsr(ICR, (((u64)id.num)<<32) | INIT | LEVEL | ASSERT);
//xapicwait();
microdelay(10000);
writemsr(ICR, (((u64)id.num)<<32) | INIT | LEVEL);
//xapicw(ICRLO, hwid.num |INIT | LEVEL);
//xapicwait();
microdelay(10000); // should be 10ms, but too slow in Bochs!
// Send startup IPI (twice!) to enter bootstrap code.
// Regular hardware is supposed to only accept a STARTUP
// when it is in the halted state due to an INIT. So the second
// should be ignored, but it is part of the official Intel algorithm.
// Bochs complains about the second one. Too bad for Bochs.
for(i = 0; i < 2; i++){
//xapicw(ICRHI, hwid.num<<24);
//xapicw(ICRLO, STARTUP | (addr>>12));
writemsr(ICR, (((u64)id.num)<<32) | STARTUP | (addr>>12));
microdelay(200);
}
}
void
x2apiceoi(void)
{
writemsr(EOI, 0);
}
void
x2apic_tlbflush(hwid_t id)
{
panic("x2apic_tlbflush");
}
void
x2apic_sampconf(hwid_t id)
{
panic("x2apic_sampconf");
}
void
x2apicpc(char mask)
{
writemsr(PCINT, mask ? MASKED : MT_NMI);
}
hwid_t
x2apicid(void)
{
u64 id = readmsr(ID);
return HWID((u32)id);
}
void
initx2apic(void)
{
u64 count;
// Enable local APIC; set spurious interrupt vector.
writemsr(SVR, ENABLE | (T_IRQ0 + IRQ_SPURIOUS));
if (x2apichz == 0) {
// Measure the TICR frequency
writemsr(TDCR, X1);
writemsr(TICR, 0xffffffff);
u64 ccr0 = readmsr(TCCR);
microdelay(10 * 1000); // 1/100th of a second
u64 ccr1 = readmsr(TCCR);
x2apichz = 100 * (ccr0 - ccr1);
}
count = (QUANTUM*x2apichz) / 1000;
if (count > 0xffffffff)
panic("initxapic: QUANTUM too large");
// The timer repeatedly counts down at bus frequency
// from xapic[TICR] and then issues an interrupt.
writemsr(TDCR, X1);
writemsr(TIMER, PERIODIC | (T_IRQ0 + IRQ_TIMER));
writemsr(TICR, count);
// Disable logical interrupt lines.
writemsr(LINT0, MASKED);
writemsr(LINT1, MASKED);
// Disable performance counter overflow interrupts
// on machines that provide that interrupt entry.
if (((readmsr(VER)>>16) & 0xFF) >= 4)
x2apicpc(0);
// Map error interrupt to IRQ_ERROR.
writemsr(ERROR, T_IRQ0 + IRQ_ERROR);
// Clear error status register (requires back-to-back writes).
writemsr(ESR, 0);
writemsr(ESR, 0);
// Ack any outstanding interrupts.
writemsr(EOI, 0);
// Send an Init Level De-Assert to synchronise arbitration ID's.
writemsr(ICR, BCAST | INIT | LEVEL);
#if 0 // XXX(sbw) no need to poll anymore
while (readmsr(ICR) & DELIVS)
;
#endif
// Enable interrupts on the APIC (but not on the processor).
writemsr(TPR, 0);
}
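Note the register numbering: in x2APIC mode the APIC is programmed through MSRs 0x800-0x8ff instead of the 0xfee00000 MMIO page used by the xAPIC code below, so MMIO offset 0x20 (ID) becomes MSR 0x802, 0x80 (TPR) becomes 0x808, and so on (offset/0x10 + 0x800). The readmsr/writemsr helpers this file relies on are defined elsewhere in the tree; a sketch of their usual shape:

  // Illustrative bodies only; the real helpers live in the arch headers.
  static inline u64
  sketch_readmsr(u32 msr)
  {
    u32 lo, hi;
    __asm volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
    return ((u64)hi << 32) | lo;
  }

  static inline void
  sketch_writemsr(u32 msr, u64 val)
  {
    __asm volatile("wrmsr" : : "c" (msr),
                   "a" ((u32)val), "d" ((u32)(val >> 32)));
  }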
......@@ -7,6 +7,7 @@
#include "traps.h"
#include "bits.hh"
#include "cpu.hh"
#include "apic.hh"
// Local APIC registers, divided by 4 for use as uint[] indices.
#define ID (0x0020/4) // ID
......@@ -24,7 +25,6 @@
#define DEASSERT 0x00000000
#define LEVEL 0x00008000 // Level triggered
#define BCAST 0x00080000 // Send to all APICs, including self.
#define BUSY 0x00001000
#define FIXED 0x00000000
#define ICRHI (0x0310/4) // Interrupt Command [63:32]
#define TIMER (0x0320/4) // Local Vector Table 0 (TIMER)
......@@ -43,31 +43,31 @@
#define IO_RTC 0x70
static volatile u32 *lapic = (u32 *)(KBASE + 0xfee00000);
static u64 lapichz;
static volatile u32 *xapic = (u32 *)(KBASE + 0xfee00000);
static u64 xapichz;
static void
lapicw(int index, int value)
xapicw(int index, int value)
{
lapic[index] = value;
lapic[ID]; // wait for write to finish, by reading
xapic[index] = value;
xapic[ID]; // wait for write to finish, by reading
}
static u32
lapicr(u32 off)
xapicr(u32 off)
{
return lapic[off];
return xapic[off];
}
static int
lapicwait()
xapicwait()
{
int i = 100000;
while ((lapicr(ICRLO) & BUSY) != 0) {
while ((xapicr(ICRLO) & DELIVS) != 0) {
nop_pause();
i--;
if (i == 0) {
cprintf("lapicwait: wedged?\n");
cprintf("xapicwait: wedged?\n");
return -1;
}
}
......@@ -75,70 +75,70 @@ lapicwait()
}
void
initlapic(void)
initxapic(void)
{
u64 count;
// Enable local APIC; set spurious interrupt vector.
lapicw(SVR, ENABLE | (T_IRQ0 + IRQ_SPURIOUS));
xapicw(SVR, ENABLE | (T_IRQ0 + IRQ_SPURIOUS));
if (lapichz == 0) {
if (xapichz == 0) {
// Measure the TICR frequency
lapicw(TDCR, X1);
lapicw(TICR, 0xffffffff);
u64 ccr0 = lapicr(TCCR);
xapicw(TDCR, X1);
xapicw(TICR, 0xffffffff);
u64 ccr0 = xapicr(TCCR);
microdelay(10 * 1000); // 1/100th of a second
u64 ccr1 = lapicr(TCCR);
lapichz = 100 * (ccr0 - ccr1);
u64 ccr1 = xapicr(TCCR);
xapichz = 100 * (ccr0 - ccr1);
}
count = (QUANTUM*lapichz) / 1000;
count = (QUANTUM*xapichz) / 1000;
if (count > 0xffffffff)
panic("initlapic: QUANTUM too large");
panic("initxapic: QUANTUM too large");
// The timer repeatedly counts down at bus frequency
// from lapic[TICR] and then issues an interrupt.
lapicw(TDCR, X1);
lapicw(TIMER, PERIODIC | (T_IRQ0 + IRQ_TIMER));
lapicw(TICR, count);
// from xapic[TICR] and then issues an interrupt.
xapicw(TDCR, X1);
xapicw(TIMER, PERIODIC | (T_IRQ0 + IRQ_TIMER));
xapicw(TICR, count);
// Disable logical interrupt lines.
lapicw(LINT0, MASKED);
lapicw(LINT1, MASKED);
xapicw(LINT0, MASKED);
xapicw(LINT1, MASKED);
// Disable performance counter overflow interrupts
// on machines that provide that interrupt entry.
if(((lapic[VER]>>16) & 0xFF) >= 4)
lapicpc(0);
if(((xapic[VER]>>16) & 0xFF) >= 4)
xapicpc(0);
// Map error interrupt to IRQ_ERROR.
lapicw(ERROR, T_IRQ0 + IRQ_ERROR);
xapicw(ERROR, T_IRQ0 + IRQ_ERROR);
// Clear error status register (requires back-to-back writes).
lapicw(ESR, 0);
lapicw(ESR, 0);
xapicw(ESR, 0);
xapicw(ESR, 0);
// Ack any outstanding interrupts.
lapicw(EOI, 0);
xapicw(EOI, 0);
// Send an Init Level De-Assert to synchronise arbitration ID's.
lapicw(ICRHI, 0);
lapicw(ICRLO, BCAST | INIT | LEVEL);
while(lapic[ICRLO] & DELIVS)
xapicw(ICRHI, 0);
xapicw(ICRLO, BCAST | INIT | LEVEL);
while(xapic[ICRLO] & DELIVS)
;
// Enable interrupts on the APIC (but not on the processor).
lapicw(TPR, 0);
xapicw(TPR, 0);
}
void
lapicpc(char mask)
xapicpc(char mask)
{
lapicw(PCINT, mask ? MASKED : MT_NMI);
xapicw(PCINT, mask ? MASKED : MT_NMI);
}
hwid_t
lapicid(void)
xapicid(void)
{
if (readrflags() & FL_IF) {
cli();
......@@ -146,46 +146,45 @@ lapicid(void)
__builtin_return_address(0));
}
if (lapic == nullptr)
panic("lapicid");
return HWID(lapic[ID]>>24);
if (xapic == nullptr)
panic("xapicid");
return HWID(xapic[ID]>>24);
}
// Acknowledge interrupt.
void
lapiceoi(void)
xapiceoi(void)
{
if(lapic)
lapicw(EOI, 0);
if(xapic)
xapicw(EOI, 0);
}
// Send IPI
void
lapic_ipi(hwid_t hwid, int ino)
xapic_ipi(hwid_t hwid, int ino)
{
lapicw(ICRHI, hwid.num << 24);
lapicw(ICRLO, FIXED | DEASSERT | ino);
if (lapicwait() < 0)
panic("lapic_ipi: lapicwait failure");
xapicw(ICRHI, hwid.num << 24);
xapicw(ICRLO, FIXED | DEASSERT | ino);
if (xapicwait() < 0)
panic("xapic_ipi: xapicwait failure");
}
void
lapic_tlbflush(hwid_t hwid)
xapic_tlbflush(hwid_t hwid)
{
lapic_ipi(hwid, T_TLBFLUSH);
xapic_ipi(hwid, T_TLBFLUSH);
}
void
lapic_sampconf(hwid_t hwid)
xapic_sampconf(hwid_t hwid)
{
lapic_ipi(hwid, T_SAMPCONF);
xapic_ipi(hwid, T_SAMPCONF);
}
// Start additional processor running bootstrap code at addr.
// See Appendix B of MultiProcessor Specification.
void
lapicstartap(hwid hwid, u32 addr)
xapicstartap(hwid hwid, u32 addr)
{
int i;
volatile u16 *wrv;
......@@ -201,12 +200,14 @@ lapicstartap(hwid hwid, u32 addr)
// "Universal startup algorithm."
// Send INIT (level-triggered) interrupt to reset other CPU.
lapicw(ICRHI, hwid.num<<24);
lapicw(ICRLO, hwid.num | INIT | LEVEL | ASSERT);
lapicwait();
xapicw(ICRHI, hwid.num<<24);
xapicw(ICRLO, INIT | LEVEL | ASSERT);
xapicwait();
microdelay(10000);
lapicw(ICRLO, hwid.num |INIT | LEVEL);
lapicwait();
xapicw(ICRLO, INIT | LEVEL);
xapicwait();
microdelay(10000); // should be 10ms, but too slow in Bochs!
// Send startup IPI (twice!) to enter bootstrap code.
......@@ -215,8 +216,8 @@ lapicstartap(hwid hwid, u32 addr)
// should be ignored, but it is part of the official Intel algorithm.
// Bochs complains about the second one. Too bad for Bochs.
for(i = 0; i < 2; i++){
lapicw(ICRHI, hwid.num<<24);
lapicw(ICRLO, STARTUP | (addr>>12));
xapicw(ICRHI, hwid.num<<24);
xapicw(ICRLO, STARTUP | (addr>>12));
microdelay(200);
}
}
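One real fix hides in the rename above: the old code OR-ed hwid.num into ICRLO, but in the xAPIC the destination APIC ID belongs in ICRHI bits 31:24, while ICRLO's low byte is the vector, so the ID could smear into the vector field. The new code keeps the two halves separate; a condensed sketch of that split, assuming the xapicw helper above:

  // Hypothetical helper making the ICR split explicit.
  static void
  xapic_icr(u32 apicid, u32 cmd)
  {
    xapicw(ICRHI, apicid << 24);  // destination field only
    xapicw(ICRLO, cmd);           // delivery mode | trigger | level | vector
  }
  // e.g. xapic_icr(hwid.num, INIT | LEVEL | ASSERT);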
......@@ -208,6 +208,8 @@ vprintfmt(void (*putch)(int, void*), void *putdat,
case 'o':
num = getuint (ap, lflag);
base = 8;
if (altflag && num)
putch ('0', putdat);
goto number;
// pointer
......@@ -223,6 +225,10 @@ vprintfmt(void (*putch)(int, void*), void *putdat,
case 'x':
num = getuint (ap, lflag);
base = 16;
if (altflag && num) {
putch ('0', putdat);
putch ('x', putdat);
}
number:
printnum (putch, putdat, num, base, MAX(width, 0), padc);
break;
......
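The two printfmt hunks add the '#' alternate form for %o and %x: nonzero octal gains a leading 0 and nonzero hex a leading 0x, as in C's printf, while zero prints bare. Assuming altflag is set when the format parser sees '#', any printf-style wrapper over vprintfmt now behaves like this:

  printf("%#x\n", 0x1f);  // "0x1f"
  printf("%#o\n", 8);     // "010"
  printf("%#x\n", 0);     // "0" -- zero gets no prefix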
......@@ -4,6 +4,8 @@
#include "lib.h"
#include "percpu.hh"
#include <sys/mman.h>
#define WQCHUNKSZ 8192
#define WQBLOCKSZ 128
static_assert(WQCHUNKSZ%WQBLOCKSZ == 0, "Bad sizes");
......@@ -17,11 +19,11 @@ percpu<wqblock*> block;
static bool
refill(void)
{
long r = map(0, WQCHUNKSZ);
if (r < 0)
void *r = mmap(0, WQCHUNKSZ, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (r == MAP_FAILED)
return false;
for (uptr p = r; p < r+WQCHUNKSZ; p += WQBLOCKSZ) {
for (char *p = (char*)r; p < (char*)r+WQCHUNKSZ; p += WQBLOCKSZ) {
wqblock* n = (wqblock*)p;
n->next = *block;
*block = n;
......
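refill now grabs one 8 KB anonymous mapping and threads it onto the per-CPU free list as 128-byte wqblock nodes. The pop side is not shown in this hunk; a sketch of what it presumably looks like, using the same percpu block list:

  // Sketch only: pop one block, refilling from mmap when the list is empty.
  static wqblock*
  getblock(void)
  {
    if (*block == nullptr && !refill())
      return nullptr;               // out of memory
    wqblock* n = *block;
    *block = n->next;
    return n;
  }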
......@@ -27,8 +27,11 @@ wq::wq(void)
{
int i;
for (i = 0; i < NCPU; i++)
for (i = 0; i < NCPU; i++) {
q_[i].head = 0;
q_[i].tail = 0;
wqlock_init(&q_[i].lock);
}
#if defined(XV6_USER)
ipc_ = allocipc();
......
......@@ -5,7 +5,7 @@
#include "wq.hh"
#include "atomic.hh"
#include "pthread.h"
#include "elf.hh"
#include "memlayout.h"
u64 wq_maxworkers = NWORKERS;
......@@ -28,7 +28,20 @@ initworker(void)
assert(wqwait() == 0);
}
}
DEFINE_XV6_ADDRNOTE(xnote, XV6_ADDR_ID_WQ, &initworker);
uwq_ipcbuf*
allocipc(void)
{
static bool alloced;
if (alloced)
die("allocklen: allocing more than once");
if (sizeof(uwq_ipcbuf) > USERWQSIZE)
die("allocipc: too large");
if (wqinit((uptr)initworker) < 0)
die("wqinit: failed");
alloced = true;
return (uwq_ipcbuf*)USERWQ;
}
int
mycpuid(void)
......
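allocipc is deliberately one-shot: it registers initworker as the user worker entry via wqinit, checks that uwq_ipcbuf fits in the USERWQSIZE window from param.h, and returns the fixed USERWQ address. A call site reduces to a single line:

  uwq_ipcbuf* ipc = allocipc();  // dies on a second call or if wqinit fails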
......@@ -8,6 +8,7 @@
#define NDEV 10 // maximum major device number
#define ROOTDEV 1 // device number of file system root disk
#define MAXARG 32 // max exec arguments
#define MAXARGLEN 64 // max exec argument length
#define MAXNAME 16 // max string names
#define NEPOCH 4
#define CACHELINE 64 // cache line size
......@@ -23,7 +24,7 @@
#define USERWQSIZE (1 << 14)
#define USTACKPAGES 8
#define WQSHIFT 7
#define CILKENABLE 0
#define EXECSWITCH 1
#if defined(HW_qemu)
#define NCPU 8 // maximum number of CPUs
#define MTRACE 0
......@@ -38,9 +39,15 @@
#define MTRACE 0
#define PERFSIZE (512<<20ull)
#elif defined(HW_tom)
#define DEBUG 0
#define NCPU 48 // maximum number of CPUs
#define MTRACE 0
#define PERFSIZE (1<<20ull)
#elif defined(HW_ben)
#define DEBUG 0
#define NCPU 80 // maximum number of CPUs
#define MTRACE 0
#define PERFSIZE (1<<20ull)
#elif defined(HW_user)
#define NCPU 256
#define MTRACE 0
......@@ -49,6 +56,10 @@
#define NCPU 2
#define MTRACE 0
#define PERFSIZE (16<<20ull)
#elif defined(HW_usched)
#define NCPU 2
#define MTRACE 0
#define PERFSIZE (16<<20ull)
#else
#error "Unknown HW"
#endif
......
#pragma once
#include <uk/mman.h>
// -*- c++ -*-
#pragma once
namespace std {
template<class T>
struct remove_reference
{ typedef T type; };
template<class T>
struct remove_reference<T&>
{ typedef T type; };
template<class T>
struct remove_reference<T&&>
{ typedef T type; };
}
// User/kernel shared mmap definitions
#pragma once
#define PROT_NONE 0x0
#define PROT_READ 0x1
#define PROT_WRITE 0x2
#define PROT_EXEC 0x4
#define MAP_SHARED 0x1
#define MAP_PRIVATE 0x2
#define MAP_FIXED 0x4
#define MAP_ANONYMOUS 0x8
#define MAP_FAILED ((void*)-1)
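The uk/mman.h definitions are shared between kernel and user so both sides agree on mmap's bit values. A typical user-side anonymous mapping now reads like POSIX, testing against MAP_FAILED rather than a negative long:

  void* p = mmap(0, 4096, PROT_READ|PROT_WRITE,
                 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
  if (p == MAP_FAILED)
    die("mmap failed");
  munmap(p, 4096);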
// -*- c++ -*-
#pragma once
#include <type_traits>
namespace std {
template<class T>
typename remove_reference<T>::type&&
move(T&& a)
{
return static_cast<typename remove_reference<T>::type&&>(a);
}
template<class A, class B>
struct pair {
typedef A first_type;
typedef B second_type;
A first;
B second;
pair(const pair&) = default;
pair(pair&&) = default;
constexpr pair() : first(), second() {}
pair(const A &a, const B &b) : first(a), second(b) {}
bool operator==(const pair<A, B> &other) {
return first == other.first && second == other.second;
}
};
template<class A, class B>
pair<A, B>
make_pair(const A &a, const B &b)
{
return pair<A, B>(a, b);
}
}
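These freestanding <type_traits>/<utility> shims supply just enough of std for C++ built without a hosted library: remove_reference, move, and a small pair. Usage is the standard one:

  std::pair<int, const char*> a = std::make_pair(1, "one");
  std::pair<int, const char*> b = std::move(a);  // binds the defaulted pair(pair&&)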
......@@ -34,13 +34,13 @@ def main():
", ".join(syscall.kargs))
print
print "u64 (*syscalls[])(u64, u64, u64, u64, u64) = {"
print "u64 (*syscalls[])(u64, u64, u64, u64, u64, u64) = {"
bynum = dict((s.num, s) for s in syscalls)
for num in range(max(bynum.keys()) + 1):
if num not in bynum:
print " nullptr,"
else:
print " (u64(*)(u64,u64,u64,u64,u64))%s," % bynum[num].kname
print " (u64(*)(u64,u64,u64,u64,u64,u64))%s," % bynum[num].kname
print "};"
print
print "extern const int nsyscalls = %d;" % (max(bynum.keys()) + 1)
......
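The generator change widens every kernel syscall stub to six u64 parameters, one per argument register the x86-64 syscall path can forward (rdi, rsi, rdx, r10, r8, r9 under the usual convention). The emitted table now has this shape (entry names illustrative, not from the diff):

  u64 (*syscalls[])(u64, u64, u64, u64, u64, u64) = {
    (u64(*)(u64,u64,u64,u64,u64,u64))sys_fork,   // hypothetical entry
    nullptr,                                     // unassigned number
    (u64(*)(u64,u64,u64,u64,u64,u64))sys_exec,   // hypothetical entry
  };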
NCXXFLAGS = -g -static -MD -m64 -O3 -Wall -Werror -DHW_$(HW) \
-fno-builtin -fno-strict-aliasing -fno-omit-frame-pointer \
-fms-extensions -mcx16 -mno-red-zone -std=c++0x \
-Wno-sign-compare -fno-exceptions -fno-rtti -fcheck-new \
-iquote . -iquote include \
-include param.h -include include/compiler.h
NCFLAGS = -g -static -MD -m64 -O3 -Wall -Werror -DHW_$(HW) \
-fno-builtin -fno-strict-aliasing -fno-omit-frame-pointer \
-fms-extensions -mcx16 -mno-red-zone -std=c99 \
-Wno-sign-compare -fno-exceptions \
-iquote . -iquote include \
-include param.h -include include/compiler.h
$(O)/user/%.o: user/%.c
@echo " CC $@"
$(Q)mkdir -p $(@D)
$(Q)$(CC) -DLINUX $(NCFLAGS) -c -o $@ $<
$(O)/user/%.o: user/%.cc
@echo " CXX $@"
$(Q)mkdir -p $(@D)
$(Q)$(CXX) -DLINUX $(NCXXFLAGS) -c -o $@ $<
$(O)/%: $(O)/user/%.o
@echo " LD $@"
$(Q)mkdir -p $(@D)
$(Q)$(CXX) -o $@ $^ -lpthread -ljemalloc
.PRECIOUS: $(O)/user/%.o
-include $(O)/user/*.d
ALL := $(O)/usched
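This fragment builds the usched benchmark below as a native Linux binary rather than an xv6 program: note -DLINUX, the pthread/jemalloc link line, and the HW_usched parameter block added to param.h above. Presumably the target is selected with the HW=usched make variable; the commit itself does not show the invocation.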
// export LD_PRELOAD="/usr/lib/libjemalloc.so.1"
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <signal.h>
#include <unistd.h>
#include <atomic>
#include <pthread.h>
typedef uint64_t u64;
#include "util.h"
#include "percpu.hh"
#include "sched.hh"
#include "queue.h"
#include "uscopedperf.hh"
#include "intelctr.hh"
static auto perfgroup = ctrgroup(&intelctr::tsc);
static int nprocs = NCPU;
static int the_time = 5;
static uint64_t start;
static volatile int go;
static __thread unsigned myid_;
percpu<uint64_t> ops;
struct proc : public sched_link
{
char x[256];
LIST_ENTRY(proc) link;
};
struct stuff {
bool flipper;
uint32_t seed;
LIST_HEAD(proc_list, proc) proc_list;
uint32_t rnd() {
return ::rnd(&seed);
}
proc* allocproc() {
proc* p = (proc*) malloc(sizeof(proc));
return p;
}
void freeproc(proc* p) {
free(p);
}
void
fillproc(void)
{
}
};
percpu<stuff> stuff_;
class schedule
{
public:
schedule();
void enq(proc* entry);
proc* deq();
proc* steal(bool nonexec);
void dump();
sched_stat stats_;
u64 ncansteal_;
volatile bool cansteal_ __mpalign__;
private:
pthread_spinlock_t lock_;
sched_link head_ __mpalign__;
};
percpu<schedule> schedule_;
int
mycpuid(void)
{
return myid_;
}
static bool
cansteal(proc* p, bool nonexec)
{
return true;
}
static void
sighandler(int x)
{
double tot;
uint64_t stop;
go = 2;
stop = read_tsc();
tot = 0;
for (int i = 0; i < NCPU; i++) {
tot += ops[i];
printf(" %lu %lu\n", ops[i], schedule_[i].stats_.steals);
}
printf("%f\n", (stop-start)/(tot/NCPU));
}
schedule::schedule(void)
{
head_.next = &head_;
head_.prev = &head_;
ncansteal_ = 0;
cansteal_ = false;
pthread_spin_init(&lock_, 0);
}
void
schedule::enq(proc* p)
{
sched_link* entry = p;
// Add to tail
pthread_spin_lock(&lock_);
entry->next = &head_;
entry->prev = head_.prev;
head_.prev->next = entry;
head_.prev = entry;
if (cansteal((proc*)entry, true))
if (ncansteal_++ == 0)
cansteal_ = true;
stats_.enqs++;
pthread_spin_unlock(&lock_);
}
proc*
schedule::deq(void)
{
ANON_REGION("deq", &perfgroup);
if (head_.next == &head_)
return nullptr;
// Remove from head
pthread_spin_lock(&lock_);
sched_link* entry = head_.next;
if (entry == &head_) {
pthread_spin_unlock(&lock_);
return nullptr;
}
entry->next->prev = entry->prev;
entry->prev->next = entry->next;
if (cansteal((proc*)entry, true))
if (--ncansteal_ == 0)
cansteal_ = false;
stats_.deqs++;
pthread_spin_unlock(&lock_);
return (proc*)entry;
}
proc*
schedule::steal(bool nonexec)
{
if (!cansteal_ || pthread_spin_trylock(&lock_))
return nullptr;
for (sched_link* ptr = head_.next; ptr != &head_; ptr = ptr->next)
if (cansteal((proc*)ptr, nonexec)) {
ptr->next->prev = ptr->prev;
ptr->prev->next = ptr->next;
if (--ncansteal_ == 0)
cansteal_ = false;
++stats_.steals;
pthread_spin_unlock(&lock_);
return (proc*)ptr;
}
pthread_spin_unlock(&lock_);
++stats_.misses;
return nullptr;
}
static void
runit(proc* p)
{
p->x[0] += myid_;
p->x[128] += myid_;
p->x[64] += myid_;
p->x[192] += myid_;
spin_delay(500);
}
#if 1
static proc*
stealit(void)
{
proc* p;
int r = stuff_->rnd();
ANON_REGION("stealit", &perfgroup);
for (int i = 0; i < NCPU; i++) {
int k = (r+i)%NCPU;
if (k == myid_)
continue;
p = schedule_[k].steal(true);
if (p) {
return p;
}
}
return nullptr;
}
#else
static proc*
stealit(void)
{
proc* p;
ANON_REGION("stealit", &perfgroup);
for (int i = 0; i < 2; i++) {
int k = i+myid_-(myid_%2);
if (k == myid_)
continue;
p = schedule_[k].steal(true);
if (p) {
return p;
}
}
return nullptr;
}
#endif
static void
schedit(void)
{
uint32_t r;
proc* p;
{
ANON_REGION("schedit", &perfgroup);
p = schedule_->deq();
if (p == nullptr)
p = stealit();
}
r = stuff_->rnd() % 100;
if (p) {
runit(p);
(*ops)++;
if (r < 10 && (myid_%2) == 0) {
stuff_->freeproc(p);
}
else
schedule_->enq(p);
}
if (r > 90 && (myid_%2) == 1) {
schedule_->enq(stuff_->allocproc());
}
}
static void*
worker(void* x)
{
myid_ = (long)x;
setaffinity(myid_);
stuff_->seed = myid_+getpid();
stuff_->fillproc();
if (myid_ == 0) {
for (int i = 0; i < nprocs; i++)
schedule_->enq(stuff_->allocproc());
if (signal(SIGALRM, sighandler) == SIG_ERR)
edie("signal failed\n");
alarm(the_time);
start = read_tsc();
go = 1;
} else {
while (go == 0)
;
}
while (go == 1) {
schedit();
}
return nullptr;
}
int
main(int ac, char** av)
{
myid_ = 0;
setaffinity(myid_);
for (int i = 1; i < NCPU; i++) {
pthread_t th;
if (pthread_create(&th, nullptr, worker, (void*)(long)i) < 0)
edie("pthread_create");
}
sleep(1);
worker((void*)0);
scopedperf::perfsum_base::printall();
return 0;
}
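usched.cc exercises the work-stealing design in plain user space: each thread pins itself with setaffinity, then loops deq-else-steal; stealit starts its victim scan at a random CPU so thieves fan out, and schedule::steal consults the unlocked cansteal_ hint and uses pthread_spin_trylock so an idle or contended victim costs no waiting. On SIGALRM the handler reports cycles per operation as (stop - start) / (tot / NCPU); with hypothetical numbers, a 5-second run at 2 GHz (1e10 cycles) on 8 CPUs doing 4e6 ops each prints 1e10 / 4e6 = 2500 cycles/op.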
......@@ -39,3 +39,38 @@ setaffinity(int c)
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) < 0)
edie("setaffinity, sched_setaffinity failed");
}
static inline uint64_t
read_tsc(void)
{
uint64_t a, d;
__asm __volatile("rdtsc" : "=a" (a), "=d" (d));
return ((uint64_t) a) | (((uint64_t) d) << 32);
}
static inline void
rep_nop(void)
{
__asm __volatile("rep; nop" ::: "memory");
}
static inline void
cpu_relax(void)
{
rep_nop();
}
static inline void
spin_delay(uint64_t cycles)
{
uint64_t s = read_tsc();
while ((read_tsc() - s) < cycles)
cpu_relax();
}
static inline uint32_t
rnd(uint32_t *seed)
{
*seed = *seed * 1103515245 + 12345;
return *seed & 0x7fffffff;
}
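rnd above is the classic POSIX rand() linear congruential generator, seed' = seed * 1103515245 + 12345 (mod 2^32), with the result masked to 31 bits. As a quick check:

  // From seed 1: 1103515245*1 + 12345 = 1103527590 < 2^31, so no masking occurs.
  uint32_t seed = 1;
  assert(rnd(&seed) == 1103527590);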