Commit 481e807f authored by Silas Boyd-Wickizer

Merge branch 'scale-amd64' of git+ssh://amsterdam.csail.mit.edu/home/am0/6.828/xv6 into scale-amd64

@@ -8,11 +8,23 @@
#include <sys/mman.h>
#include <utility>
char buf[2048];
char name[3];
const char *echoargv[] = { "echo", "ALL", "TESTS", "PASSED", 0 };
int stdout = 1;
+ // Pseudo-random number generator for randomized tests: a 64-bit LCG
+ // using Knuth's MMIX multiplier and increment.
+ static u64 rseed;
+ u64
+ rnd(void)
+ {
+ rseed = rseed * 6364136223846793005 + 1442695040888963407;
+ return rseed;
+ }
// simple file system tests
void
@@ -1741,6 +1753,91 @@ unmappedtest(void)
printf("unmappedtest ok\n");
}
// Return true if writing to *p faults. The store runs in a forked
// child that reports success through a pipe; if the child faults and
// dies without writing, the parent's read() returns 0 (EOF).
bool
test_fault(char *p)
{
int fds[2], pid;
char buf = 0;
if (pipe(fds) != 0)
die("test_fault: pipe failed");
if ((pid = fork(0)) < 0)
die("test_fault: fork failed");
if (pid == 0) {
close(fds[0]);
*p = 0x42;
if (write(fds[1], &buf, 1) != 1)
die("test_fault: write failed");
exit();
}
close(fds[1]);
bool faulted = (read(fds[0], &buf, 1) < 1);
wait();
close(fds[0]);
return faulted;
}
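(Editorial aside, not part of the commit: a minimal sketch of how
test_fault composes with mmap/munmap; the fixed address is
illustrative only.)

// Hypothetical check: a fresh anonymous mapping must not fault, and
// the same page must fault again once unmapped.
char *p = (char*)mmap((void*)0x1000, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
if (p == MAP_FAILED)
die("sketch: mmap failed");
if (test_fault(p))
die("sketch: mapped page faulted");
if (munmap(p, 4096) < 0)
die("sketch: munmap failed");
if (!test_fault(p))
die("sketch: unmapped page did not fault");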
// Randomized test of overlapping mappings: map and unmap random page
// ranges at fixed addresses, mirror the expected per-page state in
// map[], and verify after every operation that mapped pages keep
// their fill value and unmapped pages fault.
void
vmoverlap(void)
{
printf("vmoverlap\n");
char *base = (char*)0x1000;
char map[10] = {};
int mapn = 1;
rseed = 0;
for (int i = 0; i < 100; i++) {
int op = i % 20 >= 10; // alternate phases: ten map ops, then ten unmap ops
int lo = rnd() % 10, hi = rnd() % 10;
if (lo > hi)
std::swap(lo, hi);
if (lo == hi)
continue;
if (op == 0) {
// Map
void *res = mmap(base + lo * 4096, (hi-lo) * 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
if (res == MAP_FAILED)
die("vmoverlap: mmap failed");
} else {
// Unmap
int res = munmap(base + lo * 4096, (hi-lo) * 4096);
if (res < 0)
die("vmoverlap: munmap failed");
}
for (int i = lo; i < hi; i++) {
if (op == 0) {
// Check that it zeroed the range
if (base[i*4096] != 0)
die("did not zero mapped-over region");
// Fill it in
base[i*4096] = mapn;
// Update the expected mapping
map[i] = mapn;
} else {
// Update the expected mapping
map[i] = 0;
}
}
// Check entire mapping
for (int i = 0; i < sizeof(map)/sizeof(map[0]); i++) {
if (map[i] && base[i*4096] != map[i])
die("page outside of mapped-over region changed");
else if (!map[i] && !test_fault(&base[i*4096]))
die("expected fault");
}
}
munmap(base, 10 * 4096);
printf("vmoverlap ok\n");
}
static int nenabled;
static char **enabled;
@@ -1782,6 +1879,7 @@ main(int argc, char *argv[])
// we should be able to grow a user process to consume all phys mem
TEST(unmappedtest);
+ TEST(vmoverlap);
TEST(validatetest);
......
@@ -231,13 +231,18 @@ struct radix_iterator {
if (k_ != key_limit_)
prime_path();
}
+ radix_iterator() = default;
+ radix_iterator(const radix_iterator &o) = default;
+ radix_iterator(radix_iterator &&o) = default;
// Move to the next non-null entry in the collection, or end.
radix_iterator &operator++() {
assert(k_ < key_limit_);
advance();
return *this;
}
- radix_elem* operator*() {
+ radix_elem* operator*() const {
return path_[level_]->load().elem();
}
@@ -245,10 +250,28 @@
// If the current element is non-null, does nothing.
void skip_nulls()
{
- if (path_[level_]->load().is_null())
+ if (!**this)
++(*this);
}
+ // Return the key of the iterator's current element.
+ u64 key() const
+ {
+ return k_ << r_->shift_;
+ }
+ // Return the span of the key space occupied by the iterator's
+ // current element.
+ u64 span() const
+ {
+ return (u64)1 << (bits_per_level * level_ + r_->shift_);
+ }
+ // Return an iterator that points to the next element that is not
+ // equal to the current element. If no such element exists, returns
+ // end. Note that this element may be null.
+ radix_iterator next_change() const;
// Compare equality on just the key.
bool operator==(const radix_iterator &other) {
return r_ == other.r_ && k_ == other.k_; }
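(Editorial aside, not part of the commit: a worked example of key()
and span(), assuming bits_per_level = 9 and shift_ = 12, i.e. 4 KB
leaf units mirroring the page size; both constants are assumptions
about the tree's configuration, not taken from this hunk.)

// An iterator at k_ = 3 on a leaf (level_ = 0):
//   key()  = 3 << 12          = 0x3000
//   span() = 1 << (9*0 + 12)  = 4096 bytes of key space
// The same iterator one level up (level_ = 1):
//   span() = 1 << (9*1 + 12)  = 2 MB of key space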
@@ -267,9 +290,9 @@ private:
// Prepare the initial path_ and level_ based on k_.
void prime_path();
- // Advance to the next non-null leaf. This assumes that
- // k_ < key_limit_.
- void advance();
+ // Advance to the next leaf. If skip_nulls is true, advances to the
+ // next non-null leaf. This assumes that k_ < key_limit_.
+ void advance(bool skip_nulls = true);
};
inline radix_iterator
......
@@ -231,6 +231,9 @@ radix_range::replace(u64 start, u64 size, radix_elem *val)
assert(start >= start_);
assert(start + size <= start_ + size_);
+ // XXX(austin) We will deadlock with ourselves if we try to replace
+ // a range and the replaced range is on a different level than the
+ // locked range (because this update_range will try to push_down).
dprintf("%p: replace: [%lx, %lx) with %p\n", r_, start, start + size, val);
update_range(r_->root_.load(), &r_->root_, [val](radix_entry cur, radix_ptr *ptr) -> radix_entry {
do {
@@ -246,6 +249,17 @@ radix_range::replace(u64 start, u64 size, radix_elem *val)
}, 0, 1L << key_bits, start, start + size);
}
+ radix_iterator
+ radix_iterator::next_change() const
+ {
+ radix_elem *cur = **this;
+ radix_iterator next(*this);
+ do {
+ next.advance(false);
+ } while (next.k_ < next.key_limit_ && *next == cur);
+ return next;
+ }
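(Editorial aside, not part of the commit: a minimal sketch of the
range-at-a-time iteration next_change() enables, mirroring the loop
added to vmap::copy below; the tree variable and handle_range
callback are hypothetical.)

// Visit a radix tree one contiguous run of equal elements at a time.
radix::iterator next;
for (auto it = tree.begin(); it != tree.end(); it = next) {
next = it.next_change();
if (*it) // runs may be null; skip unmapped ranges
handle_range(it.key(), next.key(), *it); // run covers [it.key(), next.key())
}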
void
radix_iterator::prime_path()
{
@@ -268,7 +282,7 @@ radix_iterator::prime_path()
}
void
- radix_iterator::advance()
+ radix_iterator::advance(bool skip_nulls)
{
while (true) {
// As long as we haven't reached our limit or an element, advance
@@ -296,8 +310,8 @@ radix_iterator::advance()
level_--;
}
- // Did we reach a non-null leaf?
- if (!entry.is_null())
+ // Did we reach a non-null leaf? (Or do we not care?)
+ if (!skip_nulls || !entry.is_null())
return;
}
}
@@ -174,6 +174,7 @@ vma::vma(vmap *vmap, uptr start, uptr end, enum vmatype vtype, vmnode *vmn) :
{
assert(PGOFFSET(start) == 0);
assert(PGOFFSET(end) == 0);
+ assert(!vmn || end - start == vmn->npages << PGSHIFT);
if (n)
n->incref();
}
@@ -255,18 +256,23 @@ vmap::incref()
bool
vmap::replace_vma(vma *a, vma *b)
{
+ assert(a->vma_start == b->vma_start);
+ assert(a->vma_end == b->vma_end);
auto span = vmas.search_lock(a->vma_start, a->vma_end - a->vma_start);
if (a->deleted())
return false;
+ #if VM_CRANGE
for (auto e: span)
assert(a == e);
- #if VM_CRANGE
span.replace(b);
#endif
#if VM_RADIX
- span.replace(a->vma_start, b->vma_start-a->vma_start, 0);
- span.replace(b->vma_start, b->vma_end-b->vma_start, b);
- span.replace(b->vma_end, a->vma_end-b->vma_end, 0);
+ for (auto it = span.begin(); it != span.end(); ++it) {
+ if (static_cast<vma*>(*it) == a)
+ // XXX(austin) replace should take iterators to represent the
+ // span so we don't have to find the keys all over again.
+ span.replace(it.key(), it.span(), b);
+ }
#endif
return true;
}
@@ -277,28 +283,40 @@ vmap::copy(int share)
vmap *nm = new vmap();
#if VM_RADIX
- void *last = 0;
+ radix::iterator next_it;
+ for (auto it = vmas.begin(); it != vmas.end(); it = next_it, it.skip_nulls()) {
+ next_it = it.next_change();
+ u64 range_start = it.key();
+ u64 range_end = next_it.key();
+ vma *e = static_cast<vma*>(*it);
#endif
+ #if 0
+ } // Ugh. Un-confuse IDE indentation.
+ #endif
+ #if VM_CRANGE
for (auto r: vmas) {
- #if VM_RADIX
- if (!r || r == last)
- continue;
- last = r;
+ vma *e = static_cast<vma *>(r);
+ u64 range_start = e->vma_start;
+ u64 range_end = e->vma_end;
#endif
- vma *e = (vma *) r;
+ u64 range_size = range_end - range_start;
struct vma *ne;
if (share) {
+ // Because of the pages array, the new vma needs to have the
+ // same start and end, even if that's not where it ends up in
+ // the index.
ne = new vma(nm, e->vma_start, e->vma_end, COW, e->n);
// if the original vma wasn't COW, replace it with a COW vma
if (e->va_type != COW) {
vma *repl = new vma(this, e->vma_start, e->vma_end, COW, e->n);
- replace_vma(e, repl);
#if VM_RADIX
- last = repl;
+ vmas.search_lock(range_start, range_size).replace(range_start, range_size, repl);
+ #elif VM_CRANGE
+ replace_vma(e, repl);
#endif
- updatepages(pml4, e->vma_start, e->vma_end, [](atomic<pme_t>* p) {
+ updatepages(pml4, range_start, range_end, [](atomic<pme_t>* p) {
for (;;) {
pme_t v = p->load();
if (v & PTE_LOCK)
@@ -314,7 +332,7 @@ vmap::copy(int share)
ne = new vma(nm, e->vma_start, e->vma_end, PRIVATE, e->n->copy());
}
- auto span = nm->vmas.search_lock(ne->vma_start, ne->vma_end - ne->vma_start);
+ auto span = nm->vmas.search_lock(range_start, range_size);
for (auto x: span) {
#if VM_RADIX
if (!x)
@@ -328,7 +346,7 @@ vmap::copy(int share)
span.replace(ne);
#endif
#if VM_RADIX
- span.replace(ne->vma_start, ne->vma_end-ne->vma_start, ne);
+ span.replace(range_start, range_size, ne);
#endif
}
@@ -397,11 +415,9 @@ again:
// new scope to release the search lock before tlbflush
u64 len = n->npages * PGSIZE;
auto span = vmas.search_lock(vma_start, len);
+ #if VM_CRANGE
- // XXX handle overlaps, set replaced=true
for (auto r: span) {
- #if VM_RADIX
- if (!r)
- continue;
- #endif
if (!fixed)
goto again;
@@ -410,8 +426,27 @@ again:
rvma, rvma->vma_start, rvma->vma_end);
return -1;
}
+ // XXX handle overlaps, set replaced=true
+ #endif
+ #if VM_RADIX
+ // XXX(austin) span.replace also has to do this scan. It would be
+ // nice if we could do just one scan.
+ for (auto r: span) {
+ if (!r)
+ continue;
+ if (!fixed)
+ goto again;
+ else {
+ // XXX(austin) I don't think anything prevents a page fault
+ // from reading the old VMA now and installing the new page
+ // for the old VMA after the updatepages. Certainly not
+ // PTE_LOCK, since we don't take that here. Why not just use
+ // the lock in the radix tree? (We can't do that with crange,
+ // though, since it can only lock complete ranges.)
+ replaced = true;
+ break;
+ }
+ }
+ #endif
e = new vma(this, vma_start, vma_start+len, PRIVATE, n);
if (e == 0) {
@@ -432,6 +467,11 @@
updatepages(pml4, e->vma_start, e->vma_end, [&needtlb](atomic<pme_t> *p) {
for (;;) {
pme_t v = p->load();
+ // XXX(austin) Huh? Why is it okay to skip it if it's
+ // locked? The page fault could be faulting in a page from
+ // the old VMA, in which case we need to shoot it down
+ // (though if it's already faulting a page from the new VMA,
+ // we need to *not* shoot it down).
if (v & PTE_LOCK)
continue;
if (!(v & PTE_P))
@@ -458,9 +498,11 @@ vmap::remove(uptr vma_start, uptr len)
{
{
// new scope to release the search lock before tlbflush
+ uptr vma_end = vma_start + len;
auto span = vmas.search_lock(vma_start, len);
+ #if VM_CRANGE
- // XXX handle partial unmap
- uptr vma_end = vma_start + len;
for (auto r: span) {
vma *rvma = (vma*) r;
if (rvma->vma_start < vma_start || rvma->vma_end > vma_end) {
@@ -469,13 +511,14 @@ vmap::remove(uptr vma_start, uptr len)
return -1;
}
}
+ // XXX handle partial unmap
+ #endif
#if VM_CRANGE
span.replace(0);
#endif
#if VM_RADIX
+ // XXX(austin) If this could tell us that nothing was replaced, we
+ // could skip the updatepages.
span.replace(vma_start, len, 0);
#endif
}
@@ -518,8 +561,20 @@ vmap::pagefault_wcow(vma *m)
vma *repl = new vma(this, m->vma_start, m->vma_end, PRIVATE, nodecopy);
+ // XXX(austin) This will cause sharing on parts of this range that
+ // have since been unmapped or replaced. But in our current design
+ // where we need a new vmnode we have to replace all instances of it
+ // at once or we'll end up with a complete vmnode copy for each page
+ // we fault on. If we replace it all at once, this will waste time
+ // and space copying pages that are no longer mapped, but will only
+ // do that once. Fixing this requires getting rid of the vmnode.
replace_vma(m, repl);
updatepages(pml4, m->vma_start, m->vma_end, [](atomic<pme_t> *p) {
+ // XXX(austin) In radix, this may clear PTEs belonging to other
+ // VMAs that have replaced sub-ranges of the faulting VMA.
+ // That's unfortunate but okay because we'll just bring them
+ // back from the pages array. Yet another consequence of having
+ // to do a vmnode at a time.
for (;;) {
pme_t v = p->load();
if (v & PTE_LOCK)
......
@@ -12,6 +12,23 @@ namespace std {
return static_cast<typename remove_reference<T>::type&&>(a);
}
+ template<class T>
+ void
+ swap(T& a, T& b)
+ {
+ T tmp = move(a);
+ a = move(b);
+ b = move(tmp);
+ }
+ template<class T, size_t N>
+ void
+ swap(T (&a)[N], T (&b)[N])
+ {
+ for (size_t n = 0; n < N; n++)
+ swap(a[n], b[n]);
+ }
template<class A, class B>
struct pair {
typedef A first_type;
......
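(Editorial aside, not part of the commit: a minimal usage sketch of
the new swap overloads; vmoverlap above relies on the scalar overload
via std::swap(lo, hi).)

int lo = 7, hi = 2;
std::swap(lo, hi); // scalar overload: lo == 2, hi == 7, via three moves
int a[3] = { 1, 2, 3 }, b[3] = { 4, 5, 6 };
std::swap(a, b); // array overload: swaps the arrays element-wise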