Commit 481e807f authored by Silas Boyd-Wickizer

Merge branch 'scale-amd64' of git+ssh://amsterdam.csail.mit.edu/home/am0/6.828/xv6 into scale-amd64

@@ -8,11 +8,23 @@
#include <sys/mman.h>
#include <utility>
char buf[2048];
char name[3];
const char *echoargv[] = { "echo", "ALL", "TESTS", "PASSED", 0 };
int stdout = 1;
// Pseudo-random number generator for randomized tests (Knuth's MMIX LCG constants)
static u64 rseed;
u64
rnd(void)
{
rseed = rseed * 6364136223846793005 + 1442695040888963407;
return rseed;
}
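// Tests seed rseed explicitly before using rnd() (vmoverlap below
// resets it to 0) so that runs are reproducible.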
// simple file system tests
void
@@ -1741,6 +1753,91 @@ unmappedtest(void)
printf("unmappedtest ok\n");
}
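// Fork a child that writes to *p and reports back over a pipe.
// Returns true if the child faulted (and died) before it could
// report, i.e. if *p is not writable.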
bool
test_fault(char *p)
{
int fds[2], pid;
char buf = 0;
if (pipe(fds) != 0)
die("test_fault: pipe failed");
if ((pid = fork(0)) < 0)
die("test_fault: fork failed");
if (pid == 0) {
close(fds[0]);
*p = 0x42;
if (write(fds[1], &buf, 1) != 1)
die("test_fault: write failed");
exit();
}
close(fds[1]);
bool faulted = (read(fds[0], &buf, 1) < 1);
wait();
close(fds[0]);
return faulted;
}
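// Perform 100 random map and unmap operations over a 10-page window
// at 0x1000, checking that newly mapped pages read as zero, that
// pages outside each affected range keep their contents, and that
// unmapped pages fault.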
void
vmoverlap(void)
{
printf("vmoverlap\n");
char *base = (char*)0x1000;
char map[10] = {};
int mapn = 1;
rseed = 0;
for (int i = 0; i < 100; i++) {
int op = i % 20 >= 10; // batches of 10 maps followed by 10 unmaps
int lo = rnd() % 10, hi = rnd() % 10;
if (lo > hi)
std::swap(lo, hi);
if (lo == hi)
continue;
if (op == 0) {
// Map
void *res = mmap(base + lo * 4096, (hi-lo) * 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
if (res == MAP_FAILED)
die("vmoverlap: mmap failed");
} else {
// Unmap
int res = munmap(base + lo * 4096, (hi-lo) * 4096);
if (res < 0)
die("vmoverlap: munmap failed");
}
for (int i = lo; i < hi; i++) {
if (op == 0) {
// Check that it zeroed the range
if (base[i*4096] != 0)
die("did not zero mapped-over region");
// Fill it in
base[i*4096] = mapn;
// Update the expected mapping
map[i] = mapn;
} else {
// Update the expected mapping
map[i] = 0;
}
}
// Check entire mapping
for (int i = 0; i < sizeof(map)/sizeof(map[0]); i++) {
if (map[i] && base[i*4096] != map[i])
die("page outside of mapped-over region changed");
else if (!map[i] && !test_fault(&base[i*4096]))
die("expected fault");
}
}
munmap(base, 10 * 4096);
printf("vmoverlap ok\n");
}
static int nenabled;
static char **enabled;
@@ -1782,6 +1879,7 @@ main(int argc, char *argv[])
// we should be able to grow a user process to consume all phys mem
TEST(unmappedtest);
TEST(vmoverlap);
TEST(validatetest);
...
@@ -231,13 +231,18 @@ struct radix_iterator {
if (k_ != key_limit_)
prime_path();
}
radix_iterator() = default;
radix_iterator(const radix_iterator &o) = default;
radix_iterator(radix_iterator &&o) = default;
// Move to the next non-null entry in the collection, or end.
radix_iterator &operator++() {
assert(k_ < key_limit_);
advance();
return *this;
}
radix_elem* operator*() const {
return path_[level_]->load().elem();
}
@@ -245,10 +250,28 @@
// If the current element is non-null, does nothing.
void skip_nulls()
{
if (!**this)
++(*this);
}
// Return the key of the iterator's current element.
u64 key() const
{
return k_ << r_->shift_;
}
// Return the span of the key space occupied by the iterator's
// current element.
u64 span() const
{
return (u64)1 << (bits_per_level * level_ + r_->shift_);
}
// Return an iterator that points to the next element that is not
// equal to the current element. If no such element exists, returns
// end. Note that this element may be null.
radix_iterator next_change() const;
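// Typical use (as in vmap::copy): walk maximal runs of equal
// elements with
//   for (it = begin(); it != end(); it = next, it.skip_nulls()) {
//     next = it.next_change();
//     // keys [it.key(), next.key()) all map to *it
//   }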
// Compare equality on just the key.
bool operator==(const radix_iterator &other) {
return r_ == other.r_ && k_ == other.k_; }
@@ -267,9 +290,9 @@ private:
// Prepare the initial path_ and level_ based on k_.
void prime_path();
// Advance to the next leaf. If skip_nulls is true, advances to the
// next non-null leaf. This assumes that k_ < key_limit_.
void advance(bool skip_nulls = true);
};
inline radix_iterator
...
@@ -231,6 +231,9 @@ radix_range::replace(u64 start, u64 size, radix_elem *val)
assert(start >= start_);
assert(start + size <= start_ + size_);
// XXX(austin) We will deadlock with ourselves if we try to replace
// a range and the replaced range is on a different level than the
// locked range (because this update_range will try to push_down).
dprintf("%p: replace: [%lx, %lx) with %p\n", r_, start, start + size, val);
update_range(r_->root_.load(), &r_->root_, [val](radix_entry cur, radix_ptr *ptr) -> radix_entry {
do {
@@ -246,6 +249,17 @@ radix_range::replace(u64 start, u64 size, radix_elem *val)
}, 0, 1L << key_bits, start, start + size);
}
radix_iterator
radix_iterator::next_change() const
{
radix_elem *cur = **this;
radix_iterator next(*this);
do {
next.advance(false);
} while (next.k_ < next.key_limit_ && *next == cur);
return next;
}
void
radix_iterator::prime_path()
{
@@ -268,7 +282,7 @@ radix_iterator::prime_path()
}
void
radix_iterator::advance(bool skip_nulls)
{
while (true) {
// As long as we haven't reached our limit or an element, advance
@@ -296,8 +310,8 @@ radix_iterator::advance()
level_--;
}
// Did we reach a non-null leaf? (Or do we not care?)
if (!skip_nulls || !entry.is_null())
return;
}
}
@@ -174,6 +174,7 @@ vma::vma(vmap *vmap, uptr start, uptr end, enum vmatype vtype, vmnode *vmn) :
{
assert(PGOFFSET(start) == 0);
assert(PGOFFSET(end) == 0);
assert(!vmn || end - start == vmn->npages << PGSHIFT);
if (n)
n->incref();
}
@@ -255,18 +256,23 @@ vmap::incref()
bool
vmap::replace_vma(vma *a, vma *b)
{
assert(a->vma_start == b->vma_start);
assert(a->vma_end == b->vma_end);
auto span = vmas.search_lock(a->vma_start, a->vma_end - a->vma_start);
if (a->deleted())
return false;
#if VM_CRANGE
for (auto e: span)
assert(a == e);
span.replace(b);
#endif
#if VM_RADIX
for (auto it = span.begin(); it != span.end(); ++it) {
if (static_cast<vma*>(*it) == a)
// XXX(austin) replace should take iterators to represent the
// span so we don't have to find the keys all over again.
span.replace(it.key(), it.span(), b);
}
#endif
return true;
}
@@ -277,28 +283,40 @@ vmap::copy(int share)
vmap *nm = new vmap();
#if VM_RADIX
radix::iterator next_it;
for (auto it = vmas.begin(); it != vmas.end(); it = next_it, it.skip_nulls()) {
next_it = it.next_change();
u64 range_start = it.key();
u64 range_end = next_it.key();
vma *e = static_cast<vma*>(*it);
#endif
#if 0
} // Ugh. Un-confuse IDE indentation.
#endif
#if VM_CRANGE
for (auto r: vmas) {
vma *e = static_cast<vma *>(r);
u64 range_start = e->vma_start;
u64 range_end = e->vma_end;
#endif
u64 range_size = range_end - range_start;
struct vma *ne;
if (share) {
// Because of the pages array, the new vma needs to have the
// same start and end, even if that's not where it ends up in
// the index.
ne = new vma(nm, e->vma_start, e->vma_end, COW, e->n);
// if the original vma wasn't COW, replace it with a COW vma
if (e->va_type != COW) {
vma *repl = new vma(this, e->vma_start, e->vma_end, COW, e->n);
#if VM_RADIX
vmas.search_lock(range_start, range_size).replace(range_start, range_size, repl);
#elif VM_CRANGE
replace_vma(e, repl);
#endif
updatepages(pml4, range_start, range_end, [](atomic<pme_t>* p) {
for (;;) {
pme_t v = p->load();
if (v & PTE_LOCK)
@@ -314,7 +332,7 @@ vmap::copy(int share)
ne = new vma(nm, e->vma_start, e->vma_end, PRIVATE, e->n->copy());
}
auto span = nm->vmas.search_lock(range_start, range_size);
for (auto x: span) {
#if VM_RADIX
if (!x)
@@ -328,7 +346,7 @@ vmap::copy(int share)
span.replace(ne);
#endif
#if VM_RADIX
span.replace(range_start, range_size, ne);
#endif
}
@@ -397,11 +415,9 @@ again:
// new scope to release the search lock before tlbflush
u64 len = n->npages * PGSIZE;
auto span = vmas.search_lock(vma_start, len);
#if VM_CRANGE
// XXX handle overlaps, set replaced=true
for (auto r: span) {
if (!fixed)
goto again;
@@ -410,8 +426,27 @@ again:
rvma, rvma->vma_start, rvma->vma_end);
return -1;
}
#endif
#if VM_RADIX
// XXX(austin) span.replace also has to do this scan. It would be
// nice if we could do just one scan.
for (auto r: span) {
if (!r)
continue;
if (!fixed)
goto again;
else {
// XXX(austin) I don't think anything prevents a page fault
// from reading the old VMA now and installing the new page
// for the old VMA after the updatepages. Certainly not
// PTE_LOCK, since we don't take that here. Why not just use
// the lock in the radix tree? (We can't do that with crange,
// though, since it can only lock complete ranges.)
replaced = true;
break;
}
}
#endif
e = new vma(this, vma_start, vma_start+len, PRIVATE, n);
if (e == 0) {
@@ -432,6 +467,11 @@ again:
updatepages(pml4, e->vma_start, e->vma_end, [&needtlb](atomic<pme_t> *p) {
for (;;) {
pme_t v = p->load();
// XXX(austin) Huh? Why is it okay to skip it if it's
// locked? The page fault could be faulting in a page from
// the old VMA, in which case we need to shoot it down
// (though if it's already faulting a page from the new VMA,
// we need to *not* shoot it down).
if (v & PTE_LOCK)
continue;
if (!(v & PTE_P))
@@ -458,9 +498,11 @@ vmap::remove(uptr vma_start, uptr len)
{
{
// new scope to release the search lock before tlbflush
auto span = vmas.search_lock(vma_start, len);
#if VM_CRANGE
// XXX handle partial unmap
uptr vma_end = vma_start + len;
for (auto r: span) {
vma *rvma = (vma*) r;
if (rvma->vma_start < vma_start || rvma->vma_end > vma_end) {
@@ -469,13 +511,14 @@ vmap::remove(uptr vma_start, uptr len)
return -1;
}
}
#endif
#if VM_CRANGE
span.replace(0);
#endif
#if VM_RADIX
// XXX(austin) If this could tell us that nothing was replaced, we
// could skip the updatepages.
span.replace(vma_start, len, 0);
#endif
}
@@ -518,8 +561,20 @@ vmap::pagefault_wcow(vma *m)
vma *repl = new vma(this, m->vma_start, m->vma_end, PRIVATE, nodecopy);
// XXX(austin) This will cause sharing on parts of this range that
// have since been unmapped or replaced. But in our current design
// where we need a new vmnode we have to replace all instances of it
// at once or we'll end up with a complete vmnode copy for each page
// we fault on. If we replace it all at once, this will waste time
// and space copying pages that are no longer mapped, but will only
// do that once. Fixing this requires getting rid of the vmnode.
replace_vma(m, repl);
updatepages(pml4, m->vma_start, m->vma_end, [](atomic<pme_t> *p) {
// XXX(austin) In radix, this may clear PTEs belonging to other
// VMAs that have replaced sub-ranges of the faulting VMA.
// That's unfortunate but okay because we'll just bring them
// back from the pages array. Yet another consequence of having
// to do a vmnode at a time.
for (;;) {
pme_t v = p->load();
if (v & PTE_LOCK)
...
@@ -12,6 +12,23 @@ namespace std {
return static_cast<typename remove_reference<T>::type&&>(a);
}
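// Minimal move-based std::swap, plus an overload for arrays; the new
// vmoverlap test relies on std::swap being available.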
template<class T>
void
swap(T& a, T& b)
{
T tmp = move(a);
a = move(b);
b = move(tmp);
}
template<class T, size_t N>
void
swap(T (&a)[N], T (&b)[N])
{
for (size_t n = 0; n < N; n++)
swap(a[n], b[n]);
}
template<class A, class B>
struct pair {
typedef A first_type;
...