Commit e8faaaf6 authored by Nickolai Zeldovich

checkpoint:

- new crange api
- fix several crange bugs (see comments)
- better scoped_acquire compiles but still buggy and missing tlb shootdowns
Parent 2a05dc1b
#include "types.h"
#include "amd64.h"
#include "mmu.h"
#include "kernel.hh"
#include "spinlock.h"
#include "condvar.h"
#include "queue.h"
#include "proc.hh"
#include "kernel.hh"
#include "cpu.hh"
struct spinlock tickslock __mpalign__;
......
......@@ -39,10 +39,33 @@ class scoped_acquire {
spinlock *_l;
public:
scoped_acquire(spinlock *l) : _l(l) { acquire(_l); }
~scoped_acquire() { release(_l); }
scoped_acquire(spinlock *l) : _l(0) { acquire(l); }
~scoped_acquire() { release(); }
void release() { if (_l) { ::release(_l); _l = 0; } }
  void acquire(spinlock *l) { assert(!_l); ::acquire(l); _l = l; }
};
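The reworked guard can drop its lock early and later re-arm on a different lock, and the destructor releases at most once. A minimal usage sketch (the function and locks here are hypothetical, not from this commit):

// Hypothetical example of the new scoped_acquire.
void
example(spinlock *a, spinlock *b)
{
  scoped_acquire guard(a);    // acquires a
  // ... work under a ...
  guard.release();            // drop a early, e.g. before a blocking call
  guard.acquire(b);           // the same guard now protects b
  // ... work under b ...
}                             // destructor releases b exactly once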
namespace std {
template<class T>
struct remove_reference
{ typedef T type; };
template<class T>
struct remove_reference<T&>
{ typedef T type; };
template<class T>
struct remove_reference<T&&>
{ typedef T type; };
template<class T>
typename remove_reference<T>::type&&
move(T&& a)
{
return static_cast<typename remove_reference<T>::type&&>(a);
}
}
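This hand-rolled remove_reference/move exists because the kernel is freestanding and cannot pull in libstdc++; it behaves like the standard std::move. A toy sketch of why it matters for the move-only RAII types in this commit (the token type is illustrative only):

struct token {
  bool owned;
  token() : owned(true) {}
  token(const token&) = delete;                          // not copyable
  token(token &&o) : owned(o.owned) { o.owned = false; } // movable
  ~token() { /* release the resource iff owned */ }
};

token make_token() { token t; return std::move(t); }     // ownership moves out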
/* C++ runtime */
void *operator new(unsigned long nbytes);
void *operator new(unsigned long nbytes, void *buf);
......
......@@ -47,6 +47,10 @@
// until it has been removed from the index (i.e., curlevel reaches 0), which
// may be many epochs later.
//
// A marked pointer indicates that the range containing the marked pointer
// has been deleted. This makes it possible to update next pointers in
// higher layers without locking the range.
//
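markptr<range> itself is defined elsewhere in the tree; as a rough sketch of the idea (names and layout here are assumptions, not the real class), the mark bit can be packed into the low bit of an aligned pointer so "deleted" travels atomically with the pointer value:

#include <atomic>
#include <cstdint>

// Sketch only: pack a "deleted" flag into bit 0 of an aligned pointer.
template<class T>
class markptr_sketch {
  std::atomic<uintptr_t> _p;
public:
  markptr_sketch(T *v = 0) : _p((uintptr_t) v) {}
  bool mark() const { return _p.load() & 1; }
  T *ptr() const    { return (T *) (_p.load() & ~(uintptr_t) 1); }
  void set_mark()   { _p.fetch_or(1); }
  // CAS on the whole word, so it fails if the mark bit was set concurrently.
  bool cmpxch(T *expect, T *desire) {
    uintptr_t e = (uintptr_t) expect;
    return _p.compare_exchange_strong(e, (uintptr_t) desire);
  }
};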
enum { crange_debug = 0 };
enum { crange_checking = 0 };
......@@ -78,7 +82,8 @@ range_draw_nlevel(int nlevel)
return l+1;
}
void range::print(int l)
void
range::print(int l)
{
cprintf ("0x%lx-0x%lx(%lu) 0x%lx, c %d, t %d, n 0x%lx m %d\n",
key, key+size, size, (long) value, curlevel.load(), nlevel,
......@@ -95,9 +100,12 @@ range::~range()
}
kmalignfree(lock);
kmfree(next);
if (value)
value->do_gc();
}
void range::dec_ref(void)
void
range::dec_ref(void)
{
int n = curlevel--;
if (n == 0) { // now removed from all levels.
......@@ -108,7 +116,7 @@ void range::dec_ref(void)
}
}
range::range(crange *crarg, u64 k, u64 sz, void *v, range *n, int nl)
range::range(crange *crarg, u64 k, u64 sz, rcu_freed *v, range *n, int nl)
: rcu_freed("range_delayed")
{
dprintf("range:range:: %lu %lu %d\n", k, sz, nl);
......@@ -133,28 +141,9 @@ range::range(crange *crarg, u64 k, u64 sz, void *v, range *n, int nl)
// Methods on a sequence (i.e., ordered list) of ranges
//
static range *insert(struct range *head, struct range *r)
{
markptr<range> n, p;
p = nullptr;
for (n = head; n != 0; p = n, n = n->next[0]) {
assert(!n.mark());
if (r->key < n->key) {
break;
}
}
if (n == head) {
r->next[0] = head;
head = r;
} else {
p->next[0] = r;
r->next[0] = n;
}
return head;
}
// Lock this range if next == e and it isn't marked for deletion; if not, return failure.
int range::lockif(markptr<range> e)
int
range::lockif(markptr<range> e)
{
assert(!e.mark());
acquire(lock);
......@@ -172,7 +161,8 @@ int range::lockif(markptr<range> e)
// causing curlevel to drop below nlevel, and causing add_index to add the
// node back on a level on which it has already been inserted (because it hasn't
// been marked deleted yet at that level).
static void mark(range *f, range *s)
static void
mark(range *f, range *s)
{
struct range *e;
for (e = f; e && e != s; e = e->next[0].ptr()) {
......@@ -182,22 +172,12 @@ static void mark(range *f, range *s)
}
}
// Unlock ranges f through l
static void unlockn(range *f, range *l)
{
struct range *e;
for (e = f; e != l; e = e->next[0].ptr()) {
assert(e);
release(e->lock);
}
if (l) release(e->lock);
}
// Delay free ranges f through l
static void freen(struct range *f, struct range *l)
static void
freen(struct range *f, struct range *l)
{
struct range *e;
for (e = f; e != l; e = e->next[0].ptr()) {
for (e = f; e && e != l; e = e->next[0].ptr()) {
assert(e);
assert(e->curlevel >= 0);
e->dec_ref();
......@@ -208,56 +188,6 @@ static void freen(struct range *f, struct range *l)
}
}
// Compute the sequence that will replace the to-be-deleted sequence. Make copies to create
// the new ranges, because readers may be running through the list and looking at the old ranges.
// If the whole sequence is replaced, it will return s.
static range *replace(u64 k, u64 sz, void *v, range *f, range *l, range *s)
{
range *r;
if (f == l) { // the first range covers the range to be deleted
if (k <= f->key && f->key + f->size <= k + sz) { // range sequence covers the first range
r = s;
} else {
if (f->key < k && k+sz < f->key + f->size) { // split range?
range *right = new range(f->cr, k+sz, f->key+f->size-k-sz, v, s);
range *left = new range(f->cr, f->key, k-f->key, v, right);
r = left;
} else if (k <= f->key) { // cut front?
assert(k+sz <= f->key + f->size);
assert(!f->next[0].mark());
r = new range(f->cr, k+sz, f->key + f->size - k - sz, v, f->next[0].ptr());
} else { // cut end
assert(k > f->key);
assert(!f->next[0].mark());
r = new range(f->cr, f->key, k - f->key, v, f->next[0].ptr());
}
}
} else if (k <= f->key && k + sz >= l->key + l->size) { // delete complete range?
r = s;
} else { // first range covers part and last range other part?
range *left;
range *right;
// cprintf("f 0x%lx [%d, %d) l 0x%lx [%d, %d)\n", (long) f, f->key, f->key+f->size, (long) l, l->key, l->key+l->size);
if (k <= f->key && k + sz >= f->key + f->size) { // delete first range?
left = nullptr;
} else {
assert(k > f->key);
left = new range(f->cr, f->key, k - f->key, v, 0);
}
if (k + sz >= l->key + l->size) { // delete last range?
right = nullptr;
} else {
assert(k+sz > l->key);
assert(l->key + l->size >= k + sz);
right = new range(f->cr, k+sz, l->key+l->size - k - sz, v, s);
}
r = left ? left : right;
if (left) left->next[0] = right ? right : s;
}
return r;
}
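Concrete numbers make the single-range cases of the removed replace() easier to follow (values are illustrative, not from the source):

// With a single range f = [10, 30) and value v:
//   del [15, 20) -> split:     left = [10, 15), right = [20, 30)
//   del [ 5, 15) -> cut front: r = [15, 30)
//   del [20, 35) -> cut end:   r = [10, 20)
// New range objects are allocated instead of mutating f, so concurrent
// readers can keep traversing the old range until their epoch ends.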
//
// Methods on a crange
//
......@@ -296,7 +226,8 @@ crange::~crange()
}
// Check some invariants, ignoring marked nodes.
void crange::check(struct range *absent)
void
crange::check(struct range *absent)
{
if (!crange_checking)
return;
......@@ -338,7 +269,8 @@ void crange::check(struct range *absent)
// Remove e from the index if it is marked for deletion. Returns 1 if e isn't marked.
// Returns 0 if e is marked but still on level 0. Returns -1 if the remove fails.
// Returns 1 on success. Tricky because of races between add and del.
int crange::del_index(range *p0, range **e, int l)
int
crange::del_index(range *p0, range **e, int l)
{
int r = 1;
assert(l < (*e)->nlevel);
......@@ -375,7 +307,8 @@ int crange::del_index(range *p0, range **e, int l)
// Insert e into index one level up, between p and s, if e hasn't been inserted
// yet on that level.
void crange::add_index(int l, range *e, range *p1, markptr<range> s1)
void
crange::add_index(int l, range *e, range *p1, markptr<range> s1)
{
if (l >= e->nlevel-1) return;
if (e->next[l+1].mark()) return;
......@@ -409,7 +342,8 @@ void crange::add_index(int l, range *e, range *p1, markptr<range> s1)
// Given the range that starts the sequence, find all other ranges that are part of
// the sequence and lock them, if l == 0
int crange::lock_range(u64 k, u64 sz, int l, range **er, range **pr, range **fr, range **lr, range **sr)
static int
lock_range(u64 k, u64 sz, int l, range **er, range **pr, range **fr, range **lr, range **sr)
{
struct range *e = *er;
assert(*pr != e);
......@@ -438,7 +372,8 @@ int crange::lock_range(u64 k, u64 sz, int l, range **er, range **pr, range **fr,
// finds and locks all ranges in the sequence [k, sz). Also returns the predecessors
// and successors. Locks pred and the ranges in the bottom list. If range_lock_pred()
// fails, search again.
int crange::find_and_lock(u64 k, u64 sz, range **p0, range **f0, range **l0, range **s0)
int
crange::find_and_lock(u64 k, u64 sz, range **p0, range **f0, range **l0, range **s0)
{
struct range *p1, *s1;
struct range *e;
......@@ -491,11 +426,12 @@ int crange::find_and_lock(u64 k, u64 sz, range **p0, range **f0, range **l0, ran
// Search through the crange skip list for a range that intersects with [k, sz)
// and return it. Pretend that marked ranges don't exist.
range* crange::search(u64 k, u64 sz, int mod)
range* crange::search(u64 k, u64 sz)
{
struct range *p, *e, *r;
int n = (mod) ? range_draw_nlevel(nlevel) : 0;
gc_begin_epoch();
scoped_gc_epoch gc;
restart:
//read_counters(myproc()->cpuid, 0);
dprintf("crange_search: 0x%lx 0x%lx\n", (u64) this, k);
r = nullptr;
......@@ -506,12 +442,14 @@ range* crange::search(u64 k, u64 sz, int mod)
// skip all marked ranges, but don't update p because
// we don't want to descend down onto a marked range.
while (e && e->next[l].mark()) {
// if we are at the bottom level, and find an intersecting deleted
// range, we must restart, because our search may have gone into a
// sequence of 2+ ranges that have been replaced.
if (l == 0 && range_intersect(k, sz, e->key, e->size))
goto restart;
e = e->next[l].ptr();
}
if (!e) break;
if (mod && l < n && l > 0) {
e->value = (void *) k;
}
if (k >= e->key+e->size)
continue;
if (range_intersect(k, sz, e->key, e->size)) {
......@@ -524,83 +462,68 @@ range* crange::search(u64 k, u64 sz, int mod)
}
end:
//read_counters(myproc()->cpuid, 1);
gc_end_epoch();
dprintf("crange_search: 0x%lx return (0x%lx,0x%lx)\n", (u64) this, r? r->key : 0, r? r->size : 0);
return r;
}
// delete the range [k, k+sz). compute the replacement list and then hook it in atomically.
void crange::del(u64 k, u64 sz)
crange_locked
crange::search_lock(u64 k, u64 sz)
{
struct range *prev;
struct range *succ;
struct range *first;
struct range *last;
struct range *repl = nullptr;
assert(this);
gc_begin_epoch();
dprintf("crange_del: 0x%lx 0x%lx-0x%lx(%ld)\n", (u64) this, k, k+sz, sz);
if (!find_and_lock(k, sz, &prev, &first, &last, &succ)) { // done?
dprintf("crange_del: [0x%lx,0x%lx) not present\n", k, sz);
release(prev->lock);
goto done;
}
repl = replace(k, sz, nullptr, first, last, succ);
mark(first, succ);
while (1) {
// hook new list into bottom list; if del resulted in a new list, use that (repl), otherwise
// set predecessor to successor.
if (prev->next[0].cmpxch(first, repl ? repl : succ)) {
release(prev->lock);
freen(first, last); // put on delayed list before unlocking
unlockn(first, last);
break;
}
cprintf("crange_del(%lu, %lu): prev was updated; try again\n", k, sz);
assert(0);
}
range *prev, *first, *last, *succ;
find_and_lock(k, sz, &prev, &first, &last, &succ);
return crange_locked(this, k, sz, prev, first, last, succ);
}
done:
check(nullptr);
// cprintf("%d: crange_del(0x%lx, 0x%lx):\n", mycpu()->id, k, sz); crange_print(cr, 1);
gc_end_epoch();
crange_locked::crange_locked(crange *cr, u64 base, u64 sz, range *p, range *f, range *l, range *s)
: cr_(cr), base_(base), size_(sz), prev_(p), first_(f), last_(l), succ_(s)
{
}
crange_locked::crange_locked(crange_locked &&x)
: gc(std::move(x.gc))
{
cr_ = x.cr_;
base_ = x.base_;
size_ = x.size_;
prev_ = x.prev_;
first_ = x.first_;
last_ = x.last_;
succ_ = x.succ_;
x.cr_ = 0;
x.prev_ = 0;
x.first_ = 0;
x.last_ = 0;
x.succ_ = 0;
}
// add the range [k, sz), which causes ranges to be deleted if the range overlaps an
// existing range. we compute the replacement list and then hook it atomically.
void crange::add(u64 k, u64 sz, void *v)
crange_locked::~crange_locked()
{
struct range *r;
struct range *first;
struct range *prev;
struct range *last;
struct range *succ;
struct range *repl = nullptr;
dprintf("crange_add: 0x%lx 0x%lx-0x%lx(%lu)\n", (u64) this, k, k+sz, sz);
assert(this);
gc_begin_epoch();
if (find_and_lock(k, sz, &prev, &first, &last, &succ)) {
dprintf("crange_add(0x%lx,0x%lx) overlaps with [0x%lx,0x%lx)\n", k, sz, first->key, first->size);
repl = replace(k, sz, v, first, last, succ);
} else {
repl = succ;
if (prev_) {
for (range *e = prev_; e && e != succ_; e = e->next[0].ptr())
release(e->lock);
}
r = new range(this, k, sz, v, succ);
repl = insert(repl, r);
mark(first, succ);
if (prev)
assert(!prev->next[0].mark());
if (prev->next[0].cmpxch(first ? first : succ, repl)) {
release(prev->lock);
freen(first, last); // put on delayed list before unlocking
unlockn(first, last);
} else {
assert(0);
}
void
crange_locked::replace(range *repl)
{
range *newlast = 0;
for (range *e = repl; e; e = e->next[0].ptr()) {
assert(e->key >= base_ && e->key + e->size <= base_ + size_);
acquire(e->lock);
newlast = e;
}
// cprintf("crange_add(0x%lx,0x%lx):\n", k, sz); crange_print(cr, 1);
check(nullptr);
gc_end_epoch();
// do compare-exchange first, and only then mark the old ranges as deleted;
// otherwise, concurrent readers may not find either old or new ranges.
assert(prev_->next[0].cmpxch(first_?:succ_, repl?:succ_));
mark(first_, succ_);
for (range *e = first_; e && e != succ_; e = e->next[0].ptr())
release(e->lock);
freen(first_, last_);
first_ = repl;
last_ = newlast;
}
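Putting the pieces together, the new API replaces the old del()/add() pair with a lock-then-splice pattern. A hedged sketch (the helper name is hypothetical), mirroring the vmap changes later in this commit:

// Insert [k, k+sz) only if nothing overlaps; otherwise back off.
void
insert_if_empty(crange &cr, u64 k, u64 sz, rcu_freed *payload)
{
  auto span = cr.search_lock(k, sz);   // locks the span, holds a gc epoch
  for (auto e: span) {
    (void) e;
    return;                            // overlap: leave the crange unchanged
  }
  // Empty span: splice in one new bottom-level range. The crange_locked
  // destructor releases the locks and ends the epoch.
  span.replace(new range(&cr, k, sz, payload, 0));
}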
......@@ -86,14 +86,14 @@ struct range : public rcu_freed {
public:
u64 key;
u64 size;
void *value;
rcu_freed *value;
atomic<int> curlevel; // the current levels it appears on
int nlevel; // the number of levels this range should appear
crange *cr; // the crange this range is part of
markptr<range>* next; // one next pointer per level
spinlock *lock; // on separate cache line?
range(crange *cr, u64 k, u64 sz, void *v, range *n, int nlevel = 0);
range(crange *cr, u64 k, u64 sz, rcu_freed *v, range *n, int nlevel = 0);
~range();
virtual void do_gc() {
delete this;
......@@ -111,28 +111,32 @@ class range_iterator {
public:
range_iterator(range *e) : _e(e) {}
range_iterator& operator++() { _e = _e->next[0].ptr(); return *this; }
range*& operator*() { return _e; }
bool operator==(const range_iterator &other) { return _e == other._e; }
bool operator!=(const range_iterator &other) { return _e != other._e; }
};
class crange_locked;
struct crange {
private:
range *crange_head; // a crange skip list starts with a sentinel range (key 0, sz 0)
public:
int nlevel; // number of levels in the crange skip list
crange(int nlevel);
~crange(void);
void del(u64 k, u64 sz);
void add(u64 k, u64 sz, void *v);
range* search(u64 k, u64 sz, int mod = 0);
void print(int);
void check(struct range *absent);
int del_index(range *p0, range **e, int l);
void add_index(int l, range *e, range *p1, markptr<range> s1);
int lock_range(u64 k, u64 sz, int l, range **er, range **pr, range **fr, range **lr, range **sr);
int find_and_lock(u64 k, u64 sz, range **p0, range **f0, range **l0, range **s0);
friend class range;
public:
int nlevel; // number of levels in the crange skip list
crange(int nlevel);
~crange(void);
range* search(u64 k, u64 sz);
crange_locked search_lock(u64 k, u64 sz);
range_iterator begin() const { return range_iterator(crange_head->next[0].ptr()); };
range_iterator end() const { return range_iterator(0); };
......@@ -149,3 +153,38 @@ end(const crange &cr)
{
return cr.end();
}
struct crange_locked {
private:
crange *cr_;
u64 base_, size_;
range *prev_, *first_, *last_, *succ_;
scoped_gc_epoch gc;
crange_locked(crange *cr, u64 base, u64 size, range *p, range *f, range *l, range *s);
friend class crange;
crange_locked(const crange_locked&) = delete;
crange_locked& operator=(const crange_locked&) = delete;
public:
crange_locked(crange_locked &&x);
~crange_locked();
range_iterator begin() const { return range_iterator(first_); };
range_iterator end() const { return range_iterator(succ_); };
void replace(range *r);
};
static inline range_iterator
begin(const crange_locked &crl)
{
return crl.begin();
}
static inline range_iterator
end(const crange_locked &crl)
{
return crl.end();
}
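Since both crange and crange_locked now expose begin()/end(), either can drive a range-for loop; for example, a debugging dump over a whole crange (an assumed helper, not in the tree):

static void
dump(const crange &cr)
{
  for (range *e: cr)   // iterates the bottom (level-0) list via range_iterator
    e->print(0);
}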
#include "types.h"
#include "mmu.h"
#include "kernel.hh"
#include "spinlock.h"
#include "condvar.h"
#include "queue.h"
#include "proc.hh"
#include "kernel.hh"
#include "amd64.h"
#include "stat.h"
#include "fs.h"
......
#include "types.h"
#include "kernel.hh"
#include "spinlock.h"
#include "condvar.h"
#include "kernel.hh"
#include "fs.h"
#include "file.hh"
#include "stat.h"
......
......@@ -23,8 +23,17 @@ void gc_begin_epoch();
void gc_end_epoch();
class scoped_gc_epoch {
private:
bool valid;
public:
scoped_gc_epoch() { gc_begin_epoch(); }
~scoped_gc_epoch() { gc_end_epoch(); }
scoped_gc_epoch() { valid = true; gc_begin_epoch(); }
~scoped_gc_epoch() { if (valid) gc_end_epoch(); }
scoped_gc_epoch(const scoped_gc_epoch&) = delete;
scoped_gc_epoch(scoped_gc_epoch &&other) {
valid = other.valid;
other.valid = false;
}
};
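The new valid flag plus the move constructor make scoped_gc_epoch a move-only handle: exactly one live object ends the epoch, which is what lets crange_locked carry its gc member out of crange::search_lock. A sketch:

scoped_gc_epoch
start_epoch()
{
  scoped_gc_epoch e;      // gc_begin_epoch()
  return std::move(e);    // e becomes invalid; its destructor is a no-op
}                         // only the moved-to object calls gc_end_epoch()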
#pragma once
extern "C" {
#include "kern_c.h"
}
......
#ifdef LWIP
extern "C" {
#include "lwip/tcp_impl.h"
#include "lwip/tcpip.h"
#include "lwip/ip.h"
#include "lwip/netif.h"
#include "lwip/dhcp.h"
#include "lwip/sockets.h"
#include "netif/etharp.h"
}
#endif
#include "types.h"
#include "kernel.hh"
#include "queue.h"
......@@ -21,6 +9,16 @@ extern "C" {
#include "net.hh"
#ifdef LWIP
extern "C" {
#include "lwip/tcp_impl.h"
#include "lwip/tcpip.h"
#include "lwip/ip.h"
#include "lwip/netif.h"
#include "lwip/dhcp.h"
#include "lwip/sockets.h"
#include "netif/etharp.h"
}
err_t if_init(struct netif *netif);
void if_input(struct netif *netif, void *buf, u16 len);
#endif
......
#include "types.h"
#include "kernel.hh"
#include "spinlock.h"
#include "condvar.h"
#include "fs.h"
#include "kernel.hh"
#include "stat.h"
#include "kalloc.h"
#include "file.hh"
......
......@@ -176,11 +176,6 @@ vmap::vmap()
vmap::~vmap()
{
for (range *r: cr) {
delete (vma*) r->value;
cr.del(r->key, r->size);
}
if (kshared)
ksfree(slab_kshared, kshared);
if (pml4)
......@@ -228,7 +223,10 @@ vmap::copy(int share)
goto err;
ne->n->ref++;
nm->cr.add(ne->vma_start, ne->vma_end - ne->vma_start, (void *) ne);
auto span = nm->cr.search_lock(ne->vma_start, ne->vma_end - ne->vma_start);
for (auto x __attribute__((unused)): span)
assert(0); /* span must be empty */
span.replace(new range(&nm->cr, ne->vma_start, ne->vma_end - ne->vma_start, ne, 0));
}
if (share)
......@@ -271,11 +269,14 @@ vmap::insert(vmnode *n, uptr vma_start)
scoped_acquire sa(&lock);
u64 len = n->npages * PGSIZE;
if (lookup(vma_start, len)) {
cprintf("vmap_insert: overlap\n");
auto span = cr.search_lock(vma_start, len);
for (auto x __attribute__((unused)): span) {
cprintf("vmap::insert: overlap\n");
return -1;
}
// XXX handle overlaps
vma *e = new vma();
if (e == 0)
return -1;
......@@ -284,7 +285,10 @@ vmap::insert(vmnode *n, uptr vma_start)
e->vma_end = vma_start + len;
e->n = n;
n->ref++;
cr.add(e->vma_start, len, (void *) e);
span.replace(new range(&cr, vma_start, len, e, 0));
// XXX shootdown
return 0;
}
......@@ -293,16 +297,21 @@ vmap::remove(uptr vma_start, uptr len)
{
scoped_acquire sa(&lock);
uptr vma_end = vma_start + len;
struct range *r = cr.search(vma_start, len);
if (r == 0)
panic("no vma?");
struct vma *e = (struct vma *) r->value;
if (e->vma_start != vma_start || e->vma_end != vma_end) {
cprintf("vmap_remove: partial unmap unsupported\n");
return -1;
auto span = cr.search_lock(vma_start, len);
for (auto x: span) {
if (x->key < vma_start || x->key + x->size > vma_end) {
cprintf("vmap::remove: partial unmap not supported\n");
return -1;
}
}
cr.del(vma_start, len);
gc_delayed(e);
// XXX handle partial unmap
span.replace(0);
// XXX shootdown
return 0;
}
......@@ -311,17 +320,17 @@ vmap::remove(uptr vma_start, uptr len)
*/
vma *
vmap::pagefault_ondemand(uptr va, u32 err, vma *m)
vmap::pagefault_ondemand(uptr va, u32 err, vma *m, scoped_acquire *mlock)
{
if (m->n->allocpg() < 0)
panic("pagefault: couldn't allocate pages");
release(&m->lock);
mlock->release();
if (m->n->demand_load() < 0)
panic("pagefault: couldn't load");
m = lookup(va, 1);
if (!m)
panic("pagefault_ondemand");
acquire(&m->lock); // re-acquire lock on m
mlock->acquire(&m->lock); // re-acquire lock on m
return m;
}
......@@ -363,21 +372,19 @@ vmap::pagefault(uptr va, u32 err)
if (m == 0)
return -1;
acquire(&m->lock);
scoped_acquire mlock(&m->lock);
u64 npg = (PGROUNDDOWN(va) - m->vma_start) / PGSIZE;
if (m->n && m->n->type == ONDEMAND && m->n->page[npg] == 0)
m = pagefault_ondemand(va, err, m);
m = pagefault_ondemand(va, err, m, &mlock);
if (vm_debug)
cprintf("pagefault: err 0x%x va 0x%lx type %d ref %lu pid %d\n",
err, va, m->va_type, m->n->ref.load(), myproc()->pid);
if (m->va_type == COW && (err & FEC_WR)) {
if (pagefault_wcow(va, pte, m, npg) < 0) {
release(&m->lock);
if (pagefault_wcow(va, pte, m, npg) < 0)
return -1;
}
} else if (m->va_type == COW) {
*pte = v2p(m->n->page[npg]) | PTE_P | PTE_U | PTE_COW;
} else {
......@@ -388,7 +395,6 @@ vmap::pagefault(uptr va, u32 err)
// XXX(sbw) Why reload hardware page tables?
lcr3(v2p(pml4)); // Reload hardware page tables
release(&m->lock);
return 1;
}
......
#include "gc.hh"
#include "atomic.hh"
#include "crange.hh"
#include "cpputil.hh"
using std::atomic;
......@@ -68,6 +69,6 @@ struct vmap {
int copyout(uptr va, void *p, u64 len);
private:
vma* pagefault_ondemand(uptr va, u32 err, vma *m);
vma* pagefault_ondemand(uptr va, u32 err, vma *m, scoped_acquire *mlock);
int pagefault_wcow(uptr va, pme_t *pte, vma *m, u64 npg);
};