Commit d215683d authored by Frans Kaashoek

Copy latest user-level version into kernel before C++ changes

Parent b4fd7343
@@ -8,88 +8,73 @@
 #include "cpu.h"
 
 //
-// Concurrent atomic ranges operations using skip lists. Overall approach is
-// that the bottom list (layer 0) contains the truth, and the index is an
-// performance-accelerating hint. Inserts lazily adds a node to the index,
-// starting from layer 1, going up. Delete marks nodes atomically as deleted,
-// but lazily removes it from layers [1,n], in any order.
-//
-// Searches run without locks, and ignore marked nodes (pretending that they are not
+// Concurrent atomic range operations using skip lists. An insert may split an
+// existing range in several ranges. A delete may remove a sequence of ranges
+// or split an existing range. An insert with a concurrent delete occurs
+// atomically, which we accomplish by taking locks on the sequence of ranges
+// involved in the operation, computing the replacement, and atomically
+// switching the locked sequence with the replacement sequence.
+//
+// The overall implementation approach is that the bottom list (layer 0) contains
+// the truth, and the index is a performance-accelerating hint. Inserts lazily
+// add a range to the index, starting from layer 1, going up. Delete marks
+// the sequence atomically as deleted, but lazily removes it from layers [1,n],
+// in any order.
+//
+// Searches run without locks, and ignore marked ranges (pretending that they are not
 // part of the index).
 //
-// Changes to the list take locks on the nodes that are involved in the change.
+// Changes to the bottom list acquire locks on the ranges in the affected sequence.
 // Thus, if two changes are to different parts of the list, they can happen in
-// parallel. The invariant for deletion and insertion is that we always lock the
-// predecessor node, to avoid races.
+// parallel, but operations on overlapping sequences are serialized. The
+// invariant for deletion and insertion is that we always lock the
+// predecessor range, to avoid races.
 // As with concurrent lists, the hard cases are: 1) a concurrent remove and
-// insert, and 2) concurrent removes of two nodes in sequence. Re 1: the risk is
-// that the insert updates the next pointer of a node to be removed.
+// insert, and 2) concurrent removes of two ranges in sequence. Re 1: the risk is
+// that the insert updates the next pointer of a range to be removed.
 // Locking the predecessors avoids these races.
 //
-// A tricky case is that when we found the nodes corresponding to a range, we
-// must lock the predecessor first and then the first node of the range. But,
+// A detail is that when we have found the ranges corresponding to a sequence, we
+// must lock the predecessor first and then the first range of the sequence. But,
 // while waiting for the lock for the predecessor, another core may be deleting
-// the predecessor. To handle this race, the core that deletes a node marks a
-// node for deletion by marking its next pointer.
+// the predecessor. To handle this race, the core that deletes a range marks a
+// range for deletion by marking its next pointer.
 //
-// After a delete of a range, a node is not put on the delayed-free list until
-// it has been removed from the index (i.e., curlevel reaches 0), which maybe many
-// epochs later.
+// After a delete of a range, the range is not put on the delayed-free list
+// until it has been removed from the index (i.e., curlevel reaches 0), which
+// may be many epochs later.
 //
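The deletion marks mentioned above live in the low bit of each next pointer. A minimal standalone sketch of that pointer-tagging trick, mirroring the MARKED/MARK/WOMARK macros defined just below (hypothetical user-space code with stdint types in place of the kernel's uptr, not part of the commit):

  #include <assert.h>
  #include <stdint.h>

  struct node { struct node *next; };

  // Aligned allocations keep the low pointer bits zero, so bit 0
  // is free to carry a "logically deleted" flag.
  #define MARKED(x)  (((uintptr_t) (x)) & 0x1)
  #define MARK(x)    ((struct node *) (((uintptr_t) (x)) | 0x1))
  #define WOMARK(x)  ((struct node *) (((uintptr_t) (x)) & ~0x1))

  int
  main(void)
  {
    struct node b = { 0 }, a = { &b };
    a.next = MARK(a.next);         // mark a as logically deleted
    assert(MARKED(a.next));        // readers can test the flag...
    assert(WOMARK(a.next) == &b);  // ...and still strip it to traverse
    return 0;
  }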
-#define CRANGE_CHECKING 0
-#define MINNLEVEL 10
-
 #define MARKED(x) (((uptr) (x)) & 0x1)
-#define RANGE_MARK(x) ((struct clist_range *) (((uptr) (x)) | 0x1))
-#define RANGE_WOMARK(x) ((struct clist_range *) (((uptr) (x)) & ~0x1))
+#define MARK(x) ((struct range *) (((uptr) (x)) | 0x1))
+#define WOMARK(x) ((struct range *) (((uptr) (x)) & ~0x1))
 
 enum { crange_debug = 0 };
+enum { crange_checking = 0 };
 
 struct crange {
   int nlevel;                      // number of levels in the crange skip list
-  struct clist_range crange_head;  // a crange skip list starts with a sentinel node (key 0, sz 0)
-} cr;
+  struct range crange_head;        // a crange skip list starts with a sentinel range (key 0, sz 0)
+};
 
-struct crange*
-crange_alloc(int nlevel)
-{
-  struct crange *cr;
-  assert(kmalign((void **) &cr, CACHELINE, sizeof(struct crange)) == 0);
-  cr->nlevel = (nlevel < MINNLEVEL) ? MINNLEVEL : nlevel; // XXX
-  cr->crange_head.cr = cr;
-  cr->crange_head.key = 0;
-  cr->crange_head.size = 0;
-  assert(kmalign((void **) &cr->crange_head.lock,
-                 CACHELINE, sizeof(struct spinlock)) == 0);
-  initlock(cr->crange_head.lock, "head lock", LOCKSTAT_CRANGE);
-  cr->crange_head.next = kmalloc(sizeof(cr->crange_head.next[0]) * nlevel);
-  for (int l = 0; l < nlevel; l++) cr->crange_head.next[l] = 0;
-  if (crange_debug) cprintf("crange_alloc: return 0x%lx\n", (u64) cr);
-  return cr;
-}
-
-static void clist_range_free(void *p);
-
-void
-crange_free(struct crange *cr)
-{
-  assert(cr);
-  if (crange_debug) cprintf("crange_free: 0x%lx\n", (u64) cr);
-  struct clist_range *e, *n;
-  for (e = RANGE_WOMARK(cr->crange_head.next[0]); e; e = n) {
-    n = RANGE_WOMARK(e->next[0]);
-    clist_range_free(e);
-  }
-  kmfree(cr->crange_head.next);
-  destroylock(cr->crange_head.lock);
-  kmalignfree(cr->crange_head.lock);
-  kmalignfree(cr);
-}
+void crange_check(struct crange *cr, struct range *absent);
+
+//
+// Methods for ranges
+//
+
+// does [k1, sz1) intersect with [k2, sz2)?
+static int
+range_intersect(u64 k1, u64 sz1, u64 k2, u64 sz2)
+{
+  if (k1+sz1 <= k2) return 0;       // region1 < region2
+  else if (k1 >= k2+sz2) return 0;  // region1 > region2
+  else return 1;
+}
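Because both files use half-open intervals [k, k+sz), range_intersect treats regions that merely touch as disjoint. A quick standalone check (hypothetical test harness, not part of the commit):

  #include <assert.h>
  typedef unsigned long long u64;

  static int
  range_intersect(u64 k1, u64 sz1, u64 k2, u64 sz2)
  {
    if (k1+sz1 <= k2) return 0;
    else if (k1 >= k2+sz2) return 0;
    else return 1;
  }

  int
  main(void)
  {
    assert(!range_intersect(0, 10, 10, 5));  // [0,10) and [10,15) only touch
    assert(range_intersect(0, 10, 9, 5));    // [0,10) and [9,14) overlap at 9
    assert(range_intersect(2, 3, 0, 10));    // containment counts as intersection
    return 0;
  }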
 // draw nlevel in [1, nlevel]
 static int
-crange_draw_nlevel(int nlevel)
+range_draw_nlevel(int nlevel)
 {
   int l;
   for (l = 0; l < nlevel-1; l++) {
@@ -100,104 +85,51 @@ crange_draw_nlevel(int nlevel)
 }
 
 void
-crange_print_elem(struct clist_range *e, int l)
+range_print(struct range *e, int l)
 {
   cprintf ("0x%lx-0x%lx(%lu) 0x%lx, c %d, t %d, n 0x%lx m 0x%lx\n", e->key, e->key+e->size, e->size, (long) e->value, e->curlevel, e->nlevel, (long) e->next, MARKED(e->next[l]));
 }
 
-void
-crange_print(struct crange *cr, int full)
-{
-  struct clist_range *e;
-  for (int l = 0; l < cr->nlevel; l++) {
-    int c = 0;
-    cprintf("crange %d: ", l);
-    for (e = cr->crange_head.next[l]; e; e = RANGE_WOMARK(e->next[l])) {
-      c++;
-      if (full) crange_print_elem(e, l);
-    }
-    cprintf(" cnt %d \n", c);
-  }
-}
-
-void
-crange_check(struct crange *cr, int lockcheck, struct clist_range *absent)
-{
-  if (!CRANGE_CHECKING)
-    return;
-  int t = mycpu()->id;
-  struct clist_range *e, *s, *p;
-  for (int l = 0; l < cr->nlevel; l++) {
-    p = &cr->crange_head;
-    for (e = cr->crange_head.next[l]; e; e = s) {
-      assert(e->curlevel < cr->nlevel);
-      if (l > 0 && e->next[l] != 0 && !MARKED(e->next[l])) {
-        struct clist_range *n;
-        // look for e level down
-        for (n = RANGE_WOMARK(p->next[l-1]); n && n != e; n = RANGE_WOMARK(n->next[l-1]))
-          ;
-        if (!MARKED(e->next[l]) && n != e) {
-          cprintf("%d: check level %d failed %lu(%lu); in high level but not low?\n", t, l, e->key,
-                  e->size);
-          crange_print(cr, 1);
-          assert(0);
-        }
-      }
-      if (l < e->curlevel && e->curlevel < e->nlevel-1) {
-        struct clist_range *n;
-        // look for e one level up
-        for (n = RANGE_WOMARK(cr->crange_head.next[l+1]); n && n != e; n = RANGE_WOMARK(n->next[l+1]))
-          ;
-        if (n != e && e->curlevel > l) {
-          // cprintf("%d: check level %d %u(%u) failed; not in level %d\n", t, l, e->key, e->size, e->curlevel);
-          // crange_print(1);
-          // assert(0);
-        }
-      }
-      s = RANGE_WOMARK(e->next[l]);
-      assert(s != e);
-      if (!MARKED(e->next[l]) && s && (e->key + e->size > s->key)) {
-        if (crange_debug) cprintf("%d: e(%lu,%lu) overlaps with s(%lu,%lu)\n", t, e->key, e->size, s->key, e->size);
-        crange_print(cr, 1);
-        assert(0);
-      }
-      if (!MARKED(e->next[l])) p = e;
-    }
-  }
-}
-
 static void
-clist_range_free(void *p)
+range_free(void *p)
 {
-  struct clist_range *e = (struct clist_range *) p;
+  struct range *e = (struct range *) p;
   if (crange_debug)
-    cprintf("%d: clist_range_free: 0x%lx 0x%lx-0x%lx(%ld)\n", myproc()->cpuid, (u64) e, e->key, e->key+e->size, e->size);
-  crange_check(e->cr, 0, p);
+    cprintf("%d: range_free: 0x%lx 0x%lx-0x%lx(%ld)\n", myproc()->cpuid, (u64) e, e->key, e->key+e->size, e->size);
+  crange_check(e->cr, e);
   assert(e->curlevel == -1);
   for (int l = 0; l < e->nlevel; l++) {
-    e->next[l] = (struct clist_range *) 0xDEADBEEF;
+    e->next[l] = (struct range *) 0xDEADBEEF;
   }
-  destroylock(e->lock);
   kmalignfree(e->lock);
   kmfree(e->next);
   kmalignfree(e);
 }
 
 static void
-crange_free_delayed(struct clist_range *e)
+range_free_delayed(struct range *e)
 {
   if (crange_debug)
-    cprintf("%d: crange_free_delayed: 0x%lx 0x%lx-0x%lx(%lu) %lu\n", myproc()->pid, (long) e, e->key, e->key + (e)->size, e->size, myproc()->epoch);
-  crange_check(e->cr, 0, e);
+    cprintf("%d: range_free_delayed: 0x%lx 0x%lx-0x%lx(%lu) %lu\n", myproc()->pid, (long) e, e->key, e->key + (e)->size, e->size, myproc()->epoch);
+  crange_check(e->cr, e);
   assert(e->curlevel == -1);
-  gc_delayed(e, clist_range_free);
+  gc_delayed(e, range_free);
 }
 
+static void
+range_dec_ref(struct range *e)
+{
+  int n = __sync_fetch_and_sub(&(e->curlevel), 1);
+  if (n == 0) {    // now removed from all levels.
+    range_free_delayed(e);
+  }
+}
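curlevel doubles as a reference count on index membership: each level the range still occupies holds one "reference", and the decrement that observes 0 (dropping the count to -1, the value range_free asserts) is the one that schedules the delayed free. A user-space sketch of the same claim-to-free pattern (hypothetical names, same GCC __sync builtin as the commit uses):

  #include <assert.h>

  static int curlevel = 2;   // still linked on index levels 0, 1, and 2

  // Returns 1 iff this caller performed the final decrement and
  // therefore owns the (delayed) free.
  static int
  dec_ref(void)
  {
    int n = __sync_fetch_and_sub(&curlevel, 1);
    return n == 0;   // the 0 -> -1 transition: removed from all levels
  }

  int
  main(void)
  {
    assert(!dec_ref());   // unlink from level 2
    assert(!dec_ref());   // unlink from level 1
    assert(dec_ref());    // unlink from level 0: schedule the free
    assert(curlevel == -1);
    return 0;
  }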
-static struct clist_range *
-crange_new(struct crange *cr, u64 k, u64 sz, void *v, struct clist_range *n)
+static struct range *
+range_alloc(struct crange *cr, u64 k, u64 sz, void *v, struct range *n)
 {
-  struct clist_range *r;
-  kmalign((void **) &r, CACHELINE, sizeof(struct clist_range));
+  struct range *r;
+  kmalign((void **) &r, CACHELINE, sizeof(struct range));
   assert(r);
   r->key = k;
   r->size = sz;
@@ -205,8 +137,8 @@ crange_new(struct crange *cr, u64 k, u64 sz, void *v, struct clist_range *n)
   assert(r->size > 0);
   assert(cr->nlevel > 0);
   r->curlevel = 0;
-  r->nlevel = crange_draw_nlevel(cr->nlevel);
-  r->next = kmalloc(sizeof(sizeof(r->next[0])) * r->nlevel); // cache align?
+  r->nlevel = range_draw_nlevel(cr->nlevel);
+  r->next = (struct range**) kmalloc(sizeof(r->next[0]) * r->nlevel); // cache align?
   assert(r->next);
   r->next[0] = n;
   for (int l = 1; l < r->nlevel; l++) r->next[l] = 0;
@@ -217,39 +149,34 @@ crange_new(struct crange *cr, u64 k, u64 sz, void *v, struct clist_range *n)
   return r;
 }
 
-static struct clist_range *
-crange_insert(struct clist_range *head, struct clist_range *new)
+//
+// Methods on a sequence (i.e., ordered list) of ranges
+//
+
+static struct range *
+range_insert(struct range *head, struct range *r)
 {
-  struct clist_range *n, *p;
+  struct range *n, *p;
   p = NULL;
   for (n = head; n != 0; p = n, n = n->next[0]) {
     assert(!MARKED(n));
-    if (new->key < n->key) {
+    if (r->key < n->key) {
       break;
     }
   }
   if (n == head) {
-    new->next[0] = head;
-    head = new;
+    r->next[0] = head;
+    head = r;
   } else {
-    p->next[0] = new;
-    new->next[0] = n;
+    p->next[0] = r;
+    r->next[0] = n;
   }
   return head;
 }
 
-// does [k1, sz1) intersect with [k2, sz2)?
-static int
-crange_intersect(u64 k1, u64 sz1, u64 k2, u64 sz2)
-{
-  if (k1+sz1 <= k2) return 0;       // region1 < region2
-  else if (k1 >= k2+sz2) return 0;  // region1 > region2
-  else return 1;
-}
-
 // lock p if p->next == e and p isn't marked for deletion. if not, return failure.
 static int
-crange_lockpred(struct clist_range *p, struct clist_range *e)
+range_lock_pred(struct range *p, struct range *e)
 {
   assert(!MARKED(e));
   acquire(p->lock);
@@ -257,55 +184,196 @@ crange_lockpred(struct clist_range *p, struct clist_range *e)
     return 1;
   }
   release(p->lock);
-  // cprintf("%d: crange_lockpred: retry %u\n", mycpu()->id, p->key);
+  // cprintf("%d: range_lock_pred: retry %u\n", mycpu()->id, p->key);
   return 0;
 }
 
-// Mark nodes f till s for deletion from top-level down through level l
-// XXX does it matter to top down?
+// Mark ranges f till s for deletion from top-level down through level 0.
+// It must be top-down to ensure that del_index/add_index don't race on
+// curlevel. If bottom-up, del_index may delete a node at a low level,
+// causing curlevel to drop below nlevel, and causing add_index to add the
+// node back on a level on which it already has been inserted (because it
+// hasn't been marked deleted yet at that level).
 static void
-crange_mark(struct clist_range *f, struct clist_range *s, int level)
+range_mark(struct range *f, struct range *s)
 {
-  struct clist_range *e;
-  for (e = f; e && e != s; e = RANGE_WOMARK(e->next[0])) {
+  struct range *e;
+  for (e = f; e && e != s; e = WOMARK(e->next[0])) {
     assert(e);
-    for (int l = e->nlevel-1; l >= level; l--) {
-      assert(l < e->nlevel);
+    for (int l = e->nlevel-1; l >= 0; l--) {
       (void) __sync_fetch_and_or(&(e->next[l]), 0x1);
     }
   }
 }
 
-// Unlock nodes f through l
+// Unlock ranges f through l
 static void
-crange_unlockn(struct clist_range *f, struct clist_range *l)
+range_unlockn(struct range *f, struct range *l)
 {
-  struct clist_range *e;
-  for (e = f; e != l; e = RANGE_WOMARK(e->next[0])) {
+  struct range *e;
+  for (e = f; e != l; e = WOMARK(e->next[0])) {
     assert(e);
     release(e->lock);
   }
   if (l) release(e->lock);
 }
 
-// Delay free nodes f through l
+// Delay free ranges f through l
 static void
-crange_freen(struct clist_range *f, struct clist_range *l)
+range_freen(struct range *f, struct range *l)
 {
-  struct clist_range *e;
-  for (e = f; e != l; e = RANGE_WOMARK(e->next[0])) {
+  struct range *e;
+  for (e = f; e != l; e = WOMARK(e->next[0])) {
     assert(e);
     assert(e->curlevel >= 0);
-    int n = __sync_fetch_and_sub(&(e->curlevel), 1);
-    if (n == 0) { // now removed from all levels.
-      crange_free_delayed(e);
-    }
+    range_dec_ref(e);
   }
   if (l) {
     assert(e->curlevel >= 0);
-    int n = __sync_fetch_and_sub(&(e->curlevel), 1);
-    if (n == 0) { // now removed from all levels.
-      crange_free_delayed(e);
-    }
+    range_dec_ref(e);
   }
 }
 
+// Compute the sequence that will replace the to-be deleted sequence. Make copies to create
+// the new ranges, because readers may be running through the list and looking at the old ranges.
+// If the whole sequence is replaced, it will return s.
+static struct range *
+crange_replace(u64 k, u64 sz, void *v, struct range *f, struct range *l,
+               struct range *s)
+{
+  struct range *r;
+  if (f == l) {   // the first range covers the range to be deleted
+    if (k <= f->key && f->key + f->size <= k + sz) {  // range sequence covers the first range
+      r = s;
+    } else {
+      if (f->key < k && k+sz < f->key + f->size) {   // split range?
+        struct range *right = range_alloc(f->cr, k+sz, f->key+f->size-k-sz, v, s);
+        struct range *left = range_alloc(f->cr, f->key, k-f->key, v, right);
+        r = left;
+      } else if (k <= f->key) {  // cut front?
+        assert(k+sz <= f->key + f->size);
+        r = range_alloc(f->cr, k+sz, f->key + f->size - k - sz, v, f->next[0]);
+      } else {  // cut end
+        assert(k > f->key);
+        r = range_alloc(f->cr, f->key, k - f->key, v, f->next[0]);
+      }
+    }
+  } else if (k <= f->key && k + sz >= l->key + l->size) {  // delete complete range?
+    r = s;
+  } else {  // first range covers part and last range other part?
+    struct range *left;
+    struct range *right;
+    // cprintf("f 0x%lx [%d, %d) l 0x%lx [%d, %d)\n", (long) f, f->key, f->key+f->size, (long) l, l->key, l->key+l->size);
+    if (k <= f->key && k + sz >= f->key + f->size) {  // delete first range?
+      left = NULL;
+    } else {
+      assert(k > f->key);
+      left = range_alloc(f->cr, f->key, k - f->key, v, 0);
+    }
+    if (k + sz >= l->key + l->size) {  // delete last range?
+      right = NULL;
+    } else {
+      assert(k+sz > l->key);
+      assert(l->key + l->size >= k + sz);
+      right = range_alloc(f->cr, k+sz, l->key+l->size - k - sz, v, s);
+    }
+    r = left ? left : right;
+    if (left) left->next[0] = right ? right : s;
+  }
+  return r;
+}
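To make the split case concrete: deleting [30, 40) out of a single existing range [0, 100) replaces it with two copies, [0, 30) and [40, 100). An arithmetic-only check of the bounds crange_replace computes for that branch (hypothetical harness, not part of the commit):

  #include <assert.h>
  typedef unsigned long long u64;

  int
  main(void)
  {
    // existing range f = [0, 100), delete [k, k+sz) = [30, 40)
    u64 fkey = 0, fsize = 100, k = 30, sz = 10;

    // the "split range?" branch of crange_replace applies:
    assert(fkey < k && k+sz < fkey + fsize);

    u64 left_key  = fkey, left_size  = k - fkey;          // [0, 30)
    u64 right_key = k+sz, right_size = fkey+fsize-k-sz;   // [40, 100)

    assert(left_key == 0 && left_size == 30);
    assert(right_key == 40 && right_size == 60);
    return 0;
  }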
+//
+// Methods on a crange
+//
+
+void
+crange_print(struct crange *cr, int full)
+{
+  struct range *e;
+  for (int l = 0; l < cr->nlevel; l++) {
+    int c = 0;
+    cprintf("crange %d: ", l);
+    for (e = cr->crange_head.next[l]; e; e = WOMARK(e->next[l])) {
+      c++;
+      if (full) range_print(e, l);
+    }
+    cprintf(" cnt %d \n", c);
+  }
+}
+
+struct crange*
+crange_alloc(int nlevel)
+{
+  struct crange *cr;
+  assert(kmalign((void **) &cr, CACHELINE, sizeof(struct crange)) == 0);
+  assert(nlevel >= 0);
+  cr->nlevel = nlevel;
+  cr->crange_head.cr = cr;
+  cr->crange_head.key = 0;
+  cr->crange_head.size = 0;
+  assert(kmalign((void **) &cr->crange_head.lock,
+                 CACHELINE, sizeof(struct spinlock)) == 0);
+  initlock(cr->crange_head.lock, "head lock", LOCKSTAT_CRANGE);
+  cr->crange_head.next = (struct range **) kmalloc(sizeof(cr->crange_head.next[0]) * nlevel);
+  for (int l = 0; l < nlevel; l++) cr->crange_head.next[l] = 0;
+  if (crange_debug) cprintf("crange_alloc: return 0x%lx\n", (u64) cr);
+  return cr;
+}
+
+void
+crange_free(struct crange *cr)
+{
+  assert(cr);
+  if (crange_debug) cprintf("crange_free: 0x%lx\n", (u64) cr);
+  struct range *e, *n;
+  for (e = WOMARK(cr->crange_head.next[0]); e; e = n) {
+    n = WOMARK(e->next[0]);
+    range_free(e);
+  }
+  kmfree(cr->crange_head.next);
+  kmalignfree(cr->crange_head.lock);
+  kmalignfree(cr);
+}
+
+// Check some invariants, ignoring marked nodes.
+void
+crange_check(struct crange *cr, struct range *absent)
+{
+  if (!crange_checking)
+    return;
+  int t = mycpu()->id;
+  struct range *e, *s;
+  for (int l = 0; l < cr->nlevel; l++) {
+    for (e = cr->crange_head.next[l]; e; e = s) {
+      assert(e->curlevel < cr->nlevel);
+      if (absent == e)  {
+        cprintf("%d: check level failed; 0x%lx is present\n", l, (u64) absent);
+        assert(0);
+      }
+      // look for e level down, but only for non-marked nodes.
+      if (l > 0 && e->next[l] != 0 && !MARKED(e->next[l])) {
+        struct range *n;
+        for (n = WOMARK(cr->crange_head.next[l-1]); n && n != e; n = WOMARK(n->next[l-1]))
+          ;
+        __sync_synchronize();
+        // if e is marked now, skip the check (the memory barrier ensures that
+        // we reread it from memory, and not from a register).
+        if (!MARKED(e->next[l]) && n != e) {
+          cprintf("%d: check level %d failed 0x%lx-0x%lx(%lu) m %lu c %d t %d; in high level but not low\n", t, l, e->key, e->key+e->size, e->size, MARKED(e->next[l]), e->curlevel, e->nlevel);
+          crange_print(cr, 1);
+          assert(0);
+        }
+      }
+      // check if succ range is after e
+      s = WOMARK(e->next[l]);
+      assert(s != e);
+      if (!MARKED(e->next[l]) && s && (e->key + e->size > s->key)) {
+        if (crange_debug) cprintf("%d: e(%lu,%lu) overlaps with s(%lu,%lu)\n", t, e->key, e->size, s->key, s->size);
+        crange_print(cr, 1);
+        assert(0);
+      }
+    }
+  }
+}
@@ -314,61 +382,60 @@ crange_freen(struct clist_range *f, struct clist_range *l)
 // Returns 0, if marked but on level 0. Returns -1 if remove fails.
 // Returns 1 on success. Tricky because of races between add and del.
 static int
-crange_del_index(struct crange *cr, struct clist_range *p0, struct clist_range **e, int l)
+crange_del_index(struct crange *cr, struct range *p0, struct range **e, int l)
 {
   int r = 1;
   assert(l < (*e)->nlevel);
-  if (!MARKED((*e)->next[l]))  // don't remove unmarked nodes from index
+  if (!MARKED((*e)->next[l]))  // don't remove unmarked ranges from index
     return r;
   if (l == 0) return 0;    // but not on level 0; they are locked when removed
-  // crange_check(0, NULL);
+  // crange_check(cr, NULL);
   while (*e && MARKED((*e)->next[l])) {
 #if 0
     if (l != (*e)->curlevel) {
-      // node is still in the index one level up, back out. we want to remove it first
-      // at higher levels so that we ensure the invariant nodes are removed top down.
+      // range is still in the index one level up, back out. we want to remove it first
+      // at higher levels so that we ensure the invariant ranges are removed top down.
       // cprintf("%d: crange_del_index: retry del %d %u(%u) on level %d\n", mycpu()->id, l, (*e)->key, (*e)->size, (*e)->curlevel);
       r = -1;
       goto done;
     }
 #endif
-    int cas = __sync_bool_compare_and_swap(&(p0->next[l]), *e, RANGE_WOMARK((*e)->next[l]));
+    int cas = __sync_bool_compare_and_swap(&(p0->next[l]), *e, WOMARK((*e)->next[l]));
     if (cas) {
       assert((*e)->curlevel >= 0);
-      int n = __sync_fetch_and_sub(&((*e)->curlevel), 1);
-      if (n == 0) { // now removed from all levels.
-        crange_free_delayed(*e);
-      }
-      *e = RANGE_WOMARK((*e)->next[l]);
+      range_dec_ref(*e);
+      *e = WOMARK((*e)->next[l]);
     } else {
       // cprintf("%d: crange_del_index: retry del %u(%u)\n", mycpu()->id, (*e)->key, (*e)->key + (*e)->size);
       r = -1;
+      //INCRETRY;
       goto done;
     }
   }
 done:
-  crange_check(cr, 0, NULL);
+  crange_check(cr, NULL);
   return r;
 }
 
 // Insert e into index one level up, between p and s, if e hasn't been inserted
 // yet on that level.
 static void
-crange_add_index(struct crange *cr, int l, struct clist_range *e, struct clist_range *p1, struct clist_range *s1)
+crange_add_index(struct crange *cr, int l, struct range *e, struct range *p1, struct range *s1)
 {
   if (l >= e->nlevel-1) return;
   if (MARKED(e->next[l+1])) return;
-  // crange_check(0, NULL);
+  // crange_check(cr, NULL);
   if (__sync_bool_compare_and_swap(&e->curlevel, l, l+1)) {
     assert(e->curlevel < e->nlevel);
     // this is the core inserting at level l+1, but some core may be deleting
-    struct clist_range *s = RANGE_WOMARK(s1);
+    struct range *s = WOMARK(s1);
     do {
-      struct clist_range *n = e->next[l+1];   // Null and perhaps marked
+      struct range *n = e->next[l+1];   // Null and perhaps marked
       if (MARKED(n)) {
-        // this node has been deleted, don't insert into index.
+        // this range has been deleted, don't insert into index.
        // undo increment of cur->level.
        __sync_fetch_and_sub(&(e->curlevel), 1);
+        //INCRETRY;
        goto done;
      }
      assert (n == 0);
@@ -377,25 +444,26 @@ crange_add_index(struct crange *cr, int l, struct clist_range *e, struct clist_r
       (void) __sync_fetch_and_and(&(e->next[l+1]), 0x1);   // failed, keep mark bit
       __sync_fetch_and_sub(&(e->curlevel), 1);
       // cprintf("%d: crange_add_index: retry add level %d %u(%u)\n", mycpu()->id, l+1, e->key, e->key+e->size);
+      //INCRETRY;
     }
   }
 done:
-  crange_check(cr, 0, NULL);
+  crange_check(cr, NULL);
 }
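crange_add_index promotes a range one index level at a time, and the compare-and-swap on curlevel is what guarantees only one core performs a given promotion. A user-space sketch of that guard (hypothetical, using the same GCC builtin):

  #include <assert.h>

  static int curlevel = 0;  // range currently indexed up through this level

  // Try to claim the promotion from level l to l+1; at most one
  // concurrent caller can win the compare-and-swap.
  static int
  try_promote(int l)
  {
    return __sync_bool_compare_and_swap(&curlevel, l, l+1);
  }

  int
  main(void)
  {
    assert(try_promote(0));   // this caller owns the 0 -> 1 promotion
    assert(!try_promote(0));  // a second attempt (or another core) loses
    assert(curlevel == 1);
    return 0;
  }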
-// Given the node that starts the range, find all other nodes part of range and lock them,
+// Given the range that starts the sequence, find all other ranges part of sequence and lock them,
 // if l == 0
 static int
-crange_lock_range(u64 k, u64 sz, int l, struct clist_range **er, struct clist_range **pr,
-                  struct clist_range **fr, struct clist_range **lr, struct clist_range **sr)
+crange_lock_range(u64 k, u64 sz, int l, struct range **er, struct range **pr,
+                  struct range **fr, struct range **lr, struct range **sr)
 {
-  struct clist_range *e = *er;
+  struct range *e = *er;
   assert(*pr != e);
   *fr = e;
   *lr = e;
   if (l == 0) {
     // lock p, if still pointing to e (at the bottom level)
-    if (!crange_lockpred(*pr, e))
+    if (!range_lock_pred(*pr, e))
       return 0;
     // locked p and e; we are in business
   }
@@ -404,46 +472,47 @@ crange_lock_range(u64 k, u64 sz, int l, struct clist_range **er, struct clist_ra
     assert(*fr);
     *lr = e;
     if (l == 0) {
-      acquire(e->lock);    // lock all nodes in the range
+      acquire(e->lock);    // lock all ranges in the sequence
     }
-    e = RANGE_WOMARK(e->next[l]);
+    e = WOMARK(e->next[l]);
   }
   *sr = e;
   *er = e;
   return 1;
 }
 
-// finds and locks all nodes in range [k, sz). Also, returns predecessors
-// and successors. Locks pred and nodes in bottom list. If crange_lockpred() fails, we search
-// again.
+// finds and locks all ranges in sequence [k, sz). Also, returns predecessors
+// and successors. Locks pred and ranges in bottom list. If range_lock_pred()
+// fails, search again.
 static int
-crange_find_and_lock(struct crange *cr, u64 k, u64 sz, struct clist_range **p0,
-                     struct clist_range **f0, struct clist_range **l0, struct clist_range **s0)
+crange_find_and_lock(struct crange *cr, u64 k, u64 sz, struct range **p0,
+                     struct range **f0, struct range **l0, struct range **s0)
 {
-  // need only current and last;
-  struct clist_range *p1, *s1;
-  struct clist_range *e;
+  struct range *p1, *s1;
+  struct range *e;
+  //read_counters(myproc()->cpuid, 0);
 retry:
   *p0 = NULL;
   *s0 = NULL;
   for (int l = cr->nlevel-1; l >= 0; l--) {
     *f0 = NULL;
     *l0 = NULL;
-    p1 = *p0;
-    *p0 = (l == cr->nlevel-1) ? &cr->crange_head : p1;
+    p1 = *p0;   // remember last previous (p0) as the previous one level up (p1)
+    *p0 = (l == cr->nlevel-1) ? &cr->crange_head : p1;   // set current previous
     s1 = *s0;
-    for (e = RANGE_WOMARK((*p0)->next[l]); e; *p0 = e, e = RANGE_WOMARK(e->next[l])) {
+    for (e = WOMARK((*p0)->next[l]); e; *p0 = e, e = WOMARK(e->next[l])) {
       assert(l < e->nlevel);
       int r = crange_del_index(cr, *p0, &e, l);
       if (r == -1) goto retry;  // deletion failed because some other core did it; try again
-      if (r == 0) continue;  // node was marked but we are level 0, skip it; lock holder will remove
-      if (e == 0) break;     // all nodes on this level were removed
+      if (r == 0) continue;  // range was marked but we are level 0, skip it; lock holder will remove
+      if (e == 0) break;     // all ranges on this level were removed
       if (k >= e->key+e->size) {  // is e before k?
         crange_add_index(cr, l, e, p1, s1);   // maybe add to index
         continue;
       }
-      if (crange_intersect(k, sz, e->key, e->size)) {  // first node of range?
+      if (range_intersect(k, sz, e->key, e->size)) {  // first range of sequence?
         if (!crange_lock_range(k, sz, l, &e, p0, f0, l0, s0)) {
+          // INCRETRY;
           goto retry;
         }
       } else {  // not present on this level; e succeeds [k, sz)
@@ -454,88 +523,48 @@ crange_find_and_lock(struct crange *cr, u64 k, u64 sz, struct clist_range **p0,
     }
   }
   if (*f0 == NULL) {   // range isn't present, lock predecessor of key
-    if (!crange_lockpred(*p0, *s0)) goto retry;
+    if (!range_lock_pred(*p0, *s0)) {
+      //INCRETRY;
+      goto retry;
+    }
   }
-  assert(!*l0 || !MARKED((*l0)->next[0]));
-  assert(!MARKED((*p0)->next));
-  assert(!(*p0)->next[0] || !MARKED((*p0)->next[0]->next[0]));
-  // cprintf("find(%d,%d): ", k, sz); crange_print(1);
+  //assert(!*l0 || !MARKED((*l0)->next[0]));
+  //assert(!MARKED((*p0)->next));
+  //assert(!(*p0)->next[0] || !MARKED((*p0)->next[0]->next[0]));
+  //cprintf("find(%d,%d): ", k, sz); crange_print(1);
+  //read_counters(myproc()->cpuid, 1);
   return *f0 != 0;
 }
 
-// Compute the sublist that will replace the to-be deleted range. Make copies to create
-// the new nodes, because readers may running through the list and looking at the old nodes.
-// If the whole list is replaced, it will return s.
-static struct clist_range *
-crange_replace(u64 k, u64 sz, void *v, struct clist_range *f, struct clist_range *l,
-               struct clist_range *s)
-{
-  struct clist_range *r;
-  if (f == l) {   // the first node covers range to be deleted
-    if (k <= f->key && f->key + f->size <= k + sz) {  // range covers the first node
-      r = s;
-    } else {
-      if (f->key < k && k+sz < f->key + f->size) {   // split node?
-        struct clist_range *right = crange_new(f->cr, k+sz, f->key+f->size-k-sz, v, s);
-        struct clist_range *left = crange_new(f->cr, f->key, k-f->key, v, right);
-        r = left;
-      } else if (k <= f->key) {  // cut front?
-        assert(k+sz <= f->key + f->size);
-        r = crange_new(f->cr, k+sz, f->key + f->size - k - sz, v, f->next[0]);
-      } else {  // cut end
-        assert(k > f->key);
-        r = crange_new(f->cr, f->key, k - f->key, v, f->next[0]);
-      }
-    }
-  } else if (k <= f->key && k + sz >= l->key + l->size) {  // delete complete range?
-    r = s;
-  } else {  // first node covers part and last node other part?
-    struct clist_range *left;
-    struct clist_range *right;
-    // cprintf("f 0x%lx [%d, %d) l 0x%lx [%d, %d)\n", (long) f, f->key, f->key+f->size, (long) l, l->key, l->key+l->size);
-    if (k <= f->key && k + sz >= f->key + f->size) {  // delete first node?
-      left = NULL;
-    } else {
-      assert(k > f->key);
-      left = crange_new(f->cr, f->key, k - f->key, v, 0);
-    }
-    if (k + sz >= l->key + l->size) {  // delete last node?
-      right = NULL;
-    } else {
-      assert(k+sz > l->key);
-      assert(l->key + l->size >= k + sz);
-      right = crange_new(f->cr, k+sz, l->key+l->size - k - sz, v, s);
-    }
-    r = left ? left : right;
-    if (left) left->next[0] = right ? right : s;
-  }
-  return r;
-}
-
-// Search through the crange skip list for a node that intersects with [k, sz) and return that node.
-// Pretend that marked nodes don't exist.
-struct clist_range*
-crange_search(struct crange *cr, u64 k, u64 sz)
+// Search through the crange skip list for a range that intersects with [k, sz)
+// and return that range. Pretend that marked ranges don't exist.
+struct range*
+crange_search(struct crange *cr, u64 k, u64 sz, int mod)
 {
-  struct clist_range *p, *e, *r;
+  struct range *p, *e, *r;
+  int n = (mod) ? range_draw_nlevel(cr->nlevel) : 0;
   gc_begin_epoch();
+  //read_counters(myproc()->cpuid, 0);
   if (crange_debug) cprintf("crange_search: 0x%lx 0x%lx\n", (u64) cr, k);
   r = NULL;
   p = &cr->crange_head;
   for (int l = cr->nlevel-1; l >= 0; l--) {
-    for (e = RANGE_WOMARK(p->next[l]); e; p = e, e = RANGE_WOMARK(e->next[l])) {
+    for (e = WOMARK(p->next[l]); e; p = e, e = WOMARK(e->next[l])) {
       if (crange_debug)
         cprintf("level %d: 0x%lx 0x%lx-%lx(%lu) 0x%lx-0x%lx(%lu)\n", l, (u64) p, p->key, p->key+p->size, p->size, e->key, e->key+e->size, e->size);
-      // skip all marked nodes, but don't update p because
-      // we don't want to descend on a marked node down.
+      // skip all marked ranges, but don't update p because
+      // we don't want to descend on a marked range down.
       while (e && MARKED(e->next[l])) {
-        e = RANGE_WOMARK(e->next[l]);
+        e = WOMARK(e->next[l]);
       }
       if (!e) break;
+      if (mod && l < n && l > 0) {
+        e->value = (void *) k;
+      }
      if (k >= e->key+e->size)
        continue;
-      if (crange_intersect(k, sz, e->key, e->size)) {
+      if (range_intersect(k, sz, e->key, e->size)) {
        r = e;
        goto end;
      }
@@ -544,20 +573,21 @@ crange_search(struct crange *cr, u64 k, u64 sz)
     }
   }
 end:
+  //read_counters(myproc()->cpuid, 1);
   gc_end_epoch();
   // cprintf("crange_search: 0x%x return (0x%lx,0x%lx)\n", cr, r? r->key : 0, r? r->size : 0);
   return r;
 }
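Putting the public interface together: a hypothetical caller, not from the commit, assuming the kernel's gc/epoch machinery is already running and using only the prototypes declared in the header below (the include name is an assumption):

  #include <assert.h>
  #include "kernel.h"   // hypothetical: wherever the crange prototypes live

  void
  example(void)
  {
    // Track which value covers parts of an address space.
    struct crange *cr = crange_alloc(10);

    crange_add(cr, 0x1000, 0x3000, (void *) 1);  // insert [0x1000, 0x4000)
    crange_del(cr, 0x2000, 0x1000);              // punch a hole: [0x2000, 0x3000)

    // searching inside the left piece finds the split-off range [0x1000, 0x2000)
    struct range *r = crange_search(cr, 0x1800, 1, 0);
    assert(r && r->key == 0x1000 && r->size == 0x1000);

    crange_free(cr);
  }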
-// delete the range [k, k+sz). We compute the replacement list and then hook it in atomically.
+// delete the range [k, k+sz). compute the replacement list and then hook it in atomically.
 void
 crange_del(struct crange *cr, u64 k, u64 sz)
 {
-  struct clist_range *prev;
-  struct clist_range *succ;
-  struct clist_range *first;
-  struct clist_range *last;
-  struct clist_range *repl = NULL;
+  struct range *prev;
+  struct range *succ;
+  struct range *first;
+  struct range *last;
+  struct range *repl = NULL;
   assert(cr);
   gc_begin_epoch();
@@ -569,37 +599,37 @@ crange_del(struct crange *cr, u64 k, u64 sz)
     goto done;
   }
   repl = crange_replace(k, sz, NULL, first, last, succ);
-  crange_mark(first, succ, 0);    // mark first till s on level >= 0
+  range_mark(first, succ);
   while (1) {
     // hook new list into bottom list; if del resulted in a new list, use that (repl), otherwise
     // set predecessor to successor.
     if (__sync_bool_compare_and_swap(&(prev->next[0]), first, repl ? repl : succ)) {
       release(prev->lock);
-      crange_freen(first, last);   // put on delayed list before unlocking
-      crange_unlockn(first, last);
+      range_freen(first, last);   // put on delayed list before unlocking
+      range_unlockn(first, last);
       break;
     }
     cprintf("crange_del(%lu, %lu): prev was updated; try again\n", k, sz);
     assert(0);
   }
 done:
-  crange_check(cr, 1, NULL);
+  crange_check(cr, NULL);
   // cprintf("%d: crange_del(0x%lx, 0x%lx):\n", mycpu()->id, k, sz); crange_print(cr, 1);
   gc_end_epoch();
 }
 
-// add the range [k, sz), which causes nodes to be deleted, if the range overlaps an
+// add the range [k, sz), which causes ranges to be deleted, if the range overlaps an
 // existing range. we compute the replacement list and then hook it atomically.
 void
 crange_add(struct crange *cr, u64 k, u64 sz, void *v)
 {
-  struct clist_range *new;
-  struct clist_range *first;
-  struct clist_range *prev;
-  struct clist_range *last;
-  struct clist_range *succ;
-  struct clist_range *repl = NULL;
+  struct range *r;
+  struct range *first;
+  struct range *prev;
+  struct range *last;
+  struct range *succ;
+  struct range *repl = NULL;
   if (crange_debug) cprintf("crange_add: 0x%lx 0x%lx-0x%lx(%lu)\n", (u64) cr, k, k+sz, sz);
   assert(cr);
@@ -610,29 +640,29 @@ crange_add(struct crange *cr, u64 k, u64 sz, void *v)
   } else {
     repl = succ;
   }
-  new = crange_new(cr, k, sz, v, succ);
-  repl = crange_insert(repl, new);
-  crange_mark(first, succ, 0);   // mark first till s on all levels
+  r = range_alloc(cr, k, sz, v, succ);
+  repl = range_insert(repl, r);
+  range_mark(first, succ);
   if (prev)
     assert(!MARKED(prev->next[0]));
   if (__sync_bool_compare_and_swap(&(prev->next[0]), first ? first : succ, repl)) {
     release(prev->lock);
-    crange_freen(first, last);   // put on delayed list before unlocking
-    crange_unlockn(first, last);
+    range_freen(first, last);   // put on delayed list before unlocking
+    range_unlockn(first, last);
   } else {
     assert(0);
   }
   // cprintf("crange_add(0x%lx,0x%lx):\n", k, sz); crange_print(cr, 1);
-  crange_check(cr, 1, NULL);
+  crange_check(cr, NULL);
   gc_end_epoch();
 }
 
 int
-crange_foreach(struct crange *cr, int (*cb)(struct clist_range *r, void *), void *st)
+crange_foreach(struct crange *cr, int (*cb)(struct range *r, void *), void *st)
 {
-  struct clist_range *e;
+  struct range *e;
   assert(cr);
-  for (e = RANGE_WOMARK(cr->crange_head.next[0]); e; e = RANGE_WOMARK(e->next[0])) {
+  for (e = WOMARK(cr->crange_head.next[0]); e; e = WOMARK(e->next[0])) {
     if (!cb(e, st))
       return 0;
   }
...
@@ -57,25 +57,23 @@ void consoleintr(int(*)(void));
 
 // crange.c
-struct clist_range {
+struct range {
   u64 key;
   u64 size;
   void *value;
-  int curlevel;
-  int nlevel;
-  struct crange *cr;
-  struct clist_range** next;  // one next pointer per level
+  int curlevel;           // the current levels it appears on
+  int nlevel;             // the number of levels this range should appear on
+  struct crange *cr;      // the crange this range is part of
+  struct range** next;    // one next pointer per level
   struct spinlock *lock;  // on separate cache line?
 } __mpalign__;
 
-struct crange;
-
 struct crange* crange_alloc(int nlevel);
 void crange_free(struct crange *cr);
 void crange_del(struct crange *cr, u64 k, u64 sz);
 void crange_add(struct crange *cr, u64 k, u64 sz, void *v);
-struct clist_range* crange_search(struct crange *cr, u64 k, u64 sz);
-int crange_foreach(struct crange *crk, int (*f)(struct clist_range *r, void *st), void *st);
+struct range* crange_search(struct crange *cr, u64 k, u64 sz, int mod);
+int crange_foreach(struct crange *crk, int (*f)(struct range *r, void *st), void *st);
 void crange_print(struct crange *cr, int);
 
 // e1000.c
...