Commit 89c11d1a authored by David Benjamin

Drop entry_dead

Simplifies things. radix_node is not RCU-freed and remains a power of two in size. update_range always succeeds and doesn't need to return how far it got.
Parent d664c102
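To illustrate the new contract, here is a minimal sketch (not part of the commit): update_range's callback now returns void and is simply run on every leaf covering the range, instead of reporting how far it got. "root", "start" and "end" are hypothetical stand-ins for r_->root_ and the caller's range; the types are assumed to come from this tree's radix header.

  // Sketch only: the unlock pass from ~radix_range, written against the
  // simplified update_range. No progress value is returned or checked.
  void unlock_all(radix_ptr &root, u64 start, u64 end)
  {
    update_range(root.load(), &root,
                 [](radix_entry cur, radix_ptr *ptr, u64 cur_start, u64 cur_end, u32 level) {
                   do {
                     assert(cur.state() == entry_locked);  // caller owns the lock
                   } while (!ptr->compare_exchange_weak(cur, cur.with_state(entry_unlocked)));
                 },
                 0, 1L << key_bits, start, end);
  }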
@@ -46,6 +46,12 @@ class radix_node;
  * Once a pointer is dead, it stays dead until the containing
  * radix_node is deallocated. Dead pointers do not own references.
  *
+ * For now we do not implement the dead state. It is only necessary
+ * when collapsing an already-expanded node. It's unclear this
+ * optimization is very useful as it requires RCU-freeing radix_nodes,
+ * which makes them just over a power of 2 and inefficient to
+ * allocate.
+ *
  * Races:
  *
  * - If a leaf to be locked (or pushed down) gets pushed down, lock
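The sizing argument in the new comment can be made concrete with a small standalone sketch (not from this commit; bits_per_level = 9 and an 8-byte slot are assumptions for illustration): the node's payload is exactly a power of two, so any per-object RCU/GC header would push the allocation just past that boundary and waste most of the next size class.

  #include <cstdint>
  #include <cstdio>

  enum { bits_per_level = 9 };                         // assumed value, for illustration
  struct slot { uint64_t v; };                         // stand-in for radix_ptr (one word)

  struct node_plain {                                  // layout after this commit
    slot child[1 << bits_per_level];                   // 512 * 8 = 4096 bytes, a power of two
  };

  struct rcu_header { void *next; uint64_t epoch; };   // hypothetical RCU bookkeeping

  struct node_rcu : rcu_header {                       // what RCU-freeing would require
    slot child[1 << bits_per_level];                   // now 4096 + 16 = 4112 bytes
  };

  int main() {
    printf("plain: %zu bytes, with rcu header: %zu bytes\n",
           sizeof(node_plain), sizeof(node_rcu));
    return 0;
  }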
@@ -62,8 +68,8 @@ class radix_node;
 enum entry_state {
   entry_unlocked = 0,
   entry_locked = 1,
-  entry_dead = 2,
-  entry_node = 3,
+  // entry_dead = 2,
+  entry_node = 2,
   entry_mask = 3
 };
@@ -155,14 +161,11 @@ class radix_elem : public rcu_freed {
   void incref(u64 delta = 1) { ref_ += delta; }
 };

-struct radix_node : public rcu_freed {
+struct radix_node {
   radix_ptr child[1 << bits_per_level];

-  radix_node() : rcu_freed("radix_node") {
-  }
+  radix_node() { }

   ~radix_node();

-  virtual void do_gc() { delete this; }
   NEW_DELETE_OPS(radix_node)
 };
...
@@ -27,7 +27,7 @@ level_size(u32 level)
 static radix_entry
 push_down(radix_entry cur, radix_ptr *ptr)
 {
-  while (cur.state() != entry_dead && cur.state() != entry_node) {
+  while (cur.state() != entry_node) {
     // If we're locked, just spin and try again.
     if (cur.state() == entry_locked) {
       cur = ptr->load();
@@ -63,34 +63,29 @@ push_down(radix_entry cur, radix_ptr *ptr)
         elem->decref(1<<bits_per_level);
       }
-      new_rn->do_gc();
+      delete new_rn;
     }
   }
   return cur;
 }

-// Returns the next node to be processed, whether or not it falls in
-// the range. Success is to return cur_start + cur_size. Otherwise we
-// stopped early and bubble up the error.
+// Runs CB of a set of leaves whose disjoint union is the range
+// [start, end)
 template <class CB>
-u64
+void
 update_range(radix_entry cur, radix_ptr *ptr, CB cb,
              u64 cur_start, u64 cur_end,
              u64 start, u64 end, u32 level = radix_levels)
 {
   assert(level_size(level) == cur_end - cur_start);
-  // If ranges are disjoint, do nothing. We manage to process everyone
-  // for free.
+  // If ranges are disjoint, do nothing.
   if (cur_start >= end || start >= cur_end)
-    return cur_end;
+    return;
   // If our range is not strictly contained in the target, ensure we
   // are at a node.
   if (start > cur_start || end < cur_end) {
     cur = push_down(cur, ptr);
-    // Failed. Next time resume at cur_start.
-    if (cur.state() == entry_dead)
-      return cur_start;
   }
   if (cur.is_node()) {
@@ -104,18 +99,15 @@ update_range(radix_entry cur, radix_ptr *ptr, CB cb,
     for (; (i < (1<<bits_per_level)) && (child_start < cur_end);
          i++, child_start += child_size) {
       radix_ptr *child = &cur.node()->child[i];
-      u64 ret = update_range(child->load(), child, cb,
+      update_range(child->load(), child, cb,
                    child_start, child_start + child_size,
                    start, end, level - 1);
-      if (ret != child_start + child_size) return ret;
     }
-    return cur_end;
   } else {
     // If we're here, the target range must completely contain this
     // element.
     assert(start <= cur_start && cur_end <= end);
-    // Callback returns how far it processed.
-    return cb(cur, ptr, cur_start, cur_end, level);
+    cb(cur, ptr, cur_start, cur_end, level);
   }
 }
@@ -124,10 +116,8 @@ radix_entry::release()
 {
   if (is_null()) return;
   if (is_node()) {
-    node()->do_gc();
-  } else if (state() != entry_dead) {
-    // Only decref live pointers. Dead ones are part of pages which
-    // were RCU-freed and no longer own references.
+    delete node();
+  } else {
     elem()->decref();
   }
 }
@@ -169,8 +159,8 @@ struct entry_locker {
   u64 end_;
   entry_locker(u64 start, u64 end) : start_(start), end_(end) { }
-  u64 operator()(radix_entry cur, radix_ptr *ptr, u64 cur_start, u64 cur_end, u32 level) const {
-    while (cur.state() != entry_dead && cur.state() != entry_node) {
+  void operator()(radix_entry cur, radix_ptr *ptr, u64 cur_start, u64 cur_end, u32 level) const {
+    while (cur.state() != entry_node) {
       // Locked -> spin and try again.
       if (cur.state() == entry_locked) {
         cur = ptr->load();
@@ -183,30 +173,24 @@ struct entry_locker {
         break;
       }
     }
-    // Someone deleted this leaf. Abort this iteration.
-    if (cur.state() == entry_dead)
-      return cur_start;
     // Someone pushed down. Recurse some more.
-    if (cur.state() == entry_node)
-      return update_range(cur, ptr, *this, cur_start, cur_end, start_, end_, level-1);
-    // We managed to lock!
-    assert(cur.state() == entry_locked);
-    return cur_end;
+    if (cur.state() == entry_node) {
+      update_range(cur, ptr, *this, cur_start, cur_end, start_, end_, level-1);
+    } else {
+      // We managed to lock!
+      assert(cur.state() == entry_locked);
+    }
   }
 };

 radix_range::radix_range(radix *r, u64 start, u64 size)
   : r_(r), start_(start), size_(size)
 {
-  u64 next_start = start_;
   u64 end = start_ + size_;
-  // Lock the range from left to right. If we hid a dead element re-load the root.
-  while (next_start < end) {
-    const entry_locker& cb = entry_locker(next_start, end);
-    next_start = update_range(r_->root_.load(), &r_->root_, cb,
-                              0, 1L << key_bits, next_start, end);
-    assert(next_start >= start_);
-  }
+  // Lock the range from left to right.
+  const entry_locker& cb = entry_locker(start_, end);
+  update_range(r_->root_.load(), &r_->root_, cb,
+               0, 1L << key_bits, start_, end);
 }

 radix_range::~radix_range()
@@ -214,16 +198,12 @@ radix_range::~radix_range()
   if (!r_)
     return;

-  u64 ret = update_range(r_->root_.load(), &r_->root_, [](radix_entry cur, radix_ptr *ptr, u64 cur_start, u64 cur_end, u32 level) -> u64 {
+  update_range(r_->root_.load(), &r_->root_, [](radix_entry cur, radix_ptr *ptr, u64 cur_start, u64 cur_end, u32 level) {
     do {
       // It had better still be locked.
       assert(cur.state() == entry_locked);
     } while (!ptr->compare_exchange_weak(cur, cur.with_state(entry_unlocked)));
-    return cur_end;
   }, 0, 1L << key_bits, start_, start_ + size_);
-  // Impossible to hit entry_dead. We own the lock.
-  if (ret != 1L << key_bits)
-    panic("~radix_range");
 }

 void
@@ -236,7 +216,7 @@ radix_range::replace(u64 start, u64 size, radix_elem *val)
   assert(start >= start_);
   assert(start + size <= start_ + size_);

-  u64 ret = update_range(r_->root_.load(), &r_->root_, [val](radix_entry cur, radix_ptr *ptr, u64 cur_start, u64 cur_end, u32 level) -> u64 {
+  update_range(r_->root_.load(), &r_->root_, [val](radix_entry cur, radix_ptr *ptr, u64 cur_start, u64 cur_end, u32 level) {
     dprintf(" -> [%lx, %lx); size = %lx\n", cur_start, cur_end, cur_end - cur_start);
     do {
       assert(cur.state() == entry_locked);
@@ -245,14 +225,7 @@ radix_range::replace(u64 start, u64 size, radix_elem *val)
     val->incref();
     if (!cur.is_null())
       cur.elem()->decref();
-    return cur_end;
   }, 0, 1L << key_bits, start, start + size);
-  // Impossible to hit entry_dead. We own the lock.
-  if (ret != 1L << key_bits)
-    panic("radix_range::replace");
-
-  // TODO: If we can, collapse some intermediate nodes, RCU-freeing
-  // them.
 }

 radix_iterator::radix_iterator(const radix* r, u64 k)
...
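As a usage-level summary, a hedged sketch of the range API touched by this commit (not part of the commit; "tree", "elem", "key" and "n" are hypothetical, and key units are whatever this radix tree is indexed by): constructing a radix_range locks the range in a single pass, replace() installs an element under that lock, and the destructor unlocks.

  // Sketch only: RAII locking with the simplified API, assuming this tree's
  // radix header is included.
  void install(radix *tree, radix_elem *elem, u64 key, u64 n)
  {
    radix_range rr(tree, key, n);  // constructor locks [key, key + n); no retry loop on dead entries
    rr.replace(key, n, elem);      // update_range now returns void and always covers the range
  }                                // ~radix_range() unlocks every leaf it locked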