Commit 406778a3 authored by Silas Boyd-Wickizer

Merge branch 'scale-amd64' of git+ssh://amsterdam.csail.mit.edu/home/am0/6.828/xv6 into scale-amd64

@@ -1741,6 +1741,24 @@ unmappedtest(void)
printf("unmappedtest ok\n"); printf("unmappedtest ok\n");
} }
static int nenabled;
static char **enabled;
void
run_test(const char *name, void (*test)())
{
if (!nenabled) {
test();
} else {
for (int i = 0; i < nenabled; i++) {
if (strcmp(name, enabled[i]) == 0) {
test();
break;
}
}
}
}
int int
main(int argc, char *argv[]) main(int argc, char *argv[])
{ {
@@ -1752,47 +1770,52 @@ main(int argc, char *argv[])
} }
close(open("usertests.ran", O_CREATE)); close(open("usertests.ran", O_CREATE));
unopentest(); nenabled = argc - 1;
bigargtest(); enabled = argv + 1;
bsstest();
sbrktest(); #define TEST(name) run_test(#name, name)
TEST(unopentest);
TEST(bigargtest);
TEST(bsstest);
TEST(sbrktest);
// we should be able to grow a user process to consume all phys mem // we should be able to grow a user process to consume all phys mem
unmappedtest(); TEST(unmappedtest);
validatetest(); TEST(validatetest);
opentest(); TEST(opentest);
writetest(); TEST(writetest);
writetest1(); TEST(writetest1);
createtest(); TEST(createtest);
preads(); TEST(preads);
// mem(); // TEST(mem);
pipe1(); TEST(pipe1);
preempt(); TEST(preempt);
exitwait(); TEST(exitwait);
rmdot(); TEST(rmdot);
thirteen(); TEST(thirteen);
longname(); TEST(longname);
bigfile(); TEST(bigfile);
subdir(); TEST(subdir);
concreate(); TEST(concreate);
linktest(); TEST(linktest);
unlinkread(); TEST(unlinkread);
createdelete(); TEST(createdelete);
twofiles(); TEST(twofiles);
sharedfd(); TEST(sharedfd);
dirfile(); TEST(dirfile);
iref(); TEST(iref);
forktest(); TEST(forktest);
bigdir(); // slow TEST(bigdir); // slow
tls_test(); TEST(tls_test);
thrtest(); TEST(thrtest);
ftabletest(); TEST(ftabletest);
exectest(); TEST(exectest);
exit(); exit();
} }
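With this change the suite becomes filterable from the command line: run_test() runs every test when no arguments are given (nenabled == 0), and otherwise only the tests whose names match argv[1..argc-1]. The TEST macro just stringizes the function name; a purely illustrative expansion (not part of the diff):

  TEST(sbrktest);                  // expands to
  run_test("sbrktest", sbrktest);  // name for strcmp(), pointer to call

So a hypothetical invocation such as "usertests sbrktest pipe1" would run only those two tests, while a bare "usertests" keeps the old run-everything behavior.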
#pragma once #pragma once
template<class A, class B> #include <type_traits>
class pair { #include <utility>
public:
A _a;
B _b;
pair(const A &a, const B &b) : _a(a), _b(b) {} using std::pair;
using std::make_pair;
bool operator==(const pair<A, B> &other) {
return _a == other._a && _b == other._b;
}
};
template<int N> template<int N>
class strbuf { class strbuf {
@@ -27,13 +20,6 @@ class strbuf {
} }
}; };
template<class A, class B>
pair<A, B>
mkpair(const A &a, const B &b)
{
return pair<A, B>(a, b);
}
class scoped_acquire { class scoped_acquire {
private: private:
spinlock *_l; spinlock *_l;
@@ -48,25 +34,6 @@ class scoped_acquire {
class retryable {}; class retryable {};
namespace std { namespace std {
template<class T>
struct remove_reference
{ typedef T type; };
template<class T>
struct remove_reference<T&>
{ typedef T type; };
template<class T>
struct remove_reference<T&&>
{ typedef T type; };
template<class T>
typename remove_reference<T>::type&&
move(T&& a)
{
return static_cast<typename remove_reference<T>::type&&>(a);
}
struct ostream { int next_width; }; struct ostream { int next_width; };
extern ostream cout; extern ostream cout;
......
@@ -5,12 +5,145 @@
*/ */
#include "gc.hh" #include "gc.hh"
#include "markptr.hh"
enum { bits_per_level = 9 }; enum { bits_per_level = 6 };
enum { key_bits = 36 }; enum { key_bits = 36 };
enum { radix_levels = (key_bits + bits_per_level - 1) / bits_per_level }; enum { radix_levels = (key_bits + bits_per_level - 1) / bits_per_level };
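With the new constants, each radix_node has 1 << 6 = 64 children and radix_levels = (36 + 6 - 1) / 6 = 6, so a 36-bit key is consumed in exactly six 6-bit chunks (the old bits_per_level of 9 gave 512-way nodes and (36 + 8) / 9 = 4 levels). That exact-multiple property is what the static_assert added in radix.cc relies on.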
class radix_elem;
class radix_node;
/*
* Each pointer to a radix_elem or radix_node can be in one of four
* states:
*
* - pointer to radix_node
* - unlocked leaf
* - locked leaf
* - dead leaf
*
* A leaf is either a pointer to a radix_elem or null.
*
* Before making semantic modifications to a range, the range must be
* locked. This is done by locking the leaf pointers (be they to
* radix_elem or null) corresponding to that range. If necessary, a
* leaf may be "pushed down" and replaced with a pointer to radix_node
* full of the old value to get the endpoints accurate. Locking NEVER
* happens at a higher level than the current set of leaves.
*
* We assume that a thread attempting to push down a leaf is doing
* so to lock it.
*
* When replacing a range, we'd like to possibly retire old
* radix_nodes when their contents are all set to be the same. Before
* doing this, all leaves under that radix_node must be locked. We
* transition them to 'dead leaf' state. This informs all others
* attempting to lock the pointer to retry. The radix_node itself is
* RCU-freed. To avoid restarting writers, set the leaves to the right
* value too. Replaced elements are written in locked state, to be
* unlocked when the radix_range goes away.
*
* Once a pointer is dead, it stays dead until the containing
* radix_node is deallocated. Dead pointers do not own references.
*
* For now we do not implement the dead state. It is only necessary
* when collapsing an already-expanded node. It's unclear this
* optimization is very useful as it requires RCU-freeing radix_nodes,
* which makes them just over a power of 2 and inefficient to
* allocate.
*
* Races:
*
* - If a leaf to be locked (or pushed down) gets pushed down, lock
* the new radix_node at a more granular level.
*
* - If a leaf to be locked (or pushed down) goes dead, restart
* everything from the root. Many values may have gone invalid.
*
* - If a leaf to be locked (or pushed down) gets locked, spin.
*
* [*] XXX: Try not to bounce on the radix_elem refcount too much.
*/
enum entry_state {
entry_unlocked = 0,
entry_locked = 1,
// entry_dead = 2,
entry_node = 2,
entry_mask = 3
};
class radix_entry {
public:
radix_entry()
: value_(0 | entry_unlocked) { }
explicit radix_entry(uptr value)
: value_(value) { }
explicit radix_entry(radix_node *ptr)
: value_(reinterpret_cast<uptr>(ptr) | entry_node) {
// XXX: This is kinda wonky. Maybe switch the status to
// entry_unlocked if ptr is null, make null pass both is_elem()
// and is_node().
assert(ptr != nullptr);
}
explicit radix_entry(radix_elem *ptr, entry_state state = entry_unlocked)
: value_(reinterpret_cast<uptr>(ptr) | state) {
assert(state != entry_node);
}
explicit radix_entry(decltype(nullptr) nullp,
entry_state state = entry_unlocked)
: value_(0 | state) {
assert(state != entry_node);
}
uptr value() const { return value_; }
uptr& value() { return value_; }
entry_state state() const {
return static_cast<entry_state>(value_ & entry_mask);
}
uptr ptr() const { return value_ & ~entry_mask; }
bool is_node() const { return state() == entry_node; }
bool is_elem() const { return !is_node(); }
bool is_null() const { return ptr() == 0; }
// Convenience function
radix_entry with_state(entry_state state) {
return radix_entry(elem(), state);
}
radix_elem *elem() const {
assert(is_elem());
return reinterpret_cast<radix_elem*>(ptr());
}
radix_node *node() const {
assert(is_node());
return reinterpret_cast<radix_node*>(ptr());
}
void release();
private:
uptr value_;
};
// Our version of std::atomic doesn't work for structs, even if they
// are integer sized.
class radix_ptr {
public:
radix_ptr() : ptr_(radix_entry().value()) { }
radix_ptr(radix_entry e) : ptr_(e.value()) { }
radix_entry load() const { return radix_entry(ptr_.load()); }
void store(radix_entry e) { ptr_.store(e.value()); }
bool compare_exchange_weak(radix_entry &old, radix_entry val) {
return ptr_.compare_exchange_weak(old.value(), val.value());
}
private:
static_assert(sizeof(uptr) == sizeof(radix_entry),
"radix_entry is a uptr");
std::atomic<uptr> ptr_;
};
class radix_elem : public rcu_freed { class radix_elem : public rcu_freed {
private: private:
bool deleted_; bool deleted_;
@@ -19,19 +152,29 @@ class radix_elem : public rcu_freed {
public: public:
radix_elem() : rcu_freed("radix_elem"), deleted_(false), ref_(0) {} radix_elem() : rcu_freed("radix_elem"), deleted_(false), ref_(0) {}
bool deleted() { return deleted_; } bool deleted() { return deleted_; }
void decref() { if (--ref_ == 0) { deleted_ = true; gc_delayed(this); } } void decref(u64 delta = 1) {
void incref() { ref_++; } if ((ref_ -= delta) == 0) {
deleted_ = true;
gc_delayed(this);
}
}
void incref(u64 delta = 1) { ref_ += delta; }
}; };
struct radix_node { struct radix_node {
markptr<void> ptr[1 << bits_per_level]; radix_ptr child[1 << bits_per_level];
radix_node() { radix_node() { }
for (int i = 0; i < sizeof(ptr) / sizeof(ptr[0]); i++) ~radix_node();
ptr[i] = 0;
}
NEW_DELETE_OPS(radix_node) NEW_DELETE_OPS(radix_node)
}; };
// Assert we have enough spare bits for all flags.
static_assert(alignof(radix_node) > entry_mask,
"radix_node sufficiently aligned");
static_assert(alignof(radix_elem) > entry_mask,
"radix_elem sufficiently aligned");
struct radix; struct radix;
struct radix_range { struct radix_range {
@@ -50,58 +193,43 @@ struct radix_range {
}; };
struct radix { struct radix {
markptr<void> root_; radix_ptr root_;
u32 shift_; u32 shift_;
radix(u32 shift) : root_(0), shift_(shift) { radix(u32 shift) : root_(radix_entry(new radix_node())), shift_(shift) {
root_.ptr() = new radix_node();
} }
~radix();
radix_elem* search(u64 key); radix_elem* search(u64 key);
radix_range search_lock(u64 start, u64 size); radix_range search_lock(u64 start, u64 size);
// k is shifted value.
u64 skip_empty(u64 k) const;
NEW_DELETE_OPS(radix) NEW_DELETE_OPS(radix)
}; };
struct radix_iterator { struct radix_iterator {
const radix* r_; const radix* r_;
u64 k_; u64 k_;
radix_iterator(const radix* r, u64 k) : r_(r), k_(r->skip_empty(k)) {}
radix_iterator &operator++() { k_++; k_ = r_->skip_empty(k_); return *this; }
radix_elem* operator*();
bool operator==(const radix_iterator &other) {
return r_ == other.r_ && k_ == other.k_; }
bool operator!=(const radix_iterator &other) {
return r_ != other.r_ || k_ != other.k_; }
};
struct radix_iterator2 {
const radix* r_;
u64 k_;
// path_[i] is the node at level i. Note that the leaf is at zero // path_[i] is the node at level i. Note that the leaf is at zero
// and is radix_elem. The rest are radix_node. For now we assume all // and is radix_elem. The rest are radix_node. For now we assume all
// leaves are at level 0. Later we'll steal a bit for them. The root // leaves are at level 0. Later we'll steal a bit for them. The root
// is path_[radix_levels]. // is path_[radix_levels].
void *path_[radix_levels+1]; radix_entry path_[radix_levels+1];
u32 leaf_;
radix_iterator2(const radix* r, u64 k); radix_iterator(const radix* r, u64 k);
radix_iterator2 &operator++() { radix_iterator &operator++() {
if (!advance(radix_levels-1)) k_ = ~0ULL; if (!advance(radix_levels-1)) k_ = ~0ULL;
return *this; return *this;
} }
radix_elem* operator*() { radix_elem* operator*() {
return (radix_elem*)path_[0]; return path_[leaf_].elem();
} }
radix_node* node(u32 level) { return (radix_node*)path_[level]; } radix_node* node(u32 level) { return path_[level].node(); }
// Compare equality on just the key. // Compare equality on just the key.
bool operator==(const radix_iterator2 &other) { bool operator==(const radix_iterator &other) {
return r_ == other.r_ && k_ == other.k_; } return r_ == other.r_ && k_ == other.k_; }
bool operator!=(const radix_iterator2 &other) { bool operator!=(const radix_iterator &other) {
return r_ != other.r_ || k_ != other.k_; } return r_ != other.r_ || k_ != other.k_; }
private: private:
@@ -109,8 +237,6 @@ private:
bool advance(u32 level); bool advance(u32 level);
}; };
#define radix_iterator radix_iterator2
static inline radix_iterator static inline radix_iterator
begin(const radix &r) { return radix_iterator(&r, 0); } begin(const radix &r) { return radix_iterator(&r, 0); }
@@ -123,5 +249,3 @@ begin(const radix_range &rr) { return radix_iterator(rr.r_, rr.start_); }
static inline radix_iterator static inline radix_iterator
end(const radix_range &rr) { return radix_iterator(rr.r_, rr.start_ + rr.size_); } end(const radix_range &rr) { return radix_iterator(rr.r_, rr.start_ + rr.size_); }
#undef radix_iterator
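Since begin()/end() are free functions over both radix and radix_range, a locked range can be walked with an ordinary range-for, which is how vm.cc below consumes vmap::vmas. A minimal sketch (r, start, size, and do_something() are placeholders):

  auto span = r.search_lock(start, size);   // locks [start, start + size)
  for (radix_elem *e : span) {
    if (!e)              // vm.cc guards against null results the same way
      continue;
    do_something(e);
  }
  // the leaf locks are released when span (the radix_range) is destroyed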
@@ -72,11 +72,11 @@ struct vma
// The elements of e[] are not ordered by address. // The elements of e[] are not ordered by address.
struct vmap { struct vmap {
#if VM_CRANGE #if VM_CRANGE
struct crange cr; struct crange vmas;
#endif #endif
#if VM_RADIX #if VM_RADIX
struct radix rx; struct radix vmas;
#endif #endif
static vmap* alloc(); static vmap* alloc();
......
@@ -32,7 +32,7 @@
u64 u64
bio_hash(const pair<u32, u64> &p) bio_hash(const pair<u32, u64> &p)
{ {
return p._a ^ p._b; return p.first ^ p.second;
} }
static xns<pair<u32, u64>, buf*, bio_hash> *bufns; static xns<pair<u32, u64>, buf*, bio_hash> *bufns;
@@ -51,7 +51,7 @@ bget(u32 dev, u64 sector, int *writer)
loop: loop:
// Try for cached block. // Try for cached block.
// XXX ignore dev // XXX ignore dev
b = bufns->lookup(mkpair(dev, sector)); b = bufns->lookup(make_pair(dev, sector));
if (b) { if (b) {
if (b->dev != dev || b->sector != sector) if (b->dev != dev || b->sector != sector)
panic("block mismatch"); panic("block mismatch");
@@ -76,7 +76,7 @@ bget(u32 dev, u64 sector, int *writer)
b = new buf(dev, sector); b = new buf(dev, sector);
b->flags = B_BUSY; b->flags = B_BUSY;
*writer = 1; *writer = 1;
if (bufns->insert(mkpair(b->dev, b->sector), b) < 0) { if (bufns->insert(make_pair(b->dev, b->sector), b) < 0) {
gc_delayed(b); gc_delayed(b);
goto loop; goto loop;
} }
......
@@ -152,7 +152,7 @@ bfree(int dev, u64 x)
u64 u64
ino_hash(const pair<u32, u32> &p) ino_hash(const pair<u32, u32> &p)
{ {
return p._a ^ p._b; return p.first ^ p.second;
} }
static xns<pair<u32, u32>, inode*, ino_hash> *ins; static xns<pair<u32, u32>, inode*, ino_hash> *ins;
@@ -268,7 +268,7 @@ igetnoref(u32 dev, u32 inum)
// Try for cached inode. // Try for cached inode.
{ {
scoped_gc_epoch e; scoped_gc_epoch e;
struct inode *ip = ins->lookup(mkpair(dev, inum)); struct inode *ip = ins->lookup(make_pair(dev, inum));
if (ip) { if (ip) {
if (!(ip->flags & I_VALID)) { if (!(ip->flags & I_VALID)) {
acquire(&ip->lock); acquire(&ip->lock);
@@ -290,7 +290,7 @@ igetnoref(u32 dev, u32 inum)
snprintf(ip->lockname, sizeof(ip->lockname), "cv:ino:%d", ip->inum); snprintf(ip->lockname, sizeof(ip->lockname), "cv:ino:%d", ip->inum);
initlock(&ip->lock, ip->lockname+3, LOCKSTAT_FS); initlock(&ip->lock, ip->lockname+3, LOCKSTAT_FS);
initcondvar(&ip->cv, ip->lockname); initcondvar(&ip->cv, ip->lockname);
if (ins->insert(mkpair(ip->dev, ip->inum), ip) < 0) { if (ins->insert(make_pair(ip->dev, ip->inum), ip) < 0) {
gc_delayed(ip); gc_delayed(ip);
goto retry; goto retry;
} }
@@ -399,7 +399,7 @@ iput(struct inode *ip)
ip->gen += 1; ip->gen += 1;
iupdate(ip); iupdate(ip);
ins->remove(mkpair(ip->dev, ip->inum), &ip); ins->remove(make_pair(ip->dev, ip->inum), &ip);
gc_delayed(ip); gc_delayed(ip);
icache_free[mycpu()->id].x++; icache_free[mycpu()->id].x++;
return; return;
......
#include "crange_arch.hh" #include "crange_arch.hh"
#include "radix.hh" #include "radix.hh"
enum { crange_debug = 0 }; enum { radix_debug = 0 };
#define dprintf(...) do { if (crange_debug) cprintf(__VA_ARGS__); } while(0) #define dprintf(...) do { if (radix_debug) cprintf(__VA_ARGS__); } while(0)
static_assert(key_bits == bits_per_level * radix_levels,
"for now, we only support exact multiples of bits_per_level");
// Returns the index needed to reach |level| from |level+1|. // Returns the index needed to reach |level| from |level+1|.
static u32 static u32
@@ -14,48 +17,141 @@ index(u64 key, u32 level)
return idx; return idx;
} }
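The body of index() is collapsed in this view; given its contract (the comment above) and the constants, each 36-bit key presumably splits into six 6-bit chunks in the usual way, with the chunk for level 5 (bits 35..30, an assumption about the elided body) indexing the root's children and the chunk for level 0 (bits 5..0) selecting the leaf slot in the bottom-level node.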
// Returns the level we stopped at. // Returns the size of a subtree for a node at |level|.
template<class CB> static u64
u32 level_size(u32 level)
descend(u64 key, markptr<void> *n, u32 level, CB cb, bool create)
{ {
static_assert(key_bits == bits_per_level * radix_levels, return 1L << (bits_per_level * level);
"for now, we only support exact multiples of bits_per_level"); }
assert(n);
static radix_entry
push_down(radix_entry cur, radix_ptr *ptr)
{
while (cur.state() != entry_node) {
// If we're locked, just spin and try again.
if (cur.state() == entry_locked) {
cur = ptr->load();
continue;
}
void *v = n->ptr(); // Make a new node.
if (v == 0 && create) { assert(cur.state() == entry_unlocked);
radix_elem *elem = cur.elem();
// FIXME: This might throw. Might need to unlock the things you've
// already hit.
radix_node *new_rn = new radix_node(); radix_node *new_rn = new radix_node();
if (n->ptr().cmpxch_update(&v, (void*) new_rn)) if (elem != nullptr) {
v = new_rn; for (int i = 0; i < (1<<bits_per_level); i++) {
else new_rn->child[i].store(radix_entry(elem));
}
elem->incref(1<<bits_per_level);
}
if (ptr->compare_exchange_weak(cur, radix_entry(new_rn))) {
// Release the ref from the pointer we replaced. FIXME: Bouncing
// on the reference count here is annoying. Maybe the reference
// count should be dependent on the height of the leaf?
if (elem != nullptr)
elem->decref();
} else {
// Someone else beat us to it. Back out. FIXME: docs say
// compare_exchange_weak can fail spuriously. Should avoid
// reallocating new_rn if elem doesn't change.
// Avoid bouncing on the refcount 1<<bits_per_level times.
if (elem != nullptr) {
for (int i = 0; i < (1<<bits_per_level); i++) {
new_rn->child[i].store(radix_entry(nullptr));
}
elem->decref(1<<bits_per_level);
}
delete new_rn; delete new_rn;
} }
// Node isn't there. Just return. }
if (v == 0) { return cur;
return level+1; }
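To make the reference counting in push_down concrete: before the push-down, the leaf pointer owns one reference to elem; filling all 1 << 6 = 64 child slots adds incref(64); when the CAS installs the new node, the single decref() releases the reference the replaced leaf owned, a net of +63, i.e. exactly one reference per new child slot. On CAS failure the children are nulled first and decref(64) cancels the whole batch, so the count ends where it started.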
// Runs |cb| on a set of leaves whose disjoint union is the range
// [start, end). Callback returns the last known state of the
// radix_ptr. It is assumed |cb| does not convert the leaf into a
// node. If |cb| returns an entry_node, we recurse into the node and
// call |cb| on the new subtree.
template <class CB>
void
update_range(radix_entry cur, radix_ptr *ptr, CB cb,
u64 cur_start, u64 cur_end,
u64 start, u64 end, u32 level = radix_levels)
{
assert(level_size(level) == cur_end - cur_start);
// Assert that our ranges intersect; if this fails, we got the loop
// below wrong.
assert(cur_start < end && start < cur_end);
// If our range is not strictly contained in the target, ensure we
// are at a node.
if (start > cur_start || end < cur_end) {
cur = push_down(cur, ptr);
}
if (cur.is_elem()) {
// If we're here, the target range must completely contain this
// element.
assert(start <= cur_start && cur_end <= end);
dprintf(" -> [%lx, %lx); size = %lx\n", cur_start, cur_end, cur_end - cur_start);
cur = cb(cur, ptr);
} }
radix_node *rn = (radix_node*) v; // Recurse if we became a node or were already one.
if (cur.is_node()) {
// Find the place to start.
if (start < cur_start)
start = cur_start;
assert(level > 0);
int i = index(start, level - 1);
u64 child_size = (cur_end - cur_start) >> bits_per_level;
u64 child_start = cur_start + i * child_size;
for (; (i < (1<<bits_per_level)) && (child_start < end);
i++, child_start += child_size) {
radix_ptr *child = &cur.node()->child[i];
update_range(child->load(), child, cb,
child_start, child_start + child_size,
start, end, level - 1);
}
}
}
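For reference, a minimal hypothetical caller of update_range in the same shape as the lock/unlock/replace callbacks further down (count_leaves_sketch and its arguments are illustrative only; start and end are already-shifted keys, and leaves straddling the boundary still get pushed down as described above):

  static u64
  count_leaves_sketch(radix *r, u64 start, u64 end)
  {
    u64 n = 0;
    update_range(r->root_.load(), &r->root_,
                 [&n](radix_entry cur, radix_ptr *ptr) -> radix_entry {
                   if (!cur.is_null())
                     n++;
                   return cur;   // leaf left unchanged, per the contract
                 },
                 0, 1L << key_bits, start, end);
    return n;
  }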
markptr<void> *vptr = &rn->ptr[index(key, level)]; void
if (level == 0) { radix_entry::release()
cb(vptr); {
return level; if (is_null()) return;
if (is_node()) {
delete node();
} else { } else {
return descend(key, vptr, level-1, cb, create); elem()->decref();
}
}
radix_node::~radix_node()
{
for (int i = 0; i < (1<<bits_per_level); i++) {
child[i].load().release();
} }
} }
radix::~radix()
{
root_.load().release();
}
radix_elem* radix_elem*
radix::search(u64 key) radix::search(u64 key)
{ {
radix_elem *result = 0; radix_entry cur = root_.load();
descend(key >> shift_, &root_, radix_levels-1, [&result](markptr<void> *v) { for (u32 level = radix_levels-1; level >= 0 && !cur.is_elem(); level--) {
result = (radix_elem*) v->ptr().load(); cur = cur.node()->child[index(key >> shift_, level)].load();
}, false); }
dprintf("%p: search(%lu) -> %p\n", this, key >> shift_, result); dprintf("%p: search(%lx) -> %p\n", this, key >> shift_, cur.elem());
return result; return cur.elem();
} }
radix_range radix_range
@@ -64,37 +160,30 @@ radix::search_lock(u64 start, u64 size)
return radix_range(this, start >> shift_, size >> shift_); return radix_range(this, start >> shift_, size >> shift_);
} }
u64
radix::skip_empty(u64 k) const
{
u64 next_k = k;
while (next_k < (1UL<<key_bits)) {
// Does next_k exist?
// FIXME: evil evil const_cast
u32 level = descend(next_k, const_cast<markptr<void>*>(&root_),
radix_levels-1, [](markptr<void> *v){}, false);
if (level == 0) {
return next_k;
}
u64 mask = 1UL<<(bits_per_level * level);
// Skip past everything we know is missing.
next_k = (next_k & ~(mask-1)) + mask;
}
// Nope, no successor.
return ~0ULL;
}
radix_range::radix_range(radix *r, u64 start, u64 size) radix_range::radix_range(radix *r, u64 start, u64 size)
: r_(r), start_(start), size_(size) : r_(r), start_(start), size_(size)
{ {
for (u64 k = start_; k != start_ + size_; k++) { u64 end = start_ + size_;
if (descend(k, &r_->root_, radix_levels-1, [](markptr<void> *v) { // Lock the range from left to right.
while (!v->mark().xchg(true)) dprintf("%p: lock [%lx, %lx)\n", r_, start, start + size);
; // spin update_range(r_->root_.load(), &r_->root_, [](radix_entry cur, radix_ptr *ptr) -> radix_entry {
}, true) != 0) { while (cur.state() != entry_node) {
panic("radix_range"); // Locked -> spin and try again.
if (cur.state() == entry_locked) {
cur = ptr->load();
continue;
}
// Otherwise it's unlocked. Try to load it.
if (ptr->compare_exchange_weak(cur, cur.with_state(entry_locked))) {
// Success. Remember the current value and break out.
cur = cur.with_state(entry_locked);
break;
} }
} }
// We either managed a lock or someone turned us into a node.
assert(cur.state() == entry_node || cur.state() == entry_locked);
return cur;
}, 0, 1L << key_bits, start_, end);
} }
radix_range::~radix_range() radix_range::~radix_range()
@@ -102,13 +191,15 @@ radix_range::~radix_range()
if (!r_) if (!r_)
return; return;
for (u64 k = start_; k != start_ + size_; k++) { dprintf("%p: unlock [%lx, %lx)\n", r_, start_, start_ + size_);
if (descend(k, &r_->root_, radix_levels-1, [](markptr<void> *v) { update_range(r_->root_.load(), &r_->root_, [](radix_entry cur, radix_ptr *ptr) -> radix_entry {
v->mark() = false; do {
}, true) != 0) { // It had better still be locked.
panic("~radix_range"); assert(cur.state() == entry_locked);
} } while (!ptr->compare_exchange_weak(cur, cur.with_state(entry_unlocked)));
} // Not a node, but let's return what it wants anyway.
return cur.with_state(entry_unlocked);
}, 0, 1L << key_bits, start_, start_ + size_);
} }
void void
@@ -116,52 +207,43 @@ radix_range::replace(u64 start, u64 size, radix_elem *val)
{ {
start = start >> r_->shift_; start = start >> r_->shift_;
size = size >> r_->shift_; size = size >> r_->shift_;
dprintf("%p: replace: [%lu, %lu) with %p\n", r_, start, start + size, val);
assert(start >= start_); assert(start >= start_);
assert(start + size <= start_ + size_); assert(start + size <= start_ + size_);
for (u64 k = start; k != start + size; k++) { dprintf("%p: replace: [%lx, %lx) with %p\n", r_, start, start + size, val);
if (descend(k, &r_->root_, radix_levels-1, [val](markptr<void> *v) { update_range(r_->root_.load(), &r_->root_, [val](radix_entry cur, radix_ptr *ptr) -> radix_entry {
void* cur = v->ptr().load(); do {
while (!v->ptr().cmpxch_update(&cur, val)) assert(cur.state() == entry_locked);
; // spin } while (!ptr->compare_exchange_weak(cur, radix_entry(val, entry_locked)));
if (val) if (val)
val->incref(); val->incref();
if (cur) // cur is now the old value.
((radix_elem*) cur)->decref(); if (!cur.is_null())
}, true)) { cur.elem()->decref();
panic("radix_range::replace"); // Not a node, but let's return what it wants anyway.
} return radix_entry(val, entry_locked);
} }, 0, 1L << key_bits, start, start + size);
}
radix_elem*
radix_iterator::operator*()
{
radix_elem *result = 0;
descend(k_, (markptr<void>*) &r_->root_, radix_levels-1, [&result](markptr<void> *v) {
result = (radix_elem*) v->ptr().load();
}, false);
return result;
} }
radix_iterator2::radix_iterator2(const radix* r, u64 k) radix_iterator::radix_iterator(const radix* r, u64 k)
: r_(r), k_(k) { : r_(r), k_(k) {
dprintf("%p: Made iterator with k = %lu\n", r_, k_); dprintf("%p: Made iterator with k = %lx\n", r_, k_);
if (k_ != ~0ULL) { if (k_ != ~0ULL) {
path_[radix_levels] = r_->root_.ptr().load(); path_[radix_levels] = r_->root_.load();
if (!find_first_leaf(radix_levels - 1)) if (path_[radix_levels].is_elem())
leaf_ = radix_levels; // Maybe best to not do this...
else if (!find_first_leaf(radix_levels - 1))
k_ = ~0ULL; k_ = ~0ULL;
} }
dprintf("%p: Adjusted: k = %lu\n", r_, k_); dprintf("%p: Adjusted: k = %lx\n", r_, k_);
} }
bool bool
radix_iterator2::advance(u32 level) radix_iterator::advance(u32 level)
{ {
// First, see if we can advance a lower level. // First, see if we can advance a lower level.
if (level > 0 && advance(level-1)) { if (level > leaf_ && advance(level-1)) {
// Nothing more to do. // Nothing more to do.
return true; return true;
} }
@@ -179,12 +261,12 @@ radix_iterator2::advance(u32 level)
} }
bool bool
radix_iterator2::find_first_leaf(u32 level) radix_iterator::find_first_leaf(u32 level)
{ {
// Find the first non-empty node after k_ on this level. // Find the first non-empty node after k_ on this level.
for (u32 idx = index(k_, level); idx < (1<<bits_per_level); idx++) { for (u32 idx = index(k_, level); idx < (1<<bits_per_level); idx++) {
void *next = node(level+1)->ptr[idx].ptr().load(); radix_entry next = node(level+1)->child[idx].load();
if (next != nullptr) { if (!next.is_null()) {
if (index(k_, level) != idx) { if (index(k_, level) != idx) {
// We had to advance; clear everything this level and under // We had to advance; clear everything this level and under
// and set this one. // and set this one.
@@ -193,7 +275,12 @@ radix_iterator2::find_first_leaf(u32 level)
} }
path_[level] = next; path_[level] = next;
if (level == 0 || find_first_leaf(level-1)) if (next.is_elem()) {
// Found a leaf. Stop now.
leaf_ = level;
return true;
} else if (find_first_leaf(level-1))
// Keep looking.
return true; return true;
} }
} }
......
@@ -168,7 +168,7 @@ vmnode::loadall()
vma::vma(vmap *vmap, uptr start, uptr end, enum vmatype vtype, vmnode *vmn) : vma::vma(vmap *vmap, uptr start, uptr end, enum vmatype vtype, vmnode *vmn) :
#if VM_CRANGE #if VM_CRANGE
range(&vmap->cr, start, end-start), range(&vmap->vmas, start, end-start),
#endif #endif
vma_start(start), vma_end(end), va_type(vtype), n(vmn) vma_start(start), vma_end(end), va_type(vtype), n(vmn)
{ {
@@ -194,10 +194,10 @@ vmap::alloc(void)
vmap::vmap() : vmap::vmap() :
#if VM_CRANGE #if VM_CRANGE
cr(10), vmas(10),
#endif #endif
#if VM_RADIX #if VM_RADIX
rx(PGSHIFT), vmas(PGSHIFT),
#endif #endif
ref(1), pml4(setupkvm()), kshared((char*) ksalloc(slab_kshared)), ref(1), pml4(setupkvm()), kshared((char*) ksalloc(slab_kshared)),
brk_(0) brk_(0)
@@ -253,12 +253,7 @@ vmap::incref()
bool bool
vmap::replace_vma(vma *a, vma *b) vmap::replace_vma(vma *a, vma *b)
{ {
#if VM_CRANGE auto span = vmas.search_lock(a->vma_start, a->vma_end - a->vma_start);
auto span = cr.search_lock(a->vma_start, a->vma_end - a->vma_start);
#endif
#if VM_RADIX
auto span = rx.search_lock(a->vma_start, a->vma_end - a->vma_start);
#endif
if (a->deleted()) if (a->deleted())
return false; return false;
for (auto e: span) for (auto e: span)
@@ -279,12 +274,11 @@ vmap::copy(int share)
{ {
vmap *nm = new vmap(); vmap *nm = new vmap();
#if VM_CRANGE
for (auto r: cr) {
#endif
#if VM_RADIX #if VM_RADIX
void *last = 0; void *last = 0;
for (auto r: rx) { #endif
for (auto r: vmas) {
#if VM_RADIX
if (!r || r == last) if (!r || r == last)
continue; continue;
last = r; last = r;
@@ -318,12 +312,7 @@ vmap::copy(int share)
ne = new vma(nm, e->vma_start, e->vma_end, PRIVATE, e->n->copy()); ne = new vma(nm, e->vma_start, e->vma_end, PRIVATE, e->n->copy());
} }
#if VM_CRANGE auto span = nm->vmas.search_lock(ne->vma_start, ne->vma_end - ne->vma_start);
auto span = nm->cr.search_lock(ne->vma_start, ne->vma_end - ne->vma_start);
#endif
#if VM_RADIX
auto span = nm->rx.search_lock(ne->vma_start, ne->vma_end - ne->vma_start);
#endif
for (auto x: span) { for (auto x: span) {
#if VM_RADIX #if VM_RADIX
if (!x) if (!x)
@@ -367,11 +356,11 @@ vmap::lookup(uptr start, uptr len)
panic("vmap::lookup bad len"); panic("vmap::lookup bad len");
#if VM_CRANGE #if VM_CRANGE
auto r = cr.search(start, len); auto r = vmas.search(start, len);
#endif #endif
#if VM_RADIX #if VM_RADIX
assert(len <= PGSIZE); assert(len <= PGSIZE);
auto r = rx.search(start); auto r = vmas.search(start);
#endif #endif
if (r != 0) { if (r != 0) {
vma *e = (vma *) r; vma *e = (vma *) r;
@@ -405,12 +394,7 @@ again:
{ {
// new scope to release the search lock before tlbflush // new scope to release the search lock before tlbflush
u64 len = n->npages * PGSIZE; u64 len = n->npages * PGSIZE;
#if VM_CRANGE auto span = vmas.search_lock(vma_start, len);
auto span = cr.search_lock(vma_start, len);
#endif
#if VM_RADIX
auto span = rx.search_lock(vma_start, len);
#endif
for (auto r: span) { for (auto r: span) {
#if VM_RADIX #if VM_RADIX
if (!r) if (!r)
@@ -474,12 +458,7 @@ vmap::remove(uptr vma_start, uptr len)
// new scope to release the search lock before tlbflush // new scope to release the search lock before tlbflush
uptr vma_end = vma_start + len; uptr vma_end = vma_start + len;
#if VM_CRANGE auto span = vmas.search_lock(vma_start, len);
auto span = cr.search_lock(vma_start, len);
#endif
#if VM_RADIX
auto span = rx.search_lock(vma_start, len);
#endif
for (auto r: span) { for (auto r: span) {
vma *rvma = (vma*) r; vma *rvma = (vma*) r;
if (rvma->vma_start < vma_start || rvma->vma_end > vma_end) { if (rvma->vma_start < vma_start || rvma->vma_end > vma_end) {
@@ -756,17 +735,13 @@ vmap::sbrk(ssize_t n, uptr *addr)
s64 newn = PGROUNDUP(n + curbrk - newstart); s64 newn = PGROUNDUP(n + curbrk - newstart);
#if VM_CRANGE #if VM_CRANGE
range *prev = 0; range *prev = 0;
auto span = cr.search_lock(newstart, newn + PGSIZE);
#endif #endif
#if VM_RADIX #if VM_RADIX
auto span = rx.search_lock(newstart, newn + PGSIZE); void *last = 0;
#endif #endif
#if VM_CRANGE auto span = vmas.search_lock(newstart, newn + PGSIZE);
for (auto r: span) { for (auto r: span) {
#endif
#if VM_RADIX #if VM_RADIX
void *last = 0;
for (auto r: span) {
if (!r || r == last) if (!r || r == last)
continue; continue;
last = r; last = r;
@@ -824,7 +799,7 @@ vmap::unmapped_area(size_t npages)
while (addr < USERTOP) { while (addr < USERTOP) {
#if VM_CRANGE #if VM_CRANGE
auto x = cr.search(addr, n); auto x = vmas.search(addr, n);
if (x == nullptr) if (x == nullptr)
return addr; return addr;
vma* a = (vma*) x; vma* a = (vma*) x;
@@ -834,7 +809,7 @@ vmap::unmapped_area(size_t npages)
#if VM_RADIX #if VM_RADIX
bool overlap = false; bool overlap = false;
for (uptr ax = addr; ax < addr+n; ax += PGSIZE) { for (uptr ax = addr; ax < addr+n; ax += PGSIZE) {
auto x = rx.search(ax); auto x = vmas.search(ax);
if (x != nullptr) { if (x != nullptr) {
overlap = true; overlap = true;
vma* a = (vma*) x; vma* a = (vma*) x;
......
// -*- c++ -*-
#pragma once
namespace std {
template<class T>
struct remove_reference
{ typedef T type; };
template<class T>
struct remove_reference<T&>
{ typedef T type; };
template<class T>
struct remove_reference<T&&>
{ typedef T type; };
}
// -*- c++ -*-
#pragma once
#include <type_traits>
namespace std {
template<class T>
typename remove_reference<T>::type&&
move(T&& a)
{
return static_cast<typename remove_reference<T>::type&&>(a);
}
template<class A, class B>
struct pair {
typedef A first_type;
typedef B second_type;
A first;
B second;
pair(const pair&) = default;
pair(pair&&) = default;
constexpr pair() : first(), second() {}
pair(const A &a, const B &b) : first(a), second(b) {}
bool operator==(const pair<A, B> &other) {
return first == other.first && second == other.second;
}
};
template<class A, class B>
pair<A, B>
make_pair(const A &a, const B &b)
{
return pair<A, B>(a, b);
}
}