Merge branch 'scale-amd64' of git+ssh://amsterdam.csail.mit.edu/home/am0/6.828/xv6 into scale-amd64

4c05b764 · Silas Boyd-Wickizer · 21c44a5e · 9d8d11ae · 4c05b764 · 4c05b764
--- a/include/kalloc.hh
+++ b/include/kalloc.hh
 #include "atomic.hh"
+#include "percpu.hh"
 template<class T>
 struct vptr64 {
@@ -59,7 +60,7 @@ struct kmem {
  run* alloc(const char* name);
  void free(run* r);
-} __mpalign__;
+};
 enum {
  slab_stack,
@@ -70,5 +71,5 @@ enum {
  slab_type_max
 };
-extern struct kmem kmems[NCPU];
+extern percpu<kmem> kmems;
-extern struct kmem slabmem[slab_type_max][NCPU];
+extern percpu<kmem> slabmem[slab_type_max];
--- a/include/kernel.hh
+++ b/include/kernel.hh
@@ -248,7 +248,7 @@ void*           pagelookup(struct vmap*, uptr);
 // zalloc.cc
 char*           zalloc(const char* name);
-void            zfree(char* p);
+void            zfree(void* p);
 // other exported/imported functions
 void cmain(u64 mbmagic, u64 mbaddr);

--- a/include/percpu.hh
+++ b/include/percpu.hh
@@ -4,31 +4,38 @@ extern int mycpuid(void);
 template <typename T>
 struct percpu {
-  const T* operator->() const {
+  percpu() = default;
+  percpu(const percpu &o) = delete;
+  percpu(percpu &&o) = delete;
+  percpu &operator=(const percpu &o) = delete;
+  T* get() const {
    return cpu(mycpuid());
  }
-  T* operator->() {
+  T* operator->() const {
    return cpu(mycpuid());
  }
-  T& operator*() {
+  T& operator*() const {
    return *cpu(mycpuid());
  }
-  T& operator[](int id) { 
+  T& operator[](int id) const {
-    return *cpu(id);
-  }
-  const T& operator[](int id) const { 
    return *cpu(id);
  }
-  T* cpu(int id) {
+private:
+  T* cpu(int id) const {
    return &pad_[id].v_;
  }
-  struct {
+  // percpu acts like a T* const, but since it's actually storing the
+  // data directly, we have to strip the const-ness away from the data
+  // being stored.  This lets const members return non-const pointers
+  // to this data, just like a T* const.
+  mutable struct {
    T v_ __mpalign__;
    __padout__;
  } pad_[NCPU];

--- a/include/radix.hh
+++ b/include/radix.hh
@@ -6,7 +6,7 @@
 #include "gc.hh"
-enum { bits_per_level = 6 };
+enum { bits_per_level = 9 };
 enum { key_bits = 36 };
 enum { radix_levels = (key_bits + bits_per_level - 1) / bits_per_level };
@@ -163,10 +163,18 @@ class radix_elem : public rcu_freed {
 struct radix_node {
  radix_ptr child[1 << bits_per_level];
-  radix_node() { }
+  // We need to customize not only allocation but initialization, so
+  // radix_node has no constructors.  Instead, use create.
+  radix_node() = delete;
+  radix_node(const radix_node &o) = delete;
+  static radix_node *create();
  ~radix_node();
-  NEW_DELETE_OPS(radix_node)
+  // Since we use custom allocation for radix_nodes, we must also
+  // custom delete them.  Note that callers may alternatively use
+  // zfree when freeing a radix_node that's known to be empty (for
+  // example, after failed optimistic concurrency).
+  static void operator delete(void *p);
 };
 // Assert we have enough spare bits for all flags.
@@ -196,7 +204,7 @@ struct radix {
  radix_ptr root_;
  u32 shift_;
-  radix(u32 shift) : root_(radix_entry(new radix_node())), shift_(shift) {
+  radix(u32 shift) : root_(radix_entry(radix_node::create())), shift_(shift) {
  }
  ~radix();
  radix_elem* search(u64 key);

--- a/kernel/kalloc.cc
+++ b/kernel/kalloc.cc
@@ -16,8 +16,8 @@
 static struct Mbmem mem[128];
 static u64 nmem;
 static u64 membytes;
-struct kmem kmems[NCPU];
+percpu<kmem> kmems;
-struct kmem slabmem[slab_type_max][NCPU];
+percpu<kmem> slabmem[slab_type_max];
 extern char end[]; // first address after kernel loaded from ELF file
 char *newend;
@@ -128,8 +128,7 @@ kmem::alloc(const char* name)
        panic("kmem:alloc: aba race %p %p %p\n",
              r, r->next, nxt);
      nfree--;
-      if (name)
+      mtlabel(mtrace_label_block, r, size, name, strlen(name));
-        mtlabel(mtrace_label_block, r, size, name, strlen(name));
      return r;
    }
  }
@@ -170,7 +169,7 @@ kfree_pool(struct kmem *m, char *v)
 }
 static void
-kmemprint_pool(struct kmem *km)
+kmemprint_pool(const percpu<kmem> &km)
 {
  cprintf("pool %s: [ ", &km[0].name[1]);
  for (u32 i = 0; i < NCPU; i++)
@@ -191,7 +190,7 @@ kmemprint()
 static char*
-kalloc_pool(struct kmem *km, const char *name)
+kalloc_pool(const percpu<kmem> &km, const char *name)
 {
  struct run *r = 0;
  struct kmem *m;
@@ -321,5 +320,5 @@ kfree(void *v)
 void
 ksfree(int slab, void *v)
 {
-  kfree_pool(slabmem[slab], (char*) v);
+  kfree_pool(slabmem[slab].get(), (char*) v);
 }
--- a/kernel/kmalloc.cc
+++ b/kernel/kmalloc.cc
@@ -37,7 +37,7 @@ kminit(void)
 static int
 morecore(int c, int b)
 {
-  char *p = kalloc(nullptr);
+  char *p = kalloc("kmalloc");
  if(p == 0)
    return -1;

--- a/kernel/main.cc
+++ b/kernel/main.cc
@@ -116,7 +116,8 @@ cmain(u64 mbmagic, u64 mbaddr)
  initlapic();
  initcmdline();
  initkalloc(mbaddr);
-  initz();
+  initwq();        // (after kalloc)
+  initz();         // (after wq)
  initproc();      // process table
  initsched();     // scheduler run queues
  initidle();
@@ -125,7 +126,6 @@ cmain(u64 mbmagic, u64 mbaddr)
  initinode();     // inode cache
  initdisk();      // disk
  initconsole();
-  initwq();
  initfutex();
  initsamp();
  initlockstat();

--- a/kernel/radix.cc
+++ b/kernel/radix.cc
@@ -39,7 +39,7 @@ push_down(radix_entry cur, radix_ptr *ptr)
    radix_elem *elem = cur.elem();
    // FIXME: This might throw. Might need to unlock the things you've
    // already hit.
-    radix_node *new_rn = new radix_node();
+    radix_node *new_rn = radix_node::create();
    if (elem != nullptr) {
      for (int i = 0; i < (1<<bits_per_level); i++) {
        new_rn->child[i].store(radix_entry(elem));
@@ -58,14 +58,20 @@ push_down(radix_entry cur, radix_ptr *ptr)
      // reallocating new_rn if elem doesn't change.
      // Avoid bouncing on the refcount 1<<bits_per_level times.
-      if (elem != nullptr) {
+      if (elem != nullptr)
-        for (int i = 0; i < (1<<bits_per_level); i++) {
-          new_rn->child[i].store(radix_entry(nullptr));
-        }
        elem->decref(1<<bits_per_level);
-      }
-      delete new_rn;
+      // XXX(austin) This happens for nearly 50% of radix_node
+      // allocations.  Is the compare exchange actually right?
+      if (elem == nullptr)
+        // We know the page is still zeroed
+        zfree(new_rn);
+      else
+        // We already did a batch decref above.  We could zero all of
+        // the entries and call the destructor (which will scan the
+        // node again).  Instead, we skip the whole thing and free
+        // directly.
+        kfree(new_rn);
    }
  }
  return cur;
@@ -131,6 +137,14 @@ radix_entry::release()
  }
 }
+radix_node*
+radix_node::create()
+{
+  static_assert(sizeof(radix_node) == PGSIZE,
+    "radix_node must be exactly one page");
+  return (radix_node*)zalloc("radix_node");
+}
 radix_node::~radix_node()
 {
  for (int i = 0; i < (1<<bits_per_level); i++) {
@@ -138,6 +152,12 @@ radix_node::~radix_node()
  }
 }
+void
+radix_node::operator delete(void *p)
+{
+  kfree(p);
+}
 radix::~radix()
 {
  root_.load().release();

--- a/kernel/vm.cc
+++ b/kernel/vm.cc
@@ -172,6 +172,8 @@ vma::vma(vmap *vmap, uptr start, uptr end, enum vmatype vtype, vmnode *vmn) :
 #endif
    vma_start(start), vma_end(end), va_type(vtype), n(vmn)
 {
+  assert(PGOFFSET(start) == 0);
+  assert(PGOFFSET(end) == 0);
  if (n)
    n->incref();
 }
@@ -462,7 +464,8 @@ vmap::remove(uptr vma_start, uptr len)
    for (auto r: span) {
      vma *rvma = (vma*) r;
      if (rvma->vma_start < vma_start || rvma->vma_end > vma_end) {
-        cprintf("vmap::remove: partial unmap not supported\n");
+        cprintf("vmap::remove: partial unmap not supported; unmapping [%#lx,%#lx) from [%#lx,%#lx)\n",
+                vma_start, vma_start+len, rvma->vma_start, rvma->vma_end);
        return -1;
      }
    }

--- a/kernel/wqkern.cc
+++ b/kernel/wqkern.cc
@@ -30,12 +30,14 @@ wq_size(void)
 int
 wq_push(work *w)
 {
+  assert(wq_);
  return wq_->push(w, mycpuid());
 }
 void
 wqcrit_trywork(void)
 {
+  assert(wqcrit_);
  while (wqcrit_->trywork(false))
    ;
 }
@@ -43,19 +45,22 @@ wqcrit_trywork(void)
 int
 wqcrit_push(work *w, int c)
 {
+  assert(wqcrit_);
  return wqcrit_->push(w, c);
 }
 int
 wq_trywork(void)
 {
+  assert(wq_ && wqcrit_);
  return wqcrit_->trywork(false) || wq_->trywork(true);
 }
 void
 wq_dump(void)
 {
-  return wq_->dump();
+  if (wq_)
+    return wq_->dump();
 }
 void

--- a/kernel/zalloc.cc
+++ b/kernel/zalloc.cc
@@ -12,13 +12,12 @@ extern "C" void zrun_nc(run*);
 static const bool prezero = true;
 struct zallocator {
-  run*   run;
  kmem   kmem;
  wframe frame;
  void  init(int);
  char* alloc(const char*);
-  void  free(char*);
+  void  free(void*);
  void  tryrefill();
 };
 percpu<zallocator> z_;
@@ -83,29 +82,35 @@ zallocator::alloc(const char* name)
  } else {
    // Zero the run header used by kmem
    memset(p, 0, sizeof(struct run));
+    if (0)
+      for (int i = 0; i < PGSIZE; i++)
+        assert(p[i] == 0);
  }
  tryrefill();
  return p;
 }
 void
-zallocator::free(char* p)
+zallocator::free(void* p)
 {
  if (0) 
    for (int i = 0; i < 4096; i++)
-      assert(p[i] == 0);
+      assert(((char*)p)[i] == 0);
  kmem.free((struct run*)p);
 }
+// Allocate a zeroed page.  This page can be freed with kfree or, if
+// it is known to be zeroed when it is freed, zfree.
 char*
 zalloc(const char* name)
 {
  return z_->alloc(name);
 }
+// Free a page that is known to be zeroed.
 void
-zfree(char* p)
+zfree(void* p)
 {
  z_->free(p);
 }