Commit 982d0894 authored by Nickolai Zeldovich

run rcu_gc in a separate proc, since it can sleep (in rcu_delayed bfree)

Parent d5fd5fdd
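What the diff below does: userinit() now spawns one CPU-pinned process per core whose entry point is rcu_gc_worker(); each worker runs rcu_gc_work() and then sleeps on a per-CPU condition variable, and rcu_gc() itself shrinks to a cv_wakeup() of that worker, so callers never block on the (possibly sleeping) free path. Below is a minimal userspace sketch of the same worker/wakeup pattern, using pthreads in place of the kernel's cv_sleep/cv_wakeup; the names gc_trigger, gc_requested, and gc_work are illustrative, not taken from the kernel.

// Build with: cc -pthread sketch.c
// Sketch only: the slow, possibly-sleeping GC work runs in a dedicated worker
// thread that blocks on a condition variable, and the old entry point becomes
// a cheap wakeup.
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t gc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  gc_cv   = PTHREAD_COND_INITIALIZER;
static int gc_requested = 0;

static void gc_work(void)
{
  // Stand-in for rcu_gc_work(): may block (e.g. freeing buffers), which is
  // why it cannot run in the caller's context.
  printf("collecting deferred frees\n");
}

static void *gc_worker(void *arg)
{
  (void)arg;
  pthread_mutex_lock(&gc_lock);
  for (;;) {
    while (!gc_requested)                  // guard against spurious wakeups
      pthread_cond_wait(&gc_cv, &gc_lock);
    gc_requested = 0;
    pthread_mutex_unlock(&gc_lock);
    gc_work();                             // do the slow work outside the lock
    pthread_mutex_lock(&gc_lock);
  }
  return NULL;
}

static void gc_trigger(void)               // analogue of the new rcu_gc()
{
  pthread_mutex_lock(&gc_lock);
  gc_requested = 1;
  pthread_cond_signal(&gc_cv);
  pthread_mutex_unlock(&gc_lock);
}

int main(void)
{
  pthread_t t;
  pthread_create(&t, NULL, gc_worker, NULL);
  gc_trigger();                            // caller returns immediately
  sleep(1);                                // give the worker time to run
  return 0;
}

The kernel version in this commit skips the gc_requested flag: a missed wakeup only delays collection until the next rcu_gc() call, so the worker can simply cv_sleep unconditionally after each pass.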
@@ -181,6 +181,7 @@ void rcu_end_read(void);
void rcu_delayed(void*, void (*dofree)(void*));
void rcu_delayed2(int, uint, void (*dofree)(int, uint));
void rcu_gc(void);
+void rcu_gc_worker(void);
// swtch.S
void swtch(struct context**, struct context*);
@@ -58,6 +58,7 @@ allocproc(void)
  p->epoch = INF;
  p->cpuid = cpu->id;
  p->on_runq = -1;
+  p->cpu_pin = 0;
  snprintf(p->lockname, sizeof(p->lockname), "cv:proc:%d", p->pid);
  initlock(&p->lock, p->lockname+3);
@@ -162,6 +163,20 @@ userinit(void)
  addrun(p);
  p->state = RUNNABLE;
  release(&p->lock);
+  for (uint c = 0; c < NCPU; c++) {
+    struct proc *rcup = allocproc();
+    rcup->vmap = vmap_alloc();
+    rcup->context->eip = (uint) rcu_gc_worker;
+    rcup->cwd = 0;
+    rcup->cpuid = c;
+    rcup->cpu_pin = 1;
+    acquire(&rcup->lock);
+    rcup->state = RUNNABLE;
+    addrun(rcup);
+    release(&rcup->lock);
+  }
}
// Grow/shrink current process's memory by n bytes.
@@ -419,7 +434,7 @@ migrate(struct proc *p)
      continue;
    if (idle[c]) { // OK if there is a race
      acquire(&p->lock);
-      if (p->state != RUNNABLE) {
+      if (p->state != RUNNABLE || p->cpu_pin) {
        release(&p->lock);
        continue;
      }
@@ -450,7 +465,7 @@ steal_cb(void *vk, void *v, void *arg)
  struct proc *p = v;
  acquire(&p->lock);
-  if (p->state != RUNNABLE || p->cpuid == cpu->id) {
+  if (p->state != RUNNABLE || p->cpuid == cpu->id || p->cpu_pin) {
    release(&p->lock);
    return 0;
  }
@@ -505,6 +520,7 @@ scheduler(void)
panic("scheduler allocproc");
proc = schedp;
proc->cpu_pin = 1;
// Enabling mtrace calls in scheduler generates many mtrace_call_entrys.
// mtrace_call_set(1, cpu->id);
@@ -93,6 +93,7 @@ struct proc {
  uint rcu_read_depth;
  char lockname[16];
  int on_runq;
+  int cpu_pin;
};
// Process memory is laid out contiguously, low addresses first:
@@ -32,6 +32,7 @@ static struct { struct rcu_head x __attribute__((aligned (CACHELINE))); } rcu_q[
static uint global_epoch __attribute__ ((aligned (CACHELINE)));
static struct { struct spinlock l __attribute__((aligned (CACHELINE))); } rcu_lock[NCPU];
static struct { int v __attribute__((aligned (CACHELINE))); } delayed_nfree[NCPU];
+static struct { struct condvar cv __attribute__((aligned (CACHELINE))); } rcu_cv[NCPU];
enum { rcu_debug = 0 };
@@ -41,6 +42,7 @@ rcuinit(void)
  for (int i = 0; i < NCPU; i++) {
    initlock(&rcu_lock[i].l, "rcu");
    TAILQ_INIT(&rcu_q[i].x);
+    initcondvar(&rcu_cv[i].cv, "rcu_gc_cv");
  }
}
@@ -63,7 +65,7 @@ rcu_min(void *vkey, void *v, void *arg){
// XXX use atomic instruction to update list (instead of holding lock)
// lists of lists?
void
-rcu_gc(void)
+rcu_gc_work(void)
{
  struct rcu *r, *nr;
  uint min_epoch = global_epoch;
@@ -71,7 +73,7 @@ rcu_gc(void)
  ns_enumerate(nspid, rcu_min, &min_epoch);
-  pushcli();
+  // pushcli(); // not necessary: rcup->cpu_pin==1
  acquire(&rcu_lock[cpu->id].l);
  for (r = TAILQ_FIRST(&rcu_q[cpu->id].x); r != NULL; r = nr) {
@@ -102,12 +104,33 @@ rcu_gc(void)
  if (rcu_debug)
    cprintf("rcu_gc: cpu %d n %d delayed_nfree=%d min_epoch=%d\n",
            cpu->id, n, delayed_nfree[cpu->id], min_epoch);
-  popcli();
+  // popcli(); // not necessary: rcup->cpu_pin==1
  // global_epoch can be bumped anywhere; this seems as good a place as any
  __sync_fetch_and_add(&global_epoch, 1);
}
+void
+rcu_gc_worker(void)
+{
+  release(&proc->lock); // initially held by scheduler
+  struct spinlock wl;
+  initlock(&wl, "rcu_gc_worker"); // dummy lock
+  acquire(&wl);
+  for (;;) {
+    rcu_gc_work();
+    cv_sleep(&rcu_cv[cpu->id].cv, &wl);
+  }
+}
+
+void
+rcu_gc(void)
+{
+  cv_wakeup(&rcu_cv[cpu->id].cv);
+}
// XXX Use atomic instruction to update list (instead of holding lock)
static void
rcu_delayed_int(struct rcu *r)