hack 3: experiment with rcu_tasks_trace batching and forced momentary QS in bpf memalloc reuse

Throwaway debug hack, not for merging:

- bpf/memalloc: add a per-cache reuse_tt_cb_in_progress counter and a
  bpf_ma_reuse_tt_cb() callback so reuse-ready objects can be freed from a
  per-cpu call_rcu_tasks_trace() batch (struct bpf_reuse_batch is moved up
  for this).  The call site in bpf_ma_add_to_reuse_ready_or_free() is left
  commented out for now; the shared-cache free_rcu() path stays active.
- reuse_bulk(): unconditionally call the new rcu_momentary_dyntick_idle2()
  (note the "|| 1") and disable the per-batch kmalloc()/call_rcu() path by
  forcing batch = NULL.
- Raise high_watermark for unit_size <= 256 from 64 to 1024.
- Account reuse_tt_cb_in_progress in bpf_mem_alloc_destroy() so destroy
  waits for in-flight tasks-trace callbacks.
- rcu: add rcu_momentary_dyntick_idle2(), a stripped-down variant of
  rcu_momentary_dyntick_idle() that only clears .rcu_need_heavy_qs and
  advances the context-tracking counter by 2 * RCU_DYNTICKS_IDX.
- Comment out the remaining debug printk()s.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
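---
For reference, the tasks-trace reuse path that this hack wires up (but leaves
commented out in bpf_ma_add_to_reuse_ready_or_free() below) boils down to the
pattern sketched here.  This is a condensed, untested illustration of the hunks
that follow, not separate code: queue_reuse_ready_tt() is just an illustrative
name for logic that actually sits inline in bpf_ma_add_to_reuse_ready_or_free(),
and the limit of 10 in-flight callbacks is simply the value used in the
disabled block, not a tuned number.

static void bpf_ma_reuse_tt_cb(struct rcu_head *rcu)
{
	struct bpf_reuse_batch *batch = container_of(rcu, struct bpf_reuse_batch, rcu);

	/* A tasks-trace GP has elapsed; the detached objects are safe to free. */
	free_all(batch->head, false);
	batch->c->reuse_tt_cb_in_progress--;
	kfree(batch);
}

static void queue_reuse_ready_tt(struct bpf_mem_cache *c)
{
	struct bpf_reuse_batch *batch;

	/* Throttle: no more than 10 tasks-trace callbacks in flight per cache. */
	if (c->reuse_tt_cb_in_progress >= 10)
		return;

	batch = kmalloc(sizeof(*batch), GFP_ATOMIC);
	if (!batch)
		return;

	/* Detach the reuse-ready list and free it after a tasks-trace GP. */
	batch->c = c;
	batch->head = __llist_del_all(&c->reuse_ready_head);
	batch->tail = c->reuse_ready_tail;
	c->reuse_ready_tail = NULL;
	c->reuse_tt_cb_in_progress++;
	call_rcu_tasks_trace(&batch->rcu, bpf_ma_reuse_tt_cb);
}
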
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 56bccb5..ea809be 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -38,6 +38,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr);
 void rcu_barrier(void);
 bool rcu_eqs_special_set(int cpu);
 void rcu_momentary_dyntick_idle(void);
+void rcu_momentary_dyntick_idle2(void);
 void kfree_rcu_scheduler_running(void);
 bool rcu_gp_might_be_stalled(void);
 
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index a4964ae..0ad0e2f 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -122,6 +122,7 @@ struct bpf_mem_cache {
 	struct llist_node *waiting_for_gp_tail;
 	atomic_t call_rcu_in_progress;
 	int reuse_cb_in_progress;
+	int reuse_tt_cb_in_progress;
 };
 
 struct bpf_mem_caches {
@@ -309,14 +310,34 @@ static void free_rcu(struct rcu_head *rcu)
 	head = __llist_del_all(&sc->wait_for_free);
 	raw_spin_unlock_irqrestore(&sc->reuse_lock, flags);
 	cnt = free_all(head, sc->percpu);
-	printk("after rcu_tasks_trace GP %d\n", cnt);
+//	printk("after rcu_tasks_trace GP %d\n", cnt);
 	atomic_set(&sc->call_rcu_in_progress, 0);
 }
 
+struct bpf_reuse_batch {
+	struct bpf_mem_cache *c;
+	struct llist_node *head, *tail;
+	struct rcu_head rcu;
+};
+
+static void bpf_ma_reuse_tt_cb(struct rcu_head *rcu)
+{
+	struct bpf_reuse_batch *batch = container_of(rcu, struct bpf_reuse_batch, rcu);
+	struct bpf_mem_cache *c = batch->c;
+	int cnt;
+
+	cnt = free_all(batch->head, false);
+//	printk("after rcu_tasks_trace GP cb %d\n", cnt);
+	kfree(batch);
+	c->reuse_tt_cb_in_progress--;
+}
+
+
 static void bpf_ma_add_to_reuse_ready_or_free(struct bpf_mem_cache *c)
 {
 	struct bpf_mem_shared_cache *sc = c->sc;
 	struct llist_node *head, *tail;
+	struct bpf_reuse_batch *batch;
 	unsigned long flags;
 
 	/* Draining could be running concurrently with reuse_rcu() */
@@ -344,8 +365,23 @@ static void bpf_ma_add_to_reuse_ready_or_free(struct bpf_mem_cache *c)
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);*/
 
+/*	if (c->reuse_tt_cb_in_progress >= 10)
+		return;
+	batch = kmalloc(sizeof(*batch), GFP_ATOMIC);
+	if (batch) {
+		head = __llist_del_all(&c->reuse_ready_head);
+		tail = c->reuse_ready_tail;
+		c->reuse_ready_tail = NULL;
+		batch->c = c;
+		batch->head = head;
+		batch->tail = tail;
+		c->reuse_tt_cb_in_progress++;
+		call_rcu_tasks_trace(&batch->rcu, bpf_ma_reuse_tt_cb);
+		return;
+	}*/
+
 	if (!atomic_xchg(&sc->call_rcu_in_progress, 1)) {
-		printk("cpu %d reuse_cb_in_progress %d\n", raw_smp_processor_id(), c->reuse_cb_in_progress);
+//		printk("cpu %d reuse_cb_in_progress %d\n", raw_smp_processor_id(), c->reuse_cb_in_progress);
 		head = __llist_del_all(&c->reuse_ready_head);
 		tail = c->reuse_ready_tail;
 		c->reuse_ready_tail = NULL;
@@ -357,12 +393,6 @@ static void bpf_ma_add_to_reuse_ready_or_free(struct bpf_mem_cache *c)
 	}
 }
 
-struct bpf_reuse_batch {
-	struct bpf_mem_cache *c;
-	struct llist_node *head, *tail;
-	struct rcu_head rcu;
-};
-
 
 static void reuse_rcu(struct rcu_head *rcu)
 {
@@ -385,7 +415,6 @@ static void bpf_ma_reuse_cb(struct rcu_head *rcu)
 	c->reuse_cb_in_progress--;
 }
 
-
 static void reuse_bulk(struct bpf_mem_cache *c)
 {
 	struct llist_node *head, *tail, *llnode, *tmp;
@@ -418,10 +447,16 @@ static void reuse_bulk(struct bpf_mem_cache *c)
 		c->free_by_rcu_tail = tail;
 	__llist_add_batch(head, tail, &c->free_by_rcu);
 
-//	if (c->reuse_cb_in_progress >= 1000)
-//		return;
+	if (c->reuse_cb_in_progress >= 100 || 1) {
+//		local_irq_save(flags);
+		rcu_momentary_dyntick_idle2();
+//		local_irq_restore(flags);
+	}
 
-	batch = kmalloc(sizeof(*batch), GFP_ATOMIC);
+/*	if (c->reuse_cb_in_progress >= 100)
+		return;*/
+
+	batch = NULL;//kmalloc(sizeof(*batch), GFP_ATOMIC);
 	if (batch) {
 		head = __llist_del_all(&c->free_by_rcu);
 		tail = c->free_by_rcu_tail;
@@ -487,7 +522,7 @@ static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu)
 	init_irq_work(&c->refill_work, bpf_mem_refill);
 	if (c->unit_size <= 256) {
 		c->low_watermark = 32;
-		c->high_watermark = 64;
+		c->high_watermark = 1024;
 	} else {
 		/* When page_size == 4k, order-0 cache will have low_mark == 2
 		 * and high_mark == 6 with batch alloc of 3 individual pages at
@@ -735,6 +770,7 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
 			drain_mem_cache(c);
 			rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
 			rcu_in_progress += c->reuse_cb_in_progress;
+			rcu_in_progress += c->reuse_tt_cb_in_progress;
 		}
 		sc = ma->s_cache;
 		drain_shared_mem_cache(sc);
@@ -754,6 +790,7 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
 				drain_mem_cache(c);
 				rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
 				rcu_in_progress += c->reuse_cb_in_progress;
+				rcu_in_progress += c->reuse_tt_cb_in_progress;
 			}
 		}
 		for (i = 0; i < NUM_CACHES; i++) {
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f52ff72..7fcd659 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -334,6 +334,16 @@ notrace void rcu_momentary_dyntick_idle(void)
 }
 EXPORT_SYMBOL_GPL(rcu_momentary_dyntick_idle);
 
+notrace void rcu_momentary_dyntick_idle2(void)
+{
+	int seq;
+
+	raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
+	seq = ct_state_inc(2 * RCU_DYNTICKS_IDX);
+	/* It is illegal to call this from idle state. */
+	WARN_ON_ONCE(!(seq & RCU_DYNTICKS_IDX));
+}
+
 /**
  * rcu_is_cpu_rrupt_from_idle - see if 'interrupted' from idle
  *