| From foo@baz Fri Mar 16 15:43:17 CET 2018 |
| From: John Fastabend <john.fastabend@gmail.com> |
| Date: Thu, 7 Dec 2017 09:56:04 -0800 |
| Subject: net: sched: drop qdisc_reset from dev_graft_qdisc |
| |
| From: John Fastabend <john.fastabend@gmail.com> |
| |
| |
| [ Upstream commit 7bbde83b1860c28a1cc35516352c4e7e5172c29a ] |
| |
| In qdisc_graft_qdisc a "new" qdisc is attached and the 'qdisc_destroy' |
| operation is called on the old qdisc. The destroy operation will wait |
| an RCU grace period and call qdisc_rcu_free(), at which point |
| gso_cpu_skb is freed along with all stats, so there is no need to |
| zero stats and gso_cpu_skb from the graft operation itself. |
| |
| Further, after dropping the qdisc locks we cannot continue to call |
| qdisc_reset before waiting an RCU grace period so that the qdisc is |
| detached from all CPUs. By removing the qdisc_reset() here we get |
| the correct property of waiting an rcu grace period and letting the |
| qdisc_destroy operation clean up the qdisc correctly. |
| |
| Note, a refcnt greater than 1 would cause the destroy operation to |
| be aborted; however, if this ever happened, the reference to the |
| qdisc would be lost and we would have a memory leak. |
| |
| Signed-off-by: John Fastabend <john.fastabend@gmail.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| net/sched/sch_generic.c | 28 +++++++++++++++++++--------- |
| 1 file changed, 19 insertions(+), 9 deletions(-) |
| |
| --- a/net/sched/sch_generic.c |
| +++ b/net/sched/sch_generic.c |
| @@ -743,10 +743,6 @@ struct Qdisc *dev_graft_qdisc(struct net |
| root_lock = qdisc_lock(oqdisc); |
| spin_lock_bh(root_lock); |
| |
| - /* Prune old scheduler */ |
| - if (oqdisc && refcount_read(&oqdisc->refcnt) <= 1) |
| - qdisc_reset(oqdisc); |
| - |
| /* ... and graft new one */ |
| if (qdisc == NULL) |
| qdisc = &noop_qdisc; |
| @@ -897,6 +893,16 @@ static bool some_qdisc_is_busy(struct ne |
| return false; |
| } |
| |
| +static void dev_qdisc_reset(struct net_device *dev, |
| + struct netdev_queue *dev_queue, |
| + void *none) |
| +{ |
| + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; |
| + |
| + if (qdisc) |
| + qdisc_reset(qdisc); |
| +} |
| + |
| /** |
| * dev_deactivate_many - deactivate transmissions on several devices |
| * @head: list of devices to deactivate |
| @@ -907,7 +913,6 @@ static bool some_qdisc_is_busy(struct ne |
| void dev_deactivate_many(struct list_head *head) |
| { |
| struct net_device *dev; |
| - bool sync_needed = false; |
| |
| list_for_each_entry(dev, head, close_list) { |
| netdev_for_each_tx_queue(dev, dev_deactivate_queue, |
| @@ -917,20 +922,25 @@ void dev_deactivate_many(struct list_hea |
| &noop_qdisc); |
| |
| dev_watchdog_down(dev); |
| - sync_needed |= !dev->dismantle; |
| } |
| |
| /* Wait for outstanding qdisc-less dev_queue_xmit calls. |
| * This is avoided if all devices are in dismantle phase : |
| * Caller will call synchronize_net() for us |
| */ |
| - if (sync_needed) |
| - synchronize_net(); |
| + synchronize_net(); |
| |
| /* Wait for outstanding qdisc_run calls. */ |
| - list_for_each_entry(dev, head, close_list) |
| + list_for_each_entry(dev, head, close_list) { |
| while (some_qdisc_is_busy(dev)) |
| yield(); |
| + /* The new qdisc is assigned at this point so we can safely |
| + * unwind stale skb lists and qdisc statistics |
| + */ |
| + netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL); |
| + if (dev_ingress_queue(dev)) |
| + dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL); |
| + } |
| } |
| |
| void dev_deactivate(struct net_device *dev) |