| From foo@baz Fri Mar 16 15:43:17 CET 2018 |
| From: John Fastabend <john.fastabend@gmail.com> |
| Date: Thu, 7 Dec 2017 09:56:04 -0800 |
| Subject: net: sched: drop qdisc_reset from dev_graft_qdisc |
| |
| From: John Fastabend <john.fastabend@gmail.com> |
| |
| |
| [ Upstream commit 7bbde83b1860c28a1cc35516352c4e7e5172c29a ] |
| |
| In qdisc_graft_qdisc a "new" qdisc is attached and the 'qdisc_destroy' |
| operation is called on the old qdisc. The destroy operation will wait |
| an RCU grace period and call qdisc_rcu_free(), at which point |
| gso_cpu_skb is freed along with all stats, so there is no need to |
| zero stats and gso_cpu_skb from the graft operation itself. |
| |
| Further, after dropping the qdisc locks we cannot continue to call |
| qdisc_reset before waiting an RCU grace period so that the qdisc is |
| detached from all CPUs. By removing the qdisc_reset() here we get |
| the correct property of waiting an rcu grace period and letting the |
| qdisc_destroy operation clean up the qdisc correctly. |
| |
| Note, a refcnt greater than 1 would cause the destroy operation to |
| be aborted; however, if this ever happened, the reference to the |
| qdisc would be lost and we would have a memory leak. |
| |
| Signed-off-by: John Fastabend <john.fastabend@gmail.com> |
| Signed-off-by: David S. Miller <davem@davemloft.net> |
| Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| net/sched/sch_generic.c | 28 +++++++++++++++++++--------- |
| 1 file changed, 19 insertions(+), 9 deletions(-) |
| |
| --- a/net/sched/sch_generic.c |
| +++ b/net/sched/sch_generic.c |
| @@ -743,10 +743,6 @@ struct Qdisc *dev_graft_qdisc(struct net |
| root_lock = qdisc_lock(oqdisc); |
| spin_lock_bh(root_lock); |
| |
| - /* Prune old scheduler */ |
| - if (oqdisc && refcount_read(&oqdisc->refcnt) <= 1) |
| - qdisc_reset(oqdisc); |
| - |
| /* ... and graft new one */ |
| if (qdisc == NULL) |
| qdisc = &noop_qdisc; |
| @@ -897,6 +893,16 @@ static bool some_qdisc_is_busy(struct ne |
| return false; |
| } |
| |
| +static void dev_qdisc_reset(struct net_device *dev, |
| + struct netdev_queue *dev_queue, |
| + void *none) |
| +{ |
| + struct Qdisc *qdisc = dev_queue->qdisc_sleeping; |
| + |
| + if (qdisc) |
| + qdisc_reset(qdisc); |
| +} |
| + |
| /** |
| * dev_deactivate_many - deactivate transmissions on several devices |
| * @head: list of devices to deactivate |
| @@ -907,7 +913,6 @@ static bool some_qdisc_is_busy(struct ne |
| void dev_deactivate_many(struct list_head *head) |
| { |
| struct net_device *dev; |
| - bool sync_needed = false; |
| |
| list_for_each_entry(dev, head, close_list) { |
| netdev_for_each_tx_queue(dev, dev_deactivate_queue, |
| @@ -917,20 +922,25 @@ void dev_deactivate_many(struct list_hea |
| &noop_qdisc); |
| |
| dev_watchdog_down(dev); |
| - sync_needed |= !dev->dismantle; |
| } |
| |
| /* Wait for outstanding qdisc-less dev_queue_xmit calls. |
| * This is avoided if all devices are in dismantle phase : |
| * Caller will call synchronize_net() for us |
| */ |
| - if (sync_needed) |
| - synchronize_net(); |
| + synchronize_net(); |
| |
| /* Wait for outstanding qdisc_run calls. */ |
| - list_for_each_entry(dev, head, close_list) |
| + list_for_each_entry(dev, head, close_list) { |
| while (some_qdisc_is_busy(dev)) |
| yield(); |
| + /* The new qdisc is assigned at this point so we can safely |
| + * unwind stale skb lists and qdisc statistics |
| + */ |
| + netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL); |
| + if (dev_ingress_queue(dev)) |
| + dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL); |
| + } |
| } |
| |
| void dev_deactivate(struct net_device *dev) |