| From 33caa208dba6fa639e8a92fd0c8320b652e5550c Mon Sep 17 00:00:00 2001 |
| From: Haiyang Zhang <haiyangz@microsoft.com> |
| Date: Wed, 6 Aug 2025 13:21:51 -0700 |
| Subject: hv_netvsc: Fix panic during namespace deletion with VF |
| |
| From: Haiyang Zhang <haiyangz@microsoft.com> |
| |
| commit 33caa208dba6fa639e8a92fd0c8320b652e5550c upstream. |
| |
| The existing code moves the VF NIC to the new namespace when |
| NETDEV_REGISTER is received on the netvsc NIC. During deletion of the |
| namespace, default_device_exit_batch() >> default_device_exit_net() is |
| called. When the netvsc NIC is moved back and registered to the default |
| namespace, it automatically brings the VF NIC back to the default |
| namespace. This leaves the default_device_exit_net() >> |
| for_each_netdev_safe loop unable to detect the list end, so it hits a |
| NULL pointer dereference: |
| |
| [ 231.449420] mana 7870:00:00.0 enP30832s1: Moved VF to namespace with: eth0 |
| [ 231.449656] BUG: kernel NULL pointer dereference, address: 0000000000000010 |
| [ 231.450246] #PF: supervisor read access in kernel mode |
| [ 231.450579] #PF: error_code(0x0000) - not-present page |
| [ 231.450916] PGD 17b8a8067 P4D 0 |
| [ 231.451163] Oops: Oops: 0000 [#1] SMP NOPTI |
| [ 231.451450] CPU: 82 UID: 0 PID: 1394 Comm: kworker/u768:1 Not tainted 6.16.0-rc4+ #3 VOLUNTARY |
| [ 231.452042] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.1 11/21/2024 |
| [ 231.452692] Workqueue: netns cleanup_net |
| [ 231.452947] RIP: 0010:default_device_exit_batch+0x16c/0x3f0 |
| [ 231.453326] Code: c0 0c f5 b3 e8 d5 db fe ff 48 85 c0 74 15 48 c7 c2 f8 fd ca b2 be 10 00 00 00 48 8d 7d c0 e8 7b 77 25 00 49 8b 86 28 01 00 00 <48> 8b 50 10 4c 8b 2a 4c 8d 62 f0 49 83 ed 10 4c 39 e0 0f 84 d6 00 |
| [ 231.454294] RSP: 0018:ff75fc7c9bf9fd00 EFLAGS: 00010246 |
| [ 231.454610] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 61c8864680b583eb |
| [ 231.455094] RDX: ff1fa9f71462d800 RSI: ff75fc7c9bf9fd38 RDI: 0000000030766564 |
| [ 231.455686] RBP: ff75fc7c9bf9fd78 R08: 0000000000000000 R09: 0000000000000000 |
| [ 231.456126] R10: 0000000000000001 R11: 0000000000000004 R12: ff1fa9f70088e340 |
| [ 231.456621] R13: ff1fa9f70088e340 R14: ffffffffb3f50c20 R15: ff1fa9f7103e6340 |
| [ 231.457161] FS: 0000000000000000(0000) GS:ff1faa6783a08000(0000) knlGS:0000000000000000 |
| [ 231.457707] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 |
| [ 231.458031] CR2: 0000000000000010 CR3: 0000000179ab2006 CR4: 0000000000b73ef0 |
| [ 231.458434] Call Trace: |
| [ 231.458600] <TASK> |
| [ 231.458777] ops_undo_list+0x100/0x220 |
| [ 231.459015] cleanup_net+0x1b8/0x300 |
| [ 231.459285] process_one_work+0x184/0x340 |
| |
| To fix it, move the ns change to a workqueue, and take rtnl_lock to avoid |
| changing the netdev list when default_device_exit_net() is using it. |
| |
| Cc: stable@vger.kernel.org |
| Fixes: 4c262801ea60 ("hv_netvsc: Fix VF namespace also in synthetic NIC NETDEV_REGISTER event") |
| Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com> |
| Link: https://patch.msgid.link/1754511711-11188-1-git-send-email-haiyangz@linux.microsoft.com |
| Signed-off-by: Jakub Kicinski <kuba@kernel.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| drivers/net/hyperv/hyperv_net.h | 3 +++ |
| drivers/net/hyperv/netvsc_drv.c | 29 ++++++++++++++++++++++++++++- |
| 2 files changed, 31 insertions(+), 1 deletion(-) |
| |
| --- a/drivers/net/hyperv/hyperv_net.h |
| +++ b/drivers/net/hyperv/hyperv_net.h |
| @@ -1057,6 +1057,7 @@ struct net_device_context { |
| struct net_device __rcu *vf_netdev; |
| struct netvsc_vf_pcpu_stats __percpu *vf_stats; |
| struct delayed_work vf_takeover; |
| + struct delayed_work vfns_work; |
| |
| /* 1: allocated, serial number is valid. 0: not allocated */ |
| u32 vf_alloc; |
| @@ -1071,6 +1072,8 @@ struct net_device_context { |
| struct netvsc_device_info *saved_netvsc_dev_info; |
| }; |
| |
| +void netvsc_vfns_work(struct work_struct *w); |
| + |
| /* Azure hosts don't support non-TCP port numbers in hashing for fragmented |
| * packets. We can use ethtool to change UDP hash level when necessary. |
| */ |
| --- a/drivers/net/hyperv/netvsc_drv.c |
| +++ b/drivers/net/hyperv/netvsc_drv.c |
| @@ -2508,6 +2508,7 @@ static int netvsc_probe(struct hv_device |
| spin_lock_init(&net_device_ctx->lock); |
| INIT_LIST_HEAD(&net_device_ctx->reconfig_events); |
| INIT_DELAYED_WORK(&net_device_ctx->vf_takeover, netvsc_vf_setup); |
| + INIT_DELAYED_WORK(&net_device_ctx->vfns_work, netvsc_vfns_work); |
| |
| net_device_ctx->vf_stats |
| = netdev_alloc_pcpu_stats(struct netvsc_vf_pcpu_stats); |
| @@ -2647,6 +2648,8 @@ static int netvsc_remove(struct hv_devic |
| cancel_delayed_work_sync(&ndev_ctx->dwork); |
| |
| rtnl_lock(); |
| + cancel_delayed_work_sync(&ndev_ctx->vfns_work); |
| + |
| nvdev = rtnl_dereference(ndev_ctx->nvdev); |
| if (nvdev) { |
| cancel_work_sync(&nvdev->subchan_work); |
| @@ -2689,6 +2692,7 @@ static int netvsc_suspend(struct hv_devi |
| cancel_delayed_work_sync(&ndev_ctx->dwork); |
| |
| rtnl_lock(); |
| + cancel_delayed_work_sync(&ndev_ctx->vfns_work); |
| |
| nvdev = rtnl_dereference(ndev_ctx->nvdev); |
| if (nvdev == NULL) { |
| @@ -2782,6 +2786,27 @@ static void netvsc_event_set_vf_ns(struc |
| } |
| } |
| |
| +void netvsc_vfns_work(struct work_struct *w) |
| +{ |
| + struct net_device_context *ndev_ctx = |
| + container_of(w, struct net_device_context, vfns_work.work); |
| + struct net_device *ndev; |
| + |
| + if (!rtnl_trylock()) { |
| + schedule_delayed_work(&ndev_ctx->vfns_work, 1); |
| + return; |
| + } |
| + |
| + ndev = hv_get_drvdata(ndev_ctx->device_ctx); |
| + if (!ndev) |
| + goto out; |
| + |
| + netvsc_event_set_vf_ns(ndev); |
| + |
| +out: |
| + rtnl_unlock(); |
| +} |
| + |
| /* |
| * On Hyper-V, every VF interface is matched with a corresponding |
| * synthetic interface. The synthetic interface is presented first |
| @@ -2792,10 +2817,12 @@ static int netvsc_netdev_event(struct no |
| unsigned long event, void *ptr) |
| { |
| struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); |
| + struct net_device_context *ndev_ctx; |
| int ret = 0; |
| |
| if (event_dev->netdev_ops == &device_ops && event == NETDEV_REGISTER) { |
| - netvsc_event_set_vf_ns(event_dev); |
| + ndev_ctx = netdev_priv(event_dev); |
| + schedule_delayed_work(&ndev_ctx->vfns_work, 0); |
| return NOTIFY_DONE; |
| } |
| |