| From c504f0ce87de12434f44f09a60f766ebed7ca4cb Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Wed, 12 May 2021 01:06:40 -0700 |
| Subject: PCI: hv: Fix a race condition when removing the device |
| |
| From: Long Li <longli@microsoft.com> |
| |
| [ Upstream commit 94d22763207ac6633612b8d8e0ca4fba0f7aa139 ] |
| |
| On removing the device, any work item (hv_pci_devices_present() or |
| hv_pci_eject_device()) scheduled on workqueue hbus->wq may still be running |
| and race with hv_pci_remove(). |
| |
| This can happen because the host may send PCI_EJECT or PCI_BUS_RELATIONS(2) |
| and decide to rescind the channel immediately after that. |
| |
| Fix this by flushing/destroying the workqueue of hbus before doing hbus remove. |
| |
| Link: https://lore.kernel.org/r/1620806800-30983-1-git-send-email-longli@linuxonhyperv.com |
| Signed-off-by: Long Li <longli@microsoft.com> |
| Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> |
| Reviewed-by: Michael Kelley <mikelley@microsoft.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| drivers/pci/controller/pci-hyperv.c | 30 ++++++++++++++++++++++------- |
| 1 file changed, 23 insertions(+), 7 deletions(-) |
| |
| diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c |
| index 7479edf3676c..fd30f70c7560 100644 |
| --- a/drivers/pci/controller/pci-hyperv.c |
| +++ b/drivers/pci/controller/pci-hyperv.c |
| @@ -444,7 +444,6 @@ enum hv_pcibus_state { |
| hv_pcibus_probed, |
| hv_pcibus_installed, |
| hv_pcibus_removing, |
| - hv_pcibus_removed, |
| hv_pcibus_maximum |
| }; |
| |
| @@ -3247,8 +3246,9 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) |
| struct pci_packet teardown_packet; |
| u8 buffer[sizeof(struct pci_message)]; |
| } pkt; |
| - struct hv_dr_state *dr; |
| struct hv_pci_compl comp_pkt; |
| + struct hv_pci_dev *hpdev, *tmp; |
| + unsigned long flags; |
| int ret; |
| |
| /* |
| @@ -3260,9 +3260,16 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) |
| |
| if (!keep_devs) { |
| /* Delete any children which might still exist. */ |
| - dr = kzalloc(sizeof(*dr), GFP_KERNEL); |
| - if (dr && hv_pci_start_relations_work(hbus, dr)) |
| - kfree(dr); |
| + spin_lock_irqsave(&hbus->device_list_lock, flags); |
| + list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) { |
| + list_del(&hpdev->list_entry); |
| + if (hpdev->pci_slot) |
| + pci_destroy_slot(hpdev->pci_slot); |
| + /* For the two refs got in new_pcichild_device() */ |
| + put_pcichild(hpdev); |
| + put_pcichild(hpdev); |
| + } |
| + spin_unlock_irqrestore(&hbus->device_list_lock, flags); |
| } |
| |
| ret = hv_send_resources_released(hdev); |
| @@ -3305,13 +3312,23 @@ static int hv_pci_remove(struct hv_device *hdev) |
| |
| hbus = hv_get_drvdata(hdev); |
| if (hbus->state == hv_pcibus_installed) { |
| + tasklet_disable(&hdev->channel->callback_event); |
| + hbus->state = hv_pcibus_removing; |
| + tasklet_enable(&hdev->channel->callback_event); |
| + destroy_workqueue(hbus->wq); |
| + hbus->wq = NULL; |
| + /* |
| + * At this point, no work is running or can be scheduled |
| + * on hbus-wq. We can't race with hv_pci_devices_present() |
| + * or hv_pci_eject_device(), it's safe to proceed. |
| + */ |
| + |
| /* Remove the bus from PCI's point of view. */ |
| pci_lock_rescan_remove(); |
| pci_stop_root_bus(hbus->pci_bus); |
| hv_pci_remove_slots(hbus); |
| pci_remove_root_bus(hbus->pci_bus); |
| pci_unlock_rescan_remove(); |
| - hbus->state = hv_pcibus_removed; |
| } |
| |
| ret = hv_pci_bus_exit(hdev, false); |
| @@ -3326,7 +3343,6 @@ static int hv_pci_remove(struct hv_device *hdev) |
| irq_domain_free_fwnode(hbus->sysdata.fwnode); |
| put_hvpcibus(hbus); |
| wait_for_completion(&hbus->remove_event); |
| - destroy_workqueue(hbus->wq); |
| |
| hv_put_dom_num(hbus->sysdata.domain); |
| |
| -- |
| 2.30.2 |
| |