| From e5442e57435a715f1ac56397b375bd38e67f7971 Mon Sep 17 00:00:00 2001 |
| From: Mathias Nyman <mathias.nyman@linux.intel.com> |
| Date: Fri, 7 Apr 2017 17:57:01 +0300 |
| Subject: [PATCH 187/286] xhci: Rework how we handle unresponsive or hotplug |
| removed hosts |
| |
| Introduce a new xhci_hc_died() function that takes care of handling |
| pending commands and URBs if a host controller becomes unresponsive. |
| |
| This addresses issues on hotpluggable xhci controllers that disappear |
| from the bus suddenly, often while the bus (PCI) remove function is |
| still being processed. |
| |
| xhci_hc_died() sets a XHCI_STATE_DYING flag to prevent new URBs and |
| commands from being queued. The flag also ensures xhci_hc_died() will |
| give back pending commands and URBs only once. |
| |
| Host is considered dead if register read returns 0xffffffff, or host |
| fails to abort the command ring, or fails stopping an endpoint after |
| trying for 5 seconds. |
| |
| Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| (cherry picked from commit d9f11ba9f107aa335091ab8d7ba5eea714e46e8b) |
| Signed-off-by: Simon Horman <horms+renesas@verge.net.au> |
| --- |
| drivers/usb/host/xhci-hub.c | 12 ++-- |
| drivers/usb/host/xhci-ring.c | 118 ++++++++++++++++++++----------------------- |
| drivers/usb/host/xhci.c | 16 +++++ |
| drivers/usb/host/xhci.h | 1 |
| 4 files changed, 80 insertions(+), 67 deletions(-) |
| |
| --- a/drivers/usb/host/xhci-hub.c |
| +++ b/drivers/usb/host/xhci-hub.c |
| @@ -1074,7 +1074,8 @@ int xhci_hub_control(struct usb_hcd *hcd |
| goto error; |
| wIndex--; |
| temp = readl(port_array[wIndex]); |
| - if (temp == 0xffffffff) { |
| + if (temp == ~(u32)0) { |
| + xhci_hc_died(xhci); |
| retval = -ENODEV; |
| break; |
| } |
| @@ -1116,7 +1117,8 @@ int xhci_hub_control(struct usb_hcd *hcd |
| goto error; |
| wIndex--; |
| temp = readl(port_array[wIndex]); |
| - if (temp == 0xffffffff) { |
| + if (temp == ~(u32)0) { |
| + xhci_hc_died(xhci); |
| retval = -ENODEV; |
| break; |
| } |
| @@ -1291,7 +1293,8 @@ int xhci_hub_control(struct usb_hcd *hcd |
| goto error; |
| wIndex--; |
| temp = readl(port_array[wIndex]); |
| - if (temp == 0xffffffff) { |
| + if (temp == ~(u32)0) { |
| + xhci_hc_died(xhci); |
| retval = -ENODEV; |
| break; |
| } |
| @@ -1402,7 +1405,8 @@ int xhci_hub_status_data(struct usb_hcd |
| /* For each port, did anything change? If so, set that bit in buf. */ |
| for (i = 0; i < max_ports; i++) { |
| temp = readl(port_array[i]); |
| - if (temp == 0xffffffff) { |
| + if (temp == ~(u32)0) { |
| + xhci_hc_died(xhci); |
| retval = -ENODEV; |
| break; |
| } |
| --- a/drivers/usb/host/xhci-ring.c |
| +++ b/drivers/usb/host/xhci-ring.c |
| @@ -359,21 +359,19 @@ static int xhci_abort_cmd_ring(struct xh |
| xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, |
| &xhci->op_regs->cmd_ring); |
| |
| - /* Section 4.6.1.2 of xHCI 1.0 spec says software should |
| - * time the completion od all xHCI commands, including |
| - * the Command Abort operation. If software doesn't see |
| - * CRR negated in a timely manner (e.g. longer than 5 |
| - * seconds), then it should assume that the there are |
| - * larger problems with the xHC and assert HCRST. |
| + /* Section 4.6.1.2 of xHCI 1.0 spec says software should also time the |
| + * completion of the Command Abort operation. If CRR is not negated in 5 |
| + * seconds then driver handles it as if host died (-ENODEV). |
| + * In the future we should distinguish between -ENODEV and -ETIMEDOUT |
| + * and try to recover a -ETIMEDOUT with a host controller reset. |
| */ |
| ret = xhci_handshake(&xhci->op_regs->cmd_ring, |
| CMD_RING_RUNNING, 0, 5 * 1000 * 1000); |
| if (ret < 0) { |
| - xhci_err(xhci, |
| - "Stop command ring failed, maybe the host is dead\n"); |
| - xhci->xhc_state |= XHCI_STATE_DYING; |
| + xhci_err(xhci, "Abort failed to stop command ring: %d\n", ret); |
| xhci_halt(xhci); |
| - return -ESHUTDOWN; |
| + xhci_hc_died(xhci); |
| + return ret; |
| } |
| /* |
| * Writing the CMD_RING_ABORT bit should cause a cmd completion event, |
| @@ -876,6 +874,40 @@ static void xhci_kill_endpoint_urbs(stru |
| } |
| } |
| |
| +/* |
| + * host controller died, register read returns 0xffffffff |
| + * Complete pending commands, mark them ABORTED. |
| + * URBs need to be given back as usb core might be waiting with device locks |
| + * held for the URBs to finish during device disconnect, blocking host remove. |
| + * |
| + * Call with xhci->lock held. |
| + * lock is released and re-acquired while giving back urb. |
| + */ |
| +void xhci_hc_died(struct xhci_hcd *xhci) |
| +{ |
| + int i, j; |
| + |
| + if (xhci->xhc_state & XHCI_STATE_DYING) |
| + return; |
| + |
| + xhci_err(xhci, "xHCI host controller not responding, assume dead\n"); |
| + xhci->xhc_state |= XHCI_STATE_DYING; |
| + |
| + xhci_cleanup_command_queue(xhci); |
| + |
| + /* return any pending urbs, remove may be waiting for them */ |
| + for (i = 0; i <= HCS_MAX_SLOTS(xhci->hcs_params1); i++) { |
| + if (!xhci->devs[i]) |
| + continue; |
| + for (j = 0; j < 31; j++) |
| + xhci_kill_endpoint_urbs(xhci, i, j); |
| + } |
| + |
| + /* inform usb core hc died if PCI remove isn't already handling it */ |
| + if (!(xhci->xhc_state & XHCI_STATE_REMOVING)) |
| + usb_hc_died(xhci_to_hcd(xhci)); |
| +} |
| + |
| /* Watchdog timer function for when a stop endpoint command fails to complete. |
| * In this case, we assume the host controller is broken or dying or dead. The |
| * host may still be completing some other events, so we have to be careful to |
| @@ -897,7 +929,6 @@ void xhci_stop_endpoint_command_watchdog |
| { |
| struct xhci_hcd *xhci; |
| struct xhci_virt_ep *ep; |
| - int ret, i, j; |
| unsigned long flags; |
| |
| ep = (struct xhci_virt_ep *) arg; |
| @@ -914,52 +945,22 @@ void xhci_stop_endpoint_command_watchdog |
| } |
| |
| xhci_warn(xhci, "xHCI host not responding to stop endpoint command.\n"); |
| - xhci_warn(xhci, "Assuming host is dying, halting host.\n"); |
| - /* Oops, HC is dead or dying or at least not responding to the stop |
| - * endpoint command. |
| - */ |
| - |
| - xhci->xhc_state |= XHCI_STATE_DYING; |
| ep->ep_state &= ~EP_STOP_CMD_PENDING; |
| |
| - /* Disable interrupts from the host controller and start halting it */ |
| - xhci_quiesce(xhci); |
| - spin_unlock_irqrestore(&xhci->lock, flags); |
| + xhci_halt(xhci); |
| |
| - ret = xhci_halt(xhci); |
| + /* |
| + * handle a stop endpoint cmd timeout as if host died (-ENODEV). |
| + * In the future we could distinguish between -ENODEV and -ETIMEDOUT |
| + * and try to recover a -ETIMEDOUT with a host controller reset |
| + */ |
| + xhci_hc_died(xhci); |
| |
| - spin_lock_irqsave(&xhci->lock, flags); |
| - if (ret < 0) { |
| - /* This is bad; the host is not responding to commands and it's |
| - * not allowing itself to be halted. At least interrupts are |
| - * disabled. If we call usb_hc_died(), it will attempt to |
| - * disconnect all device drivers under this host. Those |
| - * disconnect() methods will wait for all URBs to be unlinked, |
| - * so we must complete them. |
| - */ |
| - xhci_warn(xhci, "Non-responsive xHCI host is not halting.\n"); |
| - xhci_warn(xhci, "Completing active URBs anyway.\n"); |
| - /* We could turn all TDs on the rings to no-ops. This won't |
| - * help if the host has cached part of the ring, and is slow if |
| - * we want to preserve the cycle bit. Skip it and hope the host |
| - * doesn't touch the memory. |
| - */ |
| - } |
| - for (i = 0; i < MAX_HC_SLOTS; i++) { |
| - if (!xhci->devs[i]) |
| - continue; |
| - for (j = 0; j < 31; j++) |
| - xhci_kill_endpoint_urbs(xhci, i, j); |
| - } |
| spin_unlock_irqrestore(&xhci->lock, flags); |
| xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, |
| - "Calling usb_hc_died()"); |
| - usb_hc_died(xhci_to_hcd(xhci)); |
| - xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, |
| "xHCI host controller is dead."); |
| } |
| |
| - |
| static void update_ring_for_set_deq_completion(struct xhci_hcd *xhci, |
| struct xhci_virt_device *dev, |
| struct xhci_ring *ep_ring, |
| @@ -1294,7 +1295,6 @@ void xhci_cleanup_command_queue(struct x |
| void xhci_handle_command_timeout(struct work_struct *work) |
| { |
| struct xhci_hcd *xhci; |
| - int ret; |
| unsigned long flags; |
| u64 hw_ring_state; |
| |
| @@ -1315,22 +1315,17 @@ void xhci_handle_command_timeout(struct |
| |
| /* Make sure command ring is running before aborting it */ |
| hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); |
| + if (hw_ring_state == ~(u64)0) { |
| + xhci_hc_died(xhci); |
| + goto time_out_completed; |
| + } |
| + |
| if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) && |
| (hw_ring_state & CMD_RING_RUNNING)) { |
| /* Prevent new doorbell, and start command abort */ |
| xhci->cmd_ring_state = CMD_RING_STATE_ABORTED; |
| xhci_dbg(xhci, "Command timeout\n"); |
| - ret = xhci_abort_cmd_ring(xhci, flags); |
| - if (unlikely(ret == -ESHUTDOWN)) { |
| - xhci_err(xhci, "Abort command ring failed\n"); |
| - xhci_cleanup_command_queue(xhci); |
| - spin_unlock_irqrestore(&xhci->lock, flags); |
| - usb_hc_died(xhci_to_hcd(xhci)->primary_hcd); |
| - xhci_dbg(xhci, "xHCI host controller is dead.\n"); |
| - |
| - return; |
| - } |
| - |
| + xhci_abort_cmd_ring(xhci, flags); |
| goto time_out_completed; |
| } |
| |
| @@ -2698,7 +2693,8 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd |
| spin_lock(&xhci->lock); |
| /* Check if the xHC generated the interrupt, or the irq is shared */ |
| status = readl(&xhci->op_regs->status); |
| - if (status == 0xffffffff) { |
| + if (status == ~(u32)0) { |
| + xhci_hc_died(xhci); |
| ret = IRQ_HANDLED; |
| goto out; |
| } |
| --- a/drivers/usb/host/xhci.c |
| +++ b/drivers/usb/host/xhci.c |
| @@ -1546,10 +1546,16 @@ int xhci_urb_dequeue(struct usb_hcd *hcd |
| if (!ep || !ep_ring) |
| goto err_giveback; |
| |
| + /* If xHC is dead take it down and return ALL URBs in xhci_hc_died() */ |
| temp = readl(&xhci->op_regs->status); |
| - if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) { |
| + if (temp == ~(u32)0 || xhci->xhc_state & XHCI_STATE_DYING) { |
| + xhci_hc_died(xhci); |
| + goto done; |
| + } |
| + |
| + if (xhci->xhc_state & XHCI_STATE_HALTED) { |
| xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb, |
| - "HW died, freeing TD."); |
| + "HC halted, freeing TD manually."); |
| for (i = urb_priv->num_tds_done; |
| i < urb_priv->num_tds; |
| i++) { |
| @@ -2640,6 +2646,12 @@ static int xhci_configure_endpoint(struc |
| return -EINVAL; |
| |
| spin_lock_irqsave(&xhci->lock, flags); |
| + |
| + if (xhci->xhc_state & XHCI_STATE_DYING) { |
| + spin_unlock_irqrestore(&xhci->lock, flags); |
| + return -ESHUTDOWN; |
| + } |
| + |
| virt_dev = xhci->devs[udev->slot_id]; |
| |
| ctrl_ctx = xhci_get_input_control_ctx(command->in_ctx); |
| --- a/drivers/usb/host/xhci.h |
| +++ b/drivers/usb/host/xhci.h |
| @@ -2138,6 +2138,7 @@ int xhci_hub_control(struct usb_hcd *hcd |
| char *buf, u16 wLength); |
| int xhci_hub_status_data(struct usb_hcd *hcd, char *buf); |
| int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1); |
| +void xhci_hc_died(struct xhci_hcd *xhci); |
| |
| #ifdef CONFIG_PM |
| int xhci_bus_suspend(struct usb_hcd *hcd); |