blob: 945ae45aad5660043f29eded686c95e8a7825c68 [file] [log] [blame]
From e5442e57435a715f1ac56397b375bd38e67f7971 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Fri, 7 Apr 2017 17:57:01 +0300
Subject: [PATCH 187/286] xhci: Rework how we handle unresponsive or hoptlug
removed hosts
Introduce a new xhci_hc_died() function that takes care of handling
pending commands and URBs if a host controller becomes unresponsive.
This addresses issues on hotpluggable xhci controllers that disappear
from the bus suddenly, often while the bus (PCI) remove function is
still being processed.
xhci_hc_died() sets a XHCI_STATUS_DYING flag to prevent new URBs and
commands or to be queued. The flag also ensures xhci_hc_died() will
give back pending commands and URBs once.
Host is considered dead if register read returns 0xffffffff, or host
fails to abort the command ring, or fails stopping an endpoint after
trying for 5 seconds.
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit d9f11ba9f107aa335091ab8d7ba5eea714e46e8b)
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
---
drivers/usb/host/xhci-hub.c | 12 ++--
drivers/usb/host/xhci-ring.c | 118 ++++++++++++++++++++-----------------------
drivers/usb/host/xhci.c | 16 +++++
drivers/usb/host/xhci.h | 1
4 files changed, 80 insertions(+), 67 deletions(-)
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -1074,7 +1074,8 @@ int xhci_hub_control(struct usb_hcd *hcd
goto error;
wIndex--;
temp = readl(port_array[wIndex]);
- if (temp == 0xffffffff) {
+ if (temp == ~(u32)0) {
+ xhci_hc_died(xhci);
retval = -ENODEV;
break;
}
@@ -1116,7 +1117,8 @@ int xhci_hub_control(struct usb_hcd *hcd
goto error;
wIndex--;
temp = readl(port_array[wIndex]);
- if (temp == 0xffffffff) {
+ if (temp == ~(u32)0) {
+ xhci_hc_died(xhci);
retval = -ENODEV;
break;
}
@@ -1291,7 +1293,8 @@ int xhci_hub_control(struct usb_hcd *hcd
goto error;
wIndex--;
temp = readl(port_array[wIndex]);
- if (temp == 0xffffffff) {
+ if (temp == ~(u32)0) {
+ xhci_hc_died(xhci);
retval = -ENODEV;
break;
}
@@ -1402,7 +1405,8 @@ int xhci_hub_status_data(struct usb_hcd
/* For each port, did anything change? If so, set that bit in buf. */
for (i = 0; i < max_ports; i++) {
temp = readl(port_array[i]);
- if (temp == 0xffffffff) {
+ if (temp == ~(u32)0) {
+ xhci_hc_died(xhci);
retval = -ENODEV;
break;
}
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -359,21 +359,19 @@ static int xhci_abort_cmd_ring(struct xh
xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
&xhci->op_regs->cmd_ring);
- /* Section 4.6.1.2 of xHCI 1.0 spec says software should
- * time the completion od all xHCI commands, including
- * the Command Abort operation. If software doesn't see
- * CRR negated in a timely manner (e.g. longer than 5
- * seconds), then it should assume that the there are
- * larger problems with the xHC and assert HCRST.
+ /* Section 4.6.1.2 of xHCI 1.0 spec says software should also time the
+ * completion of the Command Abort operation. If CRR is not negated in 5
+ * seconds then driver handles it as if host died (-ENODEV).
+ * In the future we should distinguish between -ENODEV and -ETIMEDOUT
+ * and try to recover a -ETIMEDOUT with a host controller reset.
*/
ret = xhci_handshake(&xhci->op_regs->cmd_ring,
CMD_RING_RUNNING, 0, 5 * 1000 * 1000);
if (ret < 0) {
- xhci_err(xhci,
- "Stop command ring failed, maybe the host is dead\n");
- xhci->xhc_state |= XHCI_STATE_DYING;
+ xhci_err(xhci, "Abort failed to stop command ring: %d\n", ret);
xhci_halt(xhci);
- return -ESHUTDOWN;
+ xhci_hc_died(xhci);
+ return ret;
}
/*
* Writing the CMD_RING_ABORT bit should cause a cmd completion event,
@@ -876,6 +874,40 @@ static void xhci_kill_endpoint_urbs(stru
}
}
+/*
+ * host controller died, register read returns 0xffffffff
+ * Complete pending commands, mark them ABORTED.
+ * URBs need to be given back as usb core might be waiting with device locks
+ * held for the URBs to finish during device disconnect, blocking host remove.
+ *
+ * Call with xhci->lock held.
+ * lock is relased and re-acquired while giving back urb.
+ */
+void xhci_hc_died(struct xhci_hcd *xhci)
+{
+ int i, j;
+
+ if (xhci->xhc_state & XHCI_STATE_DYING)
+ return;
+
+ xhci_err(xhci, "xHCI host controller not responding, assume dead\n");
+ xhci->xhc_state |= XHCI_STATE_DYING;
+
+ xhci_cleanup_command_queue(xhci);
+
+ /* return any pending urbs, remove may be waiting for them */
+ for (i = 0; i <= HCS_MAX_SLOTS(xhci->hcs_params1); i++) {
+ if (!xhci->devs[i])
+ continue;
+ for (j = 0; j < 31; j++)
+ xhci_kill_endpoint_urbs(xhci, i, j);
+ }
+
+ /* inform usb core hc died if PCI remove isn't already handling it */
+ if (!(xhci->xhc_state & XHCI_STATE_REMOVING))
+ usb_hc_died(xhci_to_hcd(xhci));
+}
+
/* Watchdog timer function for when a stop endpoint command fails to complete.
* In this case, we assume the host controller is broken or dying or dead. The
* host may still be completing some other events, so we have to be careful to
@@ -897,7 +929,6 @@ void xhci_stop_endpoint_command_watchdog
{
struct xhci_hcd *xhci;
struct xhci_virt_ep *ep;
- int ret, i, j;
unsigned long flags;
ep = (struct xhci_virt_ep *) arg;
@@ -914,52 +945,22 @@ void xhci_stop_endpoint_command_watchdog
}
xhci_warn(xhci, "xHCI host not responding to stop endpoint command.\n");
- xhci_warn(xhci, "Assuming host is dying, halting host.\n");
- /* Oops, HC is dead or dying or at least not responding to the stop
- * endpoint command.
- */
-
- xhci->xhc_state |= XHCI_STATE_DYING;
ep->ep_state &= ~EP_STOP_CMD_PENDING;
- /* Disable interrupts from the host controller and start halting it */
- xhci_quiesce(xhci);
- spin_unlock_irqrestore(&xhci->lock, flags);
+ xhci_halt(xhci);
- ret = xhci_halt(xhci);
+ /*
+ * handle a stop endpoint cmd timeout as if host died (-ENODEV).
+ * In the future we could distinguish between -ENODEV and -ETIMEDOUT
+ * and try to recover a -ETIMEDOUT with a host controller reset
+ */
+ xhci_hc_died(xhci);
- spin_lock_irqsave(&xhci->lock, flags);
- if (ret < 0) {
- /* This is bad; the host is not responding to commands and it's
- * not allowing itself to be halted. At least interrupts are
- * disabled. If we call usb_hc_died(), it will attempt to
- * disconnect all device drivers under this host. Those
- * disconnect() methods will wait for all URBs to be unlinked,
- * so we must complete them.
- */
- xhci_warn(xhci, "Non-responsive xHCI host is not halting.\n");
- xhci_warn(xhci, "Completing active URBs anyway.\n");
- /* We could turn all TDs on the rings to no-ops. This won't
- * help if the host has cached part of the ring, and is slow if
- * we want to preserve the cycle bit. Skip it and hope the host
- * doesn't touch the memory.
- */
- }
- for (i = 0; i < MAX_HC_SLOTS; i++) {
- if (!xhci->devs[i])
- continue;
- for (j = 0; j < 31; j++)
- xhci_kill_endpoint_urbs(xhci, i, j);
- }
spin_unlock_irqrestore(&xhci->lock, flags);
xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
- "Calling usb_hc_died()");
- usb_hc_died(xhci_to_hcd(xhci));
- xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
"xHCI host controller is dead.");
}
-
static void update_ring_for_set_deq_completion(struct xhci_hcd *xhci,
struct xhci_virt_device *dev,
struct xhci_ring *ep_ring,
@@ -1294,7 +1295,6 @@ void xhci_cleanup_command_queue(struct x
void xhci_handle_command_timeout(struct work_struct *work)
{
struct xhci_hcd *xhci;
- int ret;
unsigned long flags;
u64 hw_ring_state;
@@ -1315,22 +1315,17 @@ void xhci_handle_command_timeout(struct
/* Make sure command ring is running before aborting it */
hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
+ if (hw_ring_state == ~(u64)0) {
+ xhci_hc_died(xhci);
+ goto time_out_completed;
+ }
+
if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
(hw_ring_state & CMD_RING_RUNNING)) {
/* Prevent new doorbell, and start command abort */
xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
xhci_dbg(xhci, "Command timeout\n");
- ret = xhci_abort_cmd_ring(xhci, flags);
- if (unlikely(ret == -ESHUTDOWN)) {
- xhci_err(xhci, "Abort command ring failed\n");
- xhci_cleanup_command_queue(xhci);
- spin_unlock_irqrestore(&xhci->lock, flags);
- usb_hc_died(xhci_to_hcd(xhci)->primary_hcd);
- xhci_dbg(xhci, "xHCI host controller is dead.\n");
-
- return;
- }
-
+ xhci_abort_cmd_ring(xhci, flags);
goto time_out_completed;
}
@@ -2698,7 +2693,8 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd
spin_lock(&xhci->lock);
/* Check if the xHC generated the interrupt, or the irq is shared */
status = readl(&xhci->op_regs->status);
- if (status == 0xffffffff) {
+ if (status == ~(u32)0) {
+ xhci_hc_died(xhci);
ret = IRQ_HANDLED;
goto out;
}
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1546,10 +1546,16 @@ int xhci_urb_dequeue(struct usb_hcd *hcd
if (!ep || !ep_ring)
goto err_giveback;
+ /* If xHC is dead take it down and return ALL URBs in xhci_hc_died() */
temp = readl(&xhci->op_regs->status);
- if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) {
+ if (temp == ~(u32)0 || xhci->xhc_state & XHCI_STATE_DYING) {
+ xhci_hc_died(xhci);
+ goto done;
+ }
+
+ if (xhci->xhc_state & XHCI_STATE_HALTED) {
xhci_dbg_trace(xhci, trace_xhci_dbg_cancel_urb,
- "HW died, freeing TD.");
+ "HC halted, freeing TD manually.");
for (i = urb_priv->num_tds_done;
i < urb_priv->num_tds;
i++) {
@@ -2640,6 +2646,12 @@ static int xhci_configure_endpoint(struc
return -EINVAL;
spin_lock_irqsave(&xhci->lock, flags);
+
+ if (xhci->xhc_state & XHCI_STATE_DYING) {
+ spin_unlock_irqrestore(&xhci->lock, flags);
+ return -ESHUTDOWN;
+ }
+
virt_dev = xhci->devs[udev->slot_id];
ctrl_ctx = xhci_get_input_control_ctx(command->in_ctx);
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -2138,6 +2138,7 @@ int xhci_hub_control(struct usb_hcd *hcd
char *buf, u16 wLength);
int xhci_hub_status_data(struct usb_hcd *hcd, char *buf);
int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1);
+void xhci_hc_died(struct xhci_hcd *xhci);
#ifdef CONFIG_PM
int xhci_bus_suspend(struct usb_hcd *hcd);