| From 0e80b18d188e1ba0eb38ffdc8ffbbf2e4eea085b Mon Sep 17 00:00:00 2001 |
| From: Oded Gabbay <oded.gabbay@gmail.com> |
| Date: Tue, 3 Dec 2019 10:12:10 +0200 |
| Subject: [PATCH] habanalabs: rate limit error msg on waiting for CS |
| |
| commit 018e0e3594f7dcd029d258e368c485e742fa9cdb upstream. |
| |
| If a user submits a CS, the submission fails, and the user doesn't check |
| the return value but instead uses the error return value as a valid |
| sequence number of a CS and asks to wait on it, the driver will print an |
| error and return an error code for that wait. |
| |
| The real problem happens if the user then ignores the error of the wait and |
| tries to wait again and again. This can lead to a flood of error messages |
| from the driver and even to a soft lockup event. Rate limit both messages |
| to prevent this. |
| |
| Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> |
| Reviewed-by: Tomer Tayar <ttayar@habana.ai> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c |
| index 6fe785e26859..52fd0229c4f7 100644 |
| --- a/drivers/misc/habanalabs/command_submission.c |
| +++ b/drivers/misc/habanalabs/command_submission.c |
| @@ -764,8 +764,9 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) |
| memset(args, 0, sizeof(*args)); |
| |
| if (rc < 0) { |
| - dev_err(hdev->dev, "Error %ld on waiting for CS handle %llu\n", |
| - rc, seq); |
| + dev_err_ratelimited(hdev->dev, |
| + "Error %ld on waiting for CS handle %llu\n", |
| + rc, seq); |
| if (rc == -ERESTARTSYS) { |
| args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED; |
| rc = -EINTR; |
| diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c |
| index f4c92f110a72..4d5a0454208e 100644 |
| --- a/drivers/misc/habanalabs/context.c |
| +++ b/drivers/misc/habanalabs/context.c |
| @@ -159,7 +159,7 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq) |
| spin_lock(&ctx->cs_lock); |
| |
| if (seq >= ctx->cs_sequence) { |
| - dev_notice(hdev->dev, |
| + dev_notice_ratelimited(hdev->dev, |
| "Can't wait on seq %llu because current CS is at seq %llu\n", |
| seq, ctx->cs_sequence); |
| spin_unlock(&ctx->cs_lock); |
| -- |
| 2.7.4 |
| |