| From f26d04331360d42dbd6b58448bd98e4edbfbe1c5 Mon Sep 17 00:00:00 2001 |
| From: Paul Moore <paul@paul-moore.com> |
| Date: Thu, 13 Jan 2022 18:54:38 -0500 |
| Subject: audit: improve audit queue handling when "audit=1" on cmdline |
| |
| From: Paul Moore <paul@paul-moore.com> |
| |
| commit f26d04331360d42dbd6b58448bd98e4edbfbe1c5 upstream. |
| |
| When an admin enables audit at early boot via the "audit=1" kernel |
| command line the audit queue behavior is slightly different; the |
| audit subsystem goes to greater lengths to avoid dropping records, |
| which unfortunately can result in problems when the audit daemon is |
| forcibly stopped for an extended period of time. |
| |
| This patch makes a number of changes designed to improve the audit |
| queuing behavior so that leaving the audit daemon in a stopped state |
| for an extended period does not cause a significant impact to the |
| system. |
| |
| - kauditd_send_queue() is now limited to looping through the |
| passed queue only once per call. This not only prevents the |
| function from looping indefinitely when records are returned |
| to the current queue, it also allows any recovery handling in |
| kauditd_thread() to take place when kauditd_send_queue() |
| returns. |
| |
| - Transient netlink send errors seen as -EAGAIN now cause the |
| record to be returned to the retry queue instead of going to |
| the hold queue. The intention of the hold queue is to store, |
| perhaps for an extended period of time, the events which led |
| up to the audit daemon going offline. The retry queue remains |
| a temporary queue intended to protect against transient issues |
| between the kernel and the audit daemon. |
| |
| - The retry queue is now limited by the audit_backlog_limit |
| setting, the same as the other queues. This allows admins |
| to bound the size of all of the audit queues on the system. |
| |
| - kauditd_rehold_skb() now returns records to the end of the |
| hold queue to ensure ordering is preserved in the face of |
| recent changes to kauditd_send_queue(). |
| |
| Cc: stable@vger.kernel.org |
| Fixes: 5b52330bbfe63 ("audit: fix auditd/kernel connection state tracking") |
| Fixes: f4b3ee3c85551 ("audit: improve robustness of the audit queue handling") |
| Reported-by: Gaosheng Cui <cuigaosheng1@huawei.com> |
| Tested-by: Gaosheng Cui <cuigaosheng1@huawei.com> |
| Reviewed-by: Richard Guy Briggs <rgb@redhat.com> |
| Signed-off-by: Paul Moore <paul@paul-moore.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| --- |
| kernel/audit.c | 62 +++++++++++++++++++++++++++++++++++++++------------------ |
| 1 file changed, 43 insertions(+), 19 deletions(-) |
| |
| --- a/kernel/audit.c |
| +++ b/kernel/audit.c |
| @@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk |
| /** |
| * kauditd_rehold_skb - Handle a audit record send failure in the hold queue |
| * @skb: audit record |
| + * @error: error code (unused) |
| * |
| * Description: |
| * This should only be used by the kauditd_thread when it fails to flush the |
| * hold queue. |
| */ |
| -static void kauditd_rehold_skb(struct sk_buff *skb) |
| +static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error) |
| { |
| - /* put the record back in the queue at the same place */ |
| - skb_queue_head(&audit_hold_queue, skb); |
| + /* put the record back in the queue */ |
| + skb_queue_tail(&audit_hold_queue, skb); |
| } |
| |
| /** |
| * kauditd_hold_skb - Queue an audit record, waiting for auditd |
| * @skb: audit record |
| + * @error: error code |
| * |
| * Description: |
| * Queue the audit record, waiting for an instance of auditd. When this |
| @@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk |
| * and queue it, if we have room. If we want to hold on to the record, but we |
| * don't have room, record a record lost message. |
| */ |
| -static void kauditd_hold_skb(struct sk_buff *skb) |
| +static void kauditd_hold_skb(struct sk_buff *skb, int error) |
| { |
| /* at this point it is uncertain if we will ever send this to auditd so |
| * try to send the message via printk before we go any further */ |
| kauditd_printk_skb(skb); |
| |
| /* can we just silently drop the message? */ |
| - if (!audit_default) { |
| - kfree_skb(skb); |
| - return; |
| + if (!audit_default) |
| + goto drop; |
| + |
| + /* the hold queue is only for when the daemon goes away completely, |
| + * not -EAGAIN failures; if we are in a -EAGAIN state requeue the |
| + * record on the retry queue unless it's full, in which case drop it |
| + */ |
| + if (error == -EAGAIN) { |
| + if (!audit_backlog_limit || |
| + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { |
| + skb_queue_tail(&audit_retry_queue, skb); |
| + return; |
| + } |
| + audit_log_lost("kauditd retry queue overflow"); |
| + goto drop; |
| } |
| |
| - /* if we have room, queue the message */ |
| + /* if we have room in the hold queue, queue the message */ |
| if (!audit_backlog_limit || |
| skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { |
| skb_queue_tail(&audit_hold_queue, skb); |
| @@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_b |
| |
| /* we have no other options - drop the message */ |
| audit_log_lost("kauditd hold queue overflow"); |
| +drop: |
| kfree_skb(skb); |
| } |
| |
| /** |
| * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd |
| * @skb: audit record |
| + * @error: error code (unused) |
| * |
| * Description: |
| * Not as serious as kauditd_hold_skb() as we still have a connected auditd, |
| * but for some reason we are having problems sending it audit records so |
| * queue the given record and attempt to resend. |
| */ |
| -static void kauditd_retry_skb(struct sk_buff *skb) |
| +static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error) |
| { |
| - /* NOTE: because records should only live in the retry queue for a |
| - * short period of time, before either being sent or moved to the hold |
| - * queue, we don't currently enforce a limit on this queue */ |
| - skb_queue_tail(&audit_retry_queue, skb); |
| + if (!audit_backlog_limit || |
| + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { |
| + skb_queue_tail(&audit_retry_queue, skb); |
| + return; |
| + } |
| + |
| + /* we have to drop the record, send it via printk as a last effort */ |
| + kauditd_printk_skb(skb); |
| + audit_log_lost("kauditd retry queue overflow"); |
| + kfree_skb(skb); |
| } |
| |
| /** |
| @@ -640,7 +662,7 @@ static void auditd_reset(const struct au |
| /* flush the retry queue to the hold queue, but don't touch the main |
| * queue since we need to process that normally for multicast */ |
| while ((skb = skb_dequeue(&audit_retry_queue))) |
| - kauditd_hold_skb(skb); |
| + kauditd_hold_skb(skb, -ECONNREFUSED); |
| } |
| |
| /** |
| @@ -714,16 +736,18 @@ static int kauditd_send_queue(struct soc |
| struct sk_buff_head *queue, |
| unsigned int retry_limit, |
| void (*skb_hook)(struct sk_buff *skb), |
| - void (*err_hook)(struct sk_buff *skb)) |
| + void (*err_hook)(struct sk_buff *skb, int error)) |
| { |
| int rc = 0; |
| - struct sk_buff *skb; |
| + struct sk_buff *skb = NULL; |
| + struct sk_buff *skb_tail; |
| unsigned int failed = 0; |
| |
| /* NOTE: kauditd_thread takes care of all our locking, we just use |
| * the netlink info passed to us (e.g. sk and portid) */ |
| |
| - while ((skb = skb_dequeue(queue))) { |
| + skb_tail = skb_peek_tail(queue); |
| + while ((skb != skb_tail) && (skb = skb_dequeue(queue))) { |
| /* call the skb_hook for each skb we touch */ |
| if (skb_hook) |
| (*skb_hook)(skb); |
| @@ -731,7 +755,7 @@ static int kauditd_send_queue(struct soc |
| /* can we send to anyone via unicast? */ |
| if (!sk) { |
| if (err_hook) |
| - (*err_hook)(skb); |
| + (*err_hook)(skb, -ECONNREFUSED); |
| continue; |
| } |
| |
| @@ -745,7 +769,7 @@ retry: |
| rc == -ECONNREFUSED || rc == -EPERM) { |
| sk = NULL; |
| if (err_hook) |
| - (*err_hook)(skb); |
| + (*err_hook)(skb, rc); |
| if (rc == -EAGAIN) |
| rc = 0; |
| /* continue to drain the queue */ |