releases/5.10.99/audit-improve-audit-queue-handling-when-audit-1-on-cmdline.patch - pub/scm/linux/kernel/git/stable/stable-queue - Git at Google

 From f26d04331360d42dbd6b58448bd98e4edbfbe1c5 Mon Sep 17 00:00:00 2001
 From: Paul Moore <paul@paul-moore.com>
 Date: Thu, 13 Jan 2022 18:54:38 -0500
 Subject: audit: improve audit queue handling when "audit=1" on cmdline

 From: Paul Moore <paul@paul-moore.com>

 commit f26d04331360d42dbd6b58448bd98e4edbfbe1c5 upstream.

 When an admin enables audit at early boot via the "audit=1" kernel
 command line the audit queue behavior is slightly different; the
 audit subsystem goes to greater lengths to avoid dropping records,
 which unfortunately can result in problems when the audit daemon is
 forcibly stopped for an extended period of time.

 This patch makes a number of changes designed to improve the audit
 queuing behavior so that leaving the audit daemon in a stopped state
 for an extended period does not cause a significant impact to the
 system.

 - kauditd_send_queue() is now limited to looping through the
   passed queue only once per call.  This not only prevents the
   function from looping indefinitely when records are returned
   to the current queue, it also allows any recovery handling in
   kauditd_thread() to take place when kauditd_send_queue()
   returns.

 - Transient netlink send errors seen as -EAGAIN now cause the
   record to be returned to the retry queue instead of going to
   the hold queue.  The intention of the hold queue is to store,
   perhaps for an extended period of time, the events which led
   up to the audit daemon going offline.  The retry queue remains
   a temporary queue intended to protect against transient issues
   between the kernel and the audit daemon.

 - The retry queue is now limited by the audit_backlog_limit
   setting, the same as the other queues.  This allows admins
   to bound the size of all of the audit queues on the system.

 - kauditd_rehold_skb() now returns records to the end of the
   hold queue to ensure ordering is preserved in the face of
   recent changes to kauditd_send_queue().

 Cc: stable@vger.kernel.org
 Fixes: 5b52330bbfe63 ("audit: fix auditd/kernel connection state tracking")
 Fixes: f4b3ee3c85551 ("audit: improve robustness of the audit queue handling")
 Reported-by: Gaosheng Cui <cuigaosheng1@huawei.com>
 Tested-by: Gaosheng Cui <cuigaosheng1@huawei.com>
 Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
 Signed-off-by: Paul Moore <paul@paul-moore.com>
 Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 ---
  kernel/audit.c |   62 +++++++++++++++++++++++++++++++++++++++------------------
  1 file changed, 43 insertions(+), 19 deletions(-)

 --- a/kernel/audit.c
 +++ b/kernel/audit.c
 @@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk
  /**
   * kauditd_rehold_skb - Handle a audit record send failure in the hold queue
   * @skb: audit record
 + * @error: error code (unused)
   *
   * Description:
   * This should only be used by the kauditd_thread when it fails to flush the
   * hold queue.
   */
 -static void kauditd_rehold_skb(struct sk_buff *skb)
 +static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error)
  {
 -	/* put the record back in the queue at the same place */
 -	skb_queue_head(&audit_hold_queue, skb);
 +	/* put the record back in the queue */
 +	skb_queue_tail(&audit_hold_queue, skb);
  }

  /**
   * kauditd_hold_skb - Queue an audit record, waiting for auditd
   * @skb: audit record
 + * @error: error code
   *
   * Description:
   * Queue the audit record, waiting for an instance of auditd.  When this
 @@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk
   * and queue it, if we have room.  If we want to hold on to the record, but we
   * don't have room, record a record lost message.
   */
 -static void kauditd_hold_skb(struct sk_buff *skb)
 +static void kauditd_hold_skb(struct sk_buff *skb, int error)
  {
  	/* at this point it is uncertain if we will ever send this to auditd so
  	 * try to send the message via printk before we go any further */
  	kauditd_printk_skb(skb);

  	/* can we just silently drop the message? */
 -	if (!audit_default) {
 -		kfree_skb(skb);
 -		return;
 +	if (!audit_default)
 +		goto drop;
 +
 +	/* the hold queue is only for when the daemon goes away completely,
 +	 * not -EAGAIN failures; if we are in a -EAGAIN state requeue the
 +	 * record on the retry queue unless it's full, in which case drop it
 +	 */
 +	if (error == -EAGAIN) {
 +		if (!audit_backlog_limit ||
 +		    skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
 +			skb_queue_tail(&audit_retry_queue, skb);
 +			return;
 +		}
 +		audit_log_lost("kauditd retry queue overflow");
 +		goto drop;
  	}

 -	/* if we have room, queue the message */
 +	/* if we have room in the hold queue, queue the message */
  	if (!audit_backlog_limit ||
  	    skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
  		skb_queue_tail(&audit_hold_queue, skb);
 @@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_b

  	/* we have no other options - drop the message */
  	audit_log_lost("kauditd hold queue overflow");
 +drop:
  	kfree_skb(skb);
  }

  /**
   * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
   * @skb: audit record
 + * @error: error code (unused)
   *
   * Description:
   * Not as serious as kauditd_hold_skb() as we still have a connected auditd,
   * but for some reason we are having problems sending it audit records so
   * queue the given record and attempt to resend.
   */
 -static void kauditd_retry_skb(struct sk_buff *skb)
 +static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error)
  {
 -	/* NOTE: because records should only live in the retry queue for a
 -	 * short period of time, before either being sent or moved to the hold
 -	 * queue, we don't currently enforce a limit on this queue */
 -	skb_queue_tail(&audit_retry_queue, skb);
 +	if (!audit_backlog_limit ||
 +	    skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
 +		skb_queue_tail(&audit_retry_queue, skb);
 +		return;
 +	}
 +
 +	/* we have to drop the record, send it via printk as a last effort */
 +	kauditd_printk_skb(skb);
 +	audit_log_lost("kauditd retry queue overflow");
 +	kfree_skb(skb);
  }

  /**
 @@ -640,7 +662,7 @@ static void auditd_reset(const struct au
  	/* flush the retry queue to the hold queue, but don't touch the main
  	 * queue since we need to process that normally for multicast */
  	while ((skb = skb_dequeue(&audit_retry_queue)))
 -		kauditd_hold_skb(skb);
 +		kauditd_hold_skb(skb, -ECONNREFUSED);
  }

  /**
 @@ -714,16 +736,18 @@ static int kauditd_send_queue(struct soc
  			      struct sk_buff_head *queue,
  			      unsigned int retry_limit,
  			      void (*skb_hook)(struct sk_buff *skb),
 -			      void (*err_hook)(struct sk_buff *skb))
 +			      void (*err_hook)(struct sk_buff *skb, int error))
  {
  	int rc = 0;
 -	struct sk_buff *skb;
 +	struct sk_buff *skb = NULL;
 +	struct sk_buff *skb_tail;
  	unsigned int failed = 0;

  	/* NOTE: kauditd_thread takes care of all our locking, we just use
  	 *       the netlink info passed to us (e.g. sk and portid) */

 -	while ((skb = skb_dequeue(queue))) {
 +	skb_tail = skb_peek_tail(queue);
 +	while ((skb != skb_tail) && (skb = skb_dequeue(queue))) {
  		/* call the skb_hook for each skb we touch */
  		if (skb_hook)
  			(*skb_hook)(skb);
 @@ -731,7 +755,7 @@ static int kauditd_send_queue(struct soc
  		/* can we send to anyone via unicast? */
  		if (!sk) {
  			if (err_hook)
 -				(*err_hook)(skb);
 +				(*err_hook)(skb, -ECONNREFUSED);
  			continue;
  		}

 @@ -745,7 +769,7 @@ retry:
  			    rc == -ECONNREFUSED || rc == -EPERM) {
  				sk = NULL;
  				if (err_hook)
 -					(*err_hook)(skb);
 +					(*err_hook)(skb, rc);
  				if (rc == -EAGAIN)
  					rc = 0;
  				/* continue to drain the queue */
	From f26d04331360d42dbd6b58448bd98e4edbfbe1c5 Mon Sep 17 00:00:00 2001
	From: Paul Moore <paul@paul-moore.com>
	Date: Thu, 13 Jan 2022 18:54:38 -0500
	Subject: audit: improve audit queue handling when "audit=1" on cmdline

	From: Paul Moore <paul@paul-moore.com>

	commit f26d04331360d42dbd6b58448bd98e4edbfbe1c5 upstream.

	When an admin enables audit at early boot via the "audit=1" kernel
	command line the audit queue behavior is slightly different; the
	audit subsystem goes to greater lengths to avoid dropping records,
	which unfortunately can result in problems when the audit daemon is
	forcibly stopped for an extended period of time.

	This patch makes a number of changes designed to improve the audit
	queuing behavior so that leaving the audit daemon in a stopped state
	for an extended period does not cause a significant impact to the
	system.

	- kauditd_send_queue() is now limited to looping through the
	passed queue only once per call. This not only prevents the
	function from looping indefinitely when records are returned
	to the current queue, it also allows any recovery handling in
	kauditd_thread() to take place when kauditd_send_queue()
	returns.

	- Transient netlink send errors seen as -EAGAIN now cause the
	record to be returned to the retry queue instead of going to
	the hold queue. The intention of the hold queue is to store,
	perhaps for an extended period of time, the events which led
	up to the audit daemon going offline. The retry queue remains
	a temporary queue intended to protect against transient issues
	between the kernel and the audit daemon.

	- The retry queue is now limited by the audit_backlog_limit
	setting, the same as the other queues. This allows admins
	to bound the size of all of the audit queues on the system.

	- kauditd_rehold_skb() now returns records to the end of the
	hold queue to ensure ordering is preserved in the face of
	recent changes to kauditd_send_queue().

	Cc: stable@vger.kernel.org
	Fixes: 5b52330bbfe63 ("audit: fix auditd/kernel connection state tracking")
	Fixes: f4b3ee3c85551 ("audit: improve robustness of the audit queue handling")
	Reported-by: Gaosheng Cui <cuigaosheng1@huawei.com>
	Tested-by: Gaosheng Cui <cuigaosheng1@huawei.com>
	Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
	Signed-off-by: Paul Moore <paul@paul-moore.com>
	Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	---
	kernel/audit.c \| 62 +++++++++++++++++++++++++++++++++++++++------------------
	1 file changed, 43 insertions(+), 19 deletions(-)

	--- a/kernel/audit.c
	+++ b/kernel/audit.c
	@@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk
	/**
	* kauditd_rehold_skb - Handle a audit record send failure in the hold queue
	* @skb: audit record
	+ * @error: error code (unused)
	*
	* Description:
	* This should only be used by the kauditd_thread when it fails to flush the
	* hold queue.
	*/
	-static void kauditd_rehold_skb(struct sk_buff *skb)
	+static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error)
	{
	- /* put the record back in the queue at the same place */
	- skb_queue_head(&audit_hold_queue, skb);
	+ /* put the record back in the queue */
	+ skb_queue_tail(&audit_hold_queue, skb);
	}

	/**
	* kauditd_hold_skb - Queue an audit record, waiting for auditd
	* @skb: audit record
	+ * @error: error code
	*
	* Description:
	* Queue the audit record, waiting for an instance of auditd. When this
	@@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk
	* and queue it, if we have room. If we want to hold on to the record, but we
	* don't have room, record a record lost message.
	*/
	-static void kauditd_hold_skb(struct sk_buff *skb)
	+static void kauditd_hold_skb(struct sk_buff *skb, int error)
	{
	/* at this point it is uncertain if we will ever send this to auditd so
	* try to send the message via printk before we go any further */
	kauditd_printk_skb(skb);

	/* can we just silently drop the message? */
	- if (!audit_default) {
	- kfree_skb(skb);
	- return;
	+ if (!audit_default)
	+ goto drop;
	+
	+ /* the hold queue is only for when the daemon goes away completely,
	+ * not -EAGAIN failures; if we are in a -EAGAIN state requeue the
	+ * record on the retry queue unless it's full, in which case drop it
	+ */
	+ if (error == -EAGAIN) {
	+ if (!audit_backlog_limit \|\|
	+ skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
	+ skb_queue_tail(&audit_retry_queue, skb);
	+ return;
	+ }
	+ audit_log_lost("kauditd retry queue overflow");
	+ goto drop;
	}

	- /* if we have room, queue the message */
	+ /* if we have room in the hold queue, queue the message */
	if (!audit_backlog_limit \|\|
	skb_queue_len(&audit_hold_queue) < audit_backlog_limit) {
	skb_queue_tail(&audit_hold_queue, skb);
	@@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_b

	/* we have no other options - drop the message */
	audit_log_lost("kauditd hold queue overflow");
	+drop:
	kfree_skb(skb);
	}

	/**
	* kauditd_retry_skb - Queue an audit record, attempt to send again to auditd
	* @skb: audit record
	+ * @error: error code (unused)
	*
	* Description:
	* Not as serious as kauditd_hold_skb() as we still have a connected auditd,
	* but for some reason we are having problems sending it audit records so
	* queue the given record and attempt to resend.
	*/
	-static void kauditd_retry_skb(struct sk_buff *skb)
	+static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error)
	{
	- /* NOTE: because records should only live in the retry queue for a
	- * short period of time, before either being sent or moved to the hold
	- * queue, we don't currently enforce a limit on this queue */
	- skb_queue_tail(&audit_retry_queue, skb);
	+ if (!audit_backlog_limit \|\|
	+ skb_queue_len(&audit_retry_queue) < audit_backlog_limit) {
	+ skb_queue_tail(&audit_retry_queue, skb);
	+ return;
	+ }
	+
	+ /* we have to drop the record, send it via printk as a last effort */
	+ kauditd_printk_skb(skb);
	+ audit_log_lost("kauditd retry queue overflow");
	+ kfree_skb(skb);
	}

	/**
	@@ -640,7 +662,7 @@ static void auditd_reset(const struct au
	/* flush the retry queue to the hold queue, but don't touch the main
	* queue since we need to process that normally for multicast */
	while ((skb = skb_dequeue(&audit_retry_queue)))
	- kauditd_hold_skb(skb);
	+ kauditd_hold_skb(skb, -ECONNREFUSED);
	}

	/**
	@@ -714,16 +736,18 @@ static int kauditd_send_queue(struct soc
	struct sk_buff_head *queue,
	unsigned int retry_limit,
	void (skb_hook)(struct sk_buff skb),
	- void (err_hook)(struct sk_buff skb))
	+ void (err_hook)(struct sk_buff skb, int error))
	{
	int rc = 0;
	- struct sk_buff *skb;
	+ struct sk_buff *skb = NULL;
	+ struct sk_buff *skb_tail;
	unsigned int failed = 0;

	/* NOTE: kauditd_thread takes care of all our locking, we just use
	* the netlink info passed to us (e.g. sk and portid) */

	- while ((skb = skb_dequeue(queue))) {
	+ skb_tail = skb_peek_tail(queue);
	+ while ((skb != skb_tail) && (skb = skb_dequeue(queue))) {
	/* call the skb_hook for each skb we touch */
	if (skb_hook)
	(*skb_hook)(skb);
	@@ -731,7 +755,7 @@ static int kauditd_send_queue(struct soc
	/* can we send to anyone via unicast? */
	if (!sk) {
	if (err_hook)
	- (*err_hook)(skb);
	+ (*err_hook)(skb, -ECONNREFUSED);
	continue;
	}

	@@ -745,7 +769,7 @@ retry:
	rc == -ECONNREFUSED \|\| rc == -EPERM) {
	sk = NULL;
	if (err_hook)
	- (*err_hook)(skb);
	+ (*err_hook)(skb, rc);
	if (rc == -EAGAIN)
	rc = 0;
	/* continue to drain the queue */