| From 354f6b4770a5352e0d510ffabafcf8d91865cdd6 Mon Sep 17 00:00:00 2001 |
| From: Sasha Levin <sashal@kernel.org> |
| Date: Tue, 11 Nov 2025 14:11:22 +0100 |
| Subject: fs/nls: Fix utf16 to utf8 conversion |
| MIME-Version: 1.0 |
| Content-Type: text/plain; charset=UTF-8 |
| Content-Transfer-Encoding: 8bit |
| |
| From: Armin Wolf <W_Armin@gmx.de> |
| |
| [ Upstream commit 25524b6190295577e4918c689644451365e6466d ] |
| |
| Currently the function responsible for converting between utf16 and |
| utf8 strings will ignore any characters that cannot be converted. |
| This, however, also includes multi-byte characters that do not fit |
| into the provided string buffer. |
| |
| This can cause problems if such a multi-byte character is followed by |
| a single-byte character. In such a case the multi-byte character might |
| be ignored when the provided string buffer is too small, but the |
| single-byte character might fit and is thus still copied into the |
| resulting string. |
| |
| Fix this by no longer filling the provided string buffer once a |
| character does not fit. To make this possible, extend utf32_to_utf8() |
| to return useful errno codes instead of -1. |
| |
| Fixes: 74675a58507e ("NLS: update handling of Unicode") |
| Signed-off-by: Armin Wolf <W_Armin@gmx.de> |
| Link: https://patch.msgid.link/20251111131125.3379-2-W_Armin@gmx.de |
| Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> |
| Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| fs/nls/nls_base.c | 16 ++++++++++++---- |
| 1 file changed, 12 insertions(+), 4 deletions(-) |
| |
| diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c |
| index a026dbd3593f6..7eacded3c17d1 100644 |
| --- a/fs/nls/nls_base.c |
| +++ b/fs/nls/nls_base.c |
| @@ -94,7 +94,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxout) |
| |
| l = u; |
| if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) |
| - return -1; |
| + return -EILSEQ; |
| |
| nc = 0; |
| for (t = utf8_table; t->cmask && maxout; t++, maxout--) { |
| @@ -110,7 +110,7 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxout) |
| return nc; |
| } |
| } |
| - return -1; |
| + return -EOVERFLOW; |
| } |
| EXPORT_SYMBOL(utf32_to_utf8); |
| |
| @@ -217,8 +217,16 @@ int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian, |
| inlen--; |
| } |
| size = utf32_to_utf8(u, op, maxout); |
| - if (size == -1) { |
| - /* Ignore character and move on */ |
| + if (size < 0) { |
| + if (size == -EILSEQ) { |
| + /* Ignore character and move on */ |
| + continue; |
| + } |
| + /* |
| + * Stop filling the buffer with data once a character |
| + * does not fit anymore. |
| + */ |
| + break; |
| } else { |
| op += size; |
| maxout -= size; |
| -- |
| 2.51.0 |
| |