| From 3d3fec0a5dd00618a10bb70737839d4e00a989b1 Mon Sep 17 00:00:00 2001 |
| From: Linus Torvalds <torvalds@linux-foundation.org> |
| Date: Wed, 22 Jan 2020 12:37:25 -0800 |
| Subject: [PATCH] readdir: make user_access_begin() use the real access range |
| |
| commit 3c2659bd1db81ed6a264a9fc6262d51667d655ad upstream. |
| |
| In commit 9f79b78ef744 ("Convert filldir[64]() from __put_user() to |
| unsafe_put_user()") I changed filldir to not do individual __put_user() |
| accesses, but instead use unsafe_put_user() surrounded by the proper |
| user_access_begin/end() pair. |
| |
| That make them enormously faster on modern x86, where the STAC/CLAC |
| games make individual user accesses fairly heavy-weight. |
| |
| However, the user_access_begin() range was not really the exact right |
| one, since filldir() has the unfortunate problem that it needs to not |
| only fill out the new directory entry, it also needs to fix up the |
| previous one to contain the proper file offset. |
| |
| It's unfortunate, but the "d_off" field in "struct dirent" is _not_ the |
| file offset of the directory entry itself - it's the offset of the next |
| one. So we end up backfilling the offset in the previous entry as we |
| walk along. |
| |
| But since x86 didn't really care about the exact range, and used to be |
| the only architecture that did anything fancy in user_access_begin() to |
| begin with, the filldir[64]() changes did something lazy, and even |
| commented on it: |
| |
| /* |
| * Note! This range-checks 'previous' (which may be NULL). |
| * The real range was checked in getdents |
| */ |
| if (!user_access_begin(dirent, sizeof(*dirent))) |
| goto efault; |
| |
| and it all worked fine. |
| |
| But now 32-bit ppc is starting to also implement user_access_begin(), |
| and the fact that we faked the range to only be the (possibly not even |
| valid) previous directory entry becomes a problem, because ppc32 will |
| actually be using the range that is passed in for more than just "check |
| that it's user space". |
| |
| This is a complete rewrite of Christophe's original patch. |
| |
| By saving off the record length of the previous entry instead of a |
| pointer to it in the filldir data structures, we can simplify the range |
| check and the writing of the previous entry d_off field. No need for |
| any conditionals in the user accesses themselves, although we retain the |
| conditional EINTR checking for the "was this the first directory entry" |
| signal handling latency logic. |
| |
| Fixes: 9f79b78ef744 ("Convert filldir[64]() from __put_user() to unsafe_put_user()") |
| Link: https://lore.kernel.org/lkml/a02d3426f93f7eb04960a4d9140902d278cab0bb.1579697910.git.christophe.leroy@c-s.fr/ |
| Link: https://lore.kernel.org/lkml/408c90c4068b00ea8f1c41cca45b84ec23d4946b.1579783936.git.christophe.leroy@c-s.fr/ |
| Reported-and-tested-by: Christophe Leroy <christophe.leroy@c-s.fr> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/fs/readdir.c b/fs/readdir.c |
| index d26d5ea4de7b..d5ee72280c82 100644 |
| --- a/fs/readdir.c |
| +++ b/fs/readdir.c |
| @@ -206,7 +206,7 @@ struct linux_dirent { |
| struct getdents_callback { |
| struct dir_context ctx; |
| struct linux_dirent __user * current_dir; |
| - struct linux_dirent __user * previous; |
| + int prev_reclen; |
| int count; |
| int error; |
| }; |
| @@ -214,12 +214,13 @@ struct getdents_callback { |
| static int filldir(struct dir_context *ctx, const char *name, int namlen, |
| loff_t offset, u64 ino, unsigned int d_type) |
| { |
| - struct linux_dirent __user * dirent; |
| + struct linux_dirent __user *dirent, *prev; |
| struct getdents_callback *buf = |
| container_of(ctx, struct getdents_callback, ctx); |
| unsigned long d_ino; |
| int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, |
| sizeof(long)); |
| + int prev_reclen; |
| |
| buf->error = verify_dirent_name(name, namlen); |
| if (unlikely(buf->error)) |
| @@ -232,28 +233,24 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen, |
| buf->error = -EOVERFLOW; |
| return -EOVERFLOW; |
| } |
| - dirent = buf->previous; |
| - if (dirent && signal_pending(current)) |
| + prev_reclen = buf->prev_reclen; |
| + if (prev_reclen && signal_pending(current)) |
| return -EINTR; |
| - |
| - /* |
| - * Note! This range-checks 'previous' (which may be NULL). |
| - * The real range was checked in getdents |
| - */ |
| - if (!user_access_begin(dirent, sizeof(*dirent))) |
| - goto efault; |
| - if (dirent) |
| - unsafe_put_user(offset, &dirent->d_off, efault_end); |
| dirent = buf->current_dir; |
| + prev = (void __user *) dirent - prev_reclen; |
| + if (!user_access_begin(prev, reclen + prev_reclen)) |
| + goto efault; |
| + |
| + /* This might be 'dirent->d_off', but if so it will get overwritten */ |
| + unsafe_put_user(offset, &prev->d_off, efault_end); |
| unsafe_put_user(d_ino, &dirent->d_ino, efault_end); |
| unsafe_put_user(reclen, &dirent->d_reclen, efault_end); |
| unsafe_put_user(d_type, (char __user *) dirent + reclen - 1, efault_end); |
| unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
| user_access_end(); |
| |
| - buf->previous = dirent; |
| - dirent = (void __user *)dirent + reclen; |
| - buf->current_dir = dirent; |
| + buf->current_dir = (void __user *)dirent + reclen; |
| + buf->prev_reclen = reclen; |
| buf->count -= reclen; |
| return 0; |
| efault_end: |
| @@ -267,7 +264,6 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, |
| struct linux_dirent __user *, dirent, unsigned int, count) |
| { |
| struct fd f; |
| - struct linux_dirent __user * lastdirent; |
| struct getdents_callback buf = { |
| .ctx.actor = filldir, |
| .count = count, |
| @@ -285,8 +281,10 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, |
| error = iterate_dir(f.file, &buf.ctx); |
| if (error >= 0) |
| error = buf.error; |
| - lastdirent = buf.previous; |
| - if (lastdirent) { |
| + if (buf.prev_reclen) { |
| + struct linux_dirent __user * lastdirent; |
| + lastdirent = (void __user *)buf.current_dir - buf.prev_reclen; |
| + |
| if (put_user(buf.ctx.pos, &lastdirent->d_off)) |
| error = -EFAULT; |
| else |
| @@ -299,7 +297,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, |
| struct getdents_callback64 { |
| struct dir_context ctx; |
| struct linux_dirent64 __user * current_dir; |
| - struct linux_dirent64 __user * previous; |
| + int prev_reclen; |
| int count; |
| int error; |
| }; |
| @@ -307,11 +305,12 @@ struct getdents_callback64 { |
| static int filldir64(struct dir_context *ctx, const char *name, int namlen, |
| loff_t offset, u64 ino, unsigned int d_type) |
| { |
| - struct linux_dirent64 __user *dirent; |
| + struct linux_dirent64 __user *dirent, *prev; |
| struct getdents_callback64 *buf = |
| container_of(ctx, struct getdents_callback64, ctx); |
| int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, |
| sizeof(u64)); |
| + int prev_reclen; |
| |
| buf->error = verify_dirent_name(name, namlen); |
| if (unlikely(buf->error)) |
| @@ -319,30 +318,27 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen, |
| buf->error = -EINVAL; /* only used if we fail.. */ |
| if (reclen > buf->count) |
| return -EINVAL; |
| - dirent = buf->previous; |
| - if (dirent && signal_pending(current)) |
| + prev_reclen = buf->prev_reclen; |
| + if (prev_reclen && signal_pending(current)) |
| return -EINTR; |
| - |
| - /* |
| - * Note! This range-checks 'previous' (which may be NULL). |
| - * The real range was checked in getdents |
| - */ |
| - if (!user_access_begin(dirent, sizeof(*dirent))) |
| - goto efault; |
| - if (dirent) |
| - unsafe_put_user(offset, &dirent->d_off, efault_end); |
| dirent = buf->current_dir; |
| + prev = (void __user *)dirent - prev_reclen; |
| + if (!user_access_begin(prev, reclen + prev_reclen)) |
| + goto efault; |
| + |
| + /* This might be 'dirent->d_off', but if so it will get overwritten */ |
| + unsafe_put_user(offset, &prev->d_off, efault_end); |
| unsafe_put_user(ino, &dirent->d_ino, efault_end); |
| unsafe_put_user(reclen, &dirent->d_reclen, efault_end); |
| unsafe_put_user(d_type, &dirent->d_type, efault_end); |
| unsafe_copy_dirent_name(dirent->d_name, name, namlen, efault_end); |
| user_access_end(); |
| |
| - buf->previous = dirent; |
| - dirent = (void __user *)dirent + reclen; |
| - buf->current_dir = dirent; |
| + buf->prev_reclen = reclen; |
| + buf->current_dir = (void __user *)dirent + reclen; |
| buf->count -= reclen; |
| return 0; |
| + |
| efault_end: |
| user_access_end(); |
| efault: |
| @@ -354,7 +350,6 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, |
| unsigned int count) |
| { |
| struct fd f; |
| - struct linux_dirent64 __user * lastdirent; |
| struct getdents_callback64 buf = { |
| .ctx.actor = filldir64, |
| .count = count, |
| @@ -372,9 +367,11 @@ int ksys_getdents64(unsigned int fd, struct linux_dirent64 __user *dirent, |
| error = iterate_dir(f.file, &buf.ctx); |
| if (error >= 0) |
| error = buf.error; |
| - lastdirent = buf.previous; |
| - if (lastdirent) { |
| + if (buf.prev_reclen) { |
| + struct linux_dirent64 __user * lastdirent; |
| typeof(lastdirent->d_off) d_off = buf.ctx.pos; |
| + |
| + lastdirent = (void __user *) buf.current_dir - buf.prev_reclen; |
| if (__put_user(d_off, &lastdirent->d_off)) |
| error = -EFAULT; |
| else |
| -- |
| 2.7.4 |
| |