// SPDX-License-Identifier: GPL-2.0
/*
* Implement mseal() syscall.
*
* Copyright (c) 2023,2024 Google, Inc.
*
* Author: Jeff Xu <jeffxu@chromium.org>
*/
#include <linux/mempolicy.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/mm_inline.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include "internal.h"

/*
 * mseal() disallows an input range that contains unmapped ranges (VMA holes).
 *
 * It disallows unmapped regions from start to end whether they exist at the
 * start, in the middle, or at the end of the range, or any combination thereof.
 *
 * This is because after sealing a range, there's nothing to stop memory mapping
 * of ranges in the remaining gaps later, meaning that the user might then
 * wrongly consider the entirety of the mseal()'d range to be sealed when it
 * in fact isn't.
 */
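
/*
 * Illustrative only (not part of the kernel build): a userspace sketch of the
 * "no holes" rule above. The sizes are arbitrary, page_size is assumed to come
 * from sysconf(_SC_PAGESIZE), and a libc mseal() wrapper is assumed (otherwise
 * syscall(__NR_mseal, ...) can be used); error handling is omitted.
 *
 *	char *p = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
 *		       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 *	munmap(p + page_size, page_size);	// punch a hole in the middle
 *	mseal(p, 3 * page_size, 0);		// fails: errno == ENOMEM
 *	mseal(p, page_size, 0);			// succeeds: fully mapped
 */
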
/*
* Does the [start, end) range contain any unmapped memory?
*
* We ensure that:
* - start is part of a valid VMA.
* - end is part of a valid VMA.
* - no gap (unallocated memory) exists between start and end.
*/
static bool range_contains_unmapped(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma;
	unsigned long prev_end = start;
	VMA_ITERATOR(vmi, mm, start);

	for_each_vma_range(vmi, vma, end) {
		if (vma->vm_start > prev_end)
			return true;

		prev_end = vma->vm_end;
	}

	return prev_end < end;
}
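
/*
 * Illustrative walkthrough (hypothetical layout, for documentation only):
 * given VMAs [0x1000, 0x3000) and [0x5000, 0x7000), a query of
 * [0x1000, 0x7000) sees the second VMA start (0x5000) above prev_end
 * (0x3000) and returns true; a query of [0x1000, 0x3000) walks a single
 * VMA, prev_end reaches end, and the function returns false.
 */
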
static int mseal_apply(struct mm_struct *mm,
		unsigned long start, unsigned long end)
{
	struct vm_area_struct *vma, *prev;
	unsigned long curr_start = start;
	VMA_ITERATOR(vmi, mm, start);

	/* We know there are no gaps so this will be non-NULL. */
	vma = vma_iter_load(&vmi);
	prev = vma_prev(&vmi);
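	/*
	 * vma_modify_flags() needs the VMA that precedes the range being
	 * modified. If start lies partway into the first VMA, that VMA itself
	 * precedes the portion we modify (it gets split at start), so use it
	 * as prev.
	 */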
	if (start > vma->vm_start)
		prev = vma;

	for_each_vma_range(vmi, vma, end) {
		unsigned long curr_end = MIN(vma->vm_end, end);

		if (!(vma->vm_flags & VM_SEALED)) {
			vma = vma_modify_flags(&vmi, prev, vma,
					curr_start, curr_end,
					vma->vm_flags | VM_SEALED);
			if (IS_ERR(vma))
				return PTR_ERR(vma);
			vm_flags_set(vma, VM_SEALED);
		}

		prev = vma;
		curr_start = curr_end;
	}

	return 0;
}

/*
 * mseal(2) seals a VMA's metadata against modification by selected syscalls.
*
* addr/len: VM address range.
*
 * The address range given by addr/len must meet:
* start (addr) must be in a valid VMA.
* end (addr + len) must be in a valid VMA.
* no gap (unallocated memory) between start and end.
* start (addr) must be page aligned.
*
* len: len will be page aligned implicitly.
*
 * The following VMA operations are blocked after sealing:
 * 1> Unmapping, moving to another location, or shrinking the size,
 * via munmap() and mremap(): these can leave an empty space, which
 * could then be filled by a VMA with a new set of attributes.
* 2> Moving or expanding a different vma into the current location,
* via mremap().
* 3> Modifying a VMA via mmap(MAP_FIXED).
* 4> Size expansion, via mremap(), does not appear to pose any
* specific risks to sealed VMAs. It is included anyway because
* the use case is unclear. In any case, users can rely on
* merging to expand a sealed VMA.
* 5> mprotect and pkey_mprotect.
 * 6> Some destructive madvise() behavior (e.g. MADV_DONTNEED)
* for anonymous memory, when users don't have write permission to the
* memory. Those behaviors can alter region contents by discarding pages,
* effectively a memset(0) for anonymous memory.
*
* flags: reserved.
*
* return values:
* zero: success.
* -EINVAL:
* invalid input flags.
* start address is not page aligned.
 * Address range (start + len) overflow.
* -ENOMEM:
* addr is not a valid address (not allocated).
* end (start + len) is not a valid address.
* a gap (unallocated memory) between start and end.
 * -EPERM:
 *  - On 32-bit architectures, sealing is not supported.
 * Note:
 * A user can call mseal(2) multiple times; sealing already-sealed
 * memory is a no-op (no error is returned).
*
* unseal() is not supported.
*/
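
/*
 * Userspace usage sketch (illustrative only, not part of the kernel build).
 * It assumes a libc that exposes the mseal() wrapper (otherwise
 * syscall(__NR_mseal, addr, len, flags) can be used) and omits error
 * handling for brevity:
 *
 *	size_t len = 4096;
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 *	// ... place long-lived, security-sensitive data in p ...
 *	mseal(p, len, 0);		// flags must be 0
 *	mprotect(p, len, PROT_READ);	// now fails with EPERM
 *	munmap(p, len);			// also fails with EPERM
 */
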
int do_mseal(unsigned long start, size_t len_in, unsigned long flags)
{
	size_t len;
	int ret = 0;
	unsigned long end;
	struct mm_struct *mm = current->mm;

	/* Verify flags not set. */
	if (flags)
		return -EINVAL;

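	/* Strip tag bits (if any) from the address before checking alignment. */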
	start = untagged_addr(start);
	if (!PAGE_ALIGNED(start))
		return -EINVAL;

	len = PAGE_ALIGN(len_in);
	/* Check whether len was rounded up from a small negative value to zero. */
	if (len_in && !len)
		return -EINVAL;

	end = start + len;
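	/* Reject a range whose end wraps around the address space. */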
	if (end < start)
		return -EINVAL;

	if (end == start)
		return 0;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	if (range_contains_unmapped(mm, start, end)) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Second pass: this should succeed unless there are errors from
	 * vma_modify_flags(), e.g. a merge/split error or the process
	 * reaching the maximum number of supported VMAs. Such cases
	 * should be rare.
	 */
	ret = mseal_apply(mm, start, end);

out:
	mmap_write_unlock(mm);
	return ret;
}

SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long,
		flags)
{
	return do_mseal(start, len, flags);
}