Merge branch 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 entry updates from Ingo Molnar:
 "This contains x32 and compat syscall improvements, the biggest one of
  which splits x32 syscalls into their own table, which allows new
  syscalls to share the same number on x32 and x86-64 - which turns the
  special 512-547 syscall number range into a legacy wart that won't be
  extended going forward"

* 'x86-entry-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/syscalls: Split the x32 syscalls into their own table
  x86/syscalls: Disallow compat entries for all types of 64-bit syscalls
  x86/syscalls: Use the compat versions of rt_sigsuspend() and rt_sigprocmask()
  x86/syscalls: Make __X32_SYSCALL_BIT be unsigned long
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 536b574..3f8e226 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -285,15 +285,16 @@
 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
 		nr = syscall_trace_enter(regs);
 
-	/*
-	 * NB: Native and x32 syscalls are dispatched from the same
-	 * table.  The only functional difference is the x32 bit in
-	 * regs->orig_ax, which changes the behavior of some syscalls.
-	 */
-	nr &= __SYSCALL_MASK;
 	if (likely(nr < NR_syscalls)) {
 		nr = array_index_nospec(nr, NR_syscalls);
 		regs->ax = sys_call_table[nr](regs);
+#ifdef CONFIG_X86_X32_ABI
+	} else if (likely((nr & __X32_SYSCALL_BIT) &&
+			  (nr & ~__X32_SYSCALL_BIT) < X32_NR_syscalls)) {
+		nr = array_index_nospec(nr & ~__X32_SYSCALL_BIT,
+					X32_NR_syscalls);
+		regs->ax = x32_sys_call_table[nr](regs);
+#endif
 	}
 
 	syscall_return_slowpath(regs);
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index d5252bc..b1bf317 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -10,10 +10,13 @@
 /* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
 extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
 #define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
+#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
 #include <asm/syscalls_64.h>
 #undef __SYSCALL_64
+#undef __SYSCALL_X32
 
 #define __SYSCALL_64(nr, sym, qual) [nr] = sym,
+#define __SYSCALL_X32(nr, sym, qual)
 
 asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
 	/*
@@ -23,3 +26,25 @@
 	[0 ... __NR_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_64.h>
 };
+
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#ifdef CONFIG_X86_X32_ABI
+
+#define __SYSCALL_64(nr, sym, qual)
+#define __SYSCALL_X32(nr, sym, qual) [nr] = sym,
+
+asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
+	/*
+	 * Smells like a compiler bug -- it doesn't work
+	 * when the & below is removed.
+	 */
+	[0 ... __NR_syscall_x32_max] = &sys_ni_syscall,
+#include <asm/syscalls_64.h>
+};
+
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#endif
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c00019a..3fe0254 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -186,11 +186,11 @@
 172	i386	prctl			sys_prctl			__ia32_sys_prctl
 173	i386	rt_sigreturn		sys_rt_sigreturn		sys32_rt_sigreturn
 174	i386	rt_sigaction		sys_rt_sigaction		__ia32_compat_sys_rt_sigaction
-175	i386	rt_sigprocmask		sys_rt_sigprocmask		__ia32_sys_rt_sigprocmask
+175	i386	rt_sigprocmask		sys_rt_sigprocmask		__ia32_compat_sys_rt_sigprocmask
 176	i386	rt_sigpending		sys_rt_sigpending		__ia32_compat_sys_rt_sigpending
 177	i386	rt_sigtimedwait		sys_rt_sigtimedwait_time32	__ia32_compat_sys_rt_sigtimedwait_time32
 178	i386	rt_sigqueueinfo		sys_rt_sigqueueinfo		__ia32_compat_sys_rt_sigqueueinfo
-179	i386	rt_sigsuspend		sys_rt_sigsuspend		__ia32_sys_rt_sigsuspend
+179	i386	rt_sigsuspend		sys_rt_sigsuspend		__ia32_compat_sys_rt_sigsuspend
 180	i386	pread64			sys_pread64			__ia32_compat_sys_x86_pread
 181	i386	pwrite64		sys_pwrite64			__ia32_compat_sys_x86_pwrite
 182	i386	chown			sys_chown16			__ia32_sys_chown16
diff --git a/arch/x86/entry/syscalls/syscalltbl.sh b/arch/x86/entry/syscalls/syscalltbl.sh
index 94fcd19..1af2be3 100644
--- a/arch/x86/entry/syscalls/syscalltbl.sh
+++ b/arch/x86/entry/syscalls/syscalltbl.sh
@@ -1,13 +1,13 @@
-#!/bin/sh
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 in="$1"
 out="$2"
 
 syscall_macro() {
-    abi="$1"
-    nr="$2"
-    entry="$3"
+    local abi="$1"
+    local nr="$2"
+    local entry="$3"
 
     # Entry can be either just a function name or "function/qualifier"
     real_entry="${entry%%/*}"
@@ -21,14 +21,14 @@
 }
 
 emit() {
-    abi="$1"
-    nr="$2"
-    entry="$3"
-    compat="$4"
-    umlentry=""
+    local abi="$1"
+    local nr="$2"
+    local entry="$3"
+    local compat="$4"
+    local umlentry=""
 
-    if [ "$abi" = "64" -a -n "$compat" ]; then
-	echo "a compat entry for a 64-bit syscall makes no sense" >&2
+    if [ "$abi" != "I386" -a -n "$compat" ]; then
+	echo "a compat entry ($abi: $compat) for a 64-bit syscall makes no sense" >&2
 	exit 1
     fi
 
@@ -62,14 +62,17 @@
     while read nr abi name entry compat; do
 	abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
 	if [ "$abi" = "COMMON" -o "$abi" = "64" ]; then
-	    # COMMON is the same as 64, except that we don't expect X32
-	    # programs to use it.  Our expectation has nothing to do with
-	    # any generated code, so treat them the same.
 	    emit 64 "$nr" "$entry" "$compat"
+	    if [ "$abi" = "COMMON" ]; then
+		# COMMON means that this syscall exists in the same form for
+		# 64-bit and X32.
+		echo "#ifdef CONFIG_X86_X32_ABI"
+		emit X32 "$nr" "$entry" "$compat"
+		echo "#endif"
+	    fi
 	elif [ "$abi" = "X32" ]; then
-	    # X32 is equivalent to 64 on an X32-compatible kernel.
 	    echo "#ifdef CONFIG_X86_X32_ABI"
-	    emit 64 "$nr" "$entry" "$compat"
+	    emit X32 "$nr" "$entry" "$compat"
 	    echo "#endif"
 	elif [ "$abi" = "I386" ]; then
 	    emit "$abi" "$nr" "$entry" "$compat"
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 2dc4a02..8db3fdb 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -36,6 +36,10 @@
 extern const sys_call_ptr_t ia32_sys_call_table[];
 #endif
 
+#ifdef CONFIG_X86_X32_ABI
+extern const sys_call_ptr_t x32_sys_call_table[];
+#endif
+
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
  * This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 0975897..a7dd080 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -5,12 +5,6 @@
 #include <uapi/asm/unistd.h>
 
 
-# ifdef CONFIG_X86_X32_ABI
-#  define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
-# else
-#  define __SYSCALL_MASK (~0)
-# endif
-
 # ifdef CONFIG_X86_32
 
 #  include <asm/unistd_32.h>
diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h
index 30d7d04..196fdd0 100644
--- a/arch/x86/include/uapi/asm/unistd.h
+++ b/arch/x86/include/uapi/asm/unistd.h
@@ -3,7 +3,7 @@
 #define _UAPI_ASM_X86_UNISTD_H
 
 /* x32 syscall flag bit */
-#define __X32_SYSCALL_BIT	0x40000000
+#define __X32_SYSCALL_BIT	0x40000000UL
 
 #ifndef __KERNEL__
 # ifdef __i386__
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d3d0752..70e9772 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -6,13 +6,28 @@
 #include <asm/ia32.h>
 
 #define __SYSCALL_64(nr, sym, qual) [nr] = 1,
+#define __SYSCALL_X32(nr, sym, qual)
 static char syscalls_64[] = {
 #include <asm/syscalls_64.h>
 };
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+
+#ifdef CONFIG_X86_X32_ABI
+#define __SYSCALL_64(nr, sym, qual)
+#define __SYSCALL_X32(nr, sym, qual) [nr] = 1,
+static char syscalls_x32[] = {
+#include <asm/syscalls_64.h>
+};
+#undef __SYSCALL_64
+#undef __SYSCALL_X32
+#endif
+
 #define __SYSCALL_I386(nr, sym, qual) [nr] = 1,
 static char syscalls_ia32[] = {
 #include <asm/syscalls_32.h>
 };
+#undef __SYSCALL_I386
 
 #if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 #include <asm/kvm_para.h>
@@ -80,6 +95,11 @@
 	DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
 	DEFINE(NR_syscalls, sizeof(syscalls_64));
 
+#ifdef CONFIG_X86_X32_ABI
+	DEFINE(__NR_syscall_x32_max, sizeof(syscalls_x32) - 1);
+	DEFINE(X32_NR_syscalls, sizeof(syscalls_x32));
+#endif
+
 	DEFINE(__NR_syscall_compat_max, sizeof(syscalls_ia32) - 1);
 	DEFINE(IA32_NR_syscalls, sizeof(syscalls_ia32));
 
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 3bc5b744..5d49bfe 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -17,7 +17,7 @@
 TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
 # Some selftests require 32bit support enabled also on 64bit systems
 TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
new file mode 100644
index 0000000..d6b09cb
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_numbering.c - tests that invalid x86-64 syscall numbers fail with ENOSYS
+ * Copyright (c) 2018 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+#include <syscall.h>
+
+static int nerrs;
+
+#define X32_BIT 0x40000000UL
+
+static void check_enosys(unsigned long nr, bool *ok)
+{
+	/* Clear *ok unless syscall nr fails with ENOSYS.  Flush first: a kernel bug here may segfault. */
+	fflush(stdout);
+	/* syscall(2) returns -1 and sets errno on error; any other value means the call ran. */
+	long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
+	if (ret != -1) {
+		printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
+		*ok = false;
+	} else if (errno != ENOSYS) {
+		printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
+		*ok = false;
+	}
+}
+
+/* Probe syscall numbers that must all fail with ENOSYS; bump nerrs once if any does not. */
+static void test_x32_without_x32_bit(void)
+{
+	bool all_enosys = true;	/* cleared by check_enosys() on any bad probe */
+	unsigned long nr;
+
+	/*
+	 * 512-547 is the "x32" range: those entries are meant to be
+	 * reached with the x32 bit (0x40000000) set, so calling the bare
+	 * numbers is nonsense and should not work.
+	 */
+	printf("[RUN]\tChecking syscalls 512-547\n");
+	for (nr = 512; nr <= 547; nr++)
+		check_enosys(nr, &all_enosys);
+
+	/* A handful of 64-bit-only syscalls, tagged with the x32 bit, must be rejected. */
+	printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
+	check_enosys(X32_BIT | 16, &all_enosys);	/* ioctl */
+	check_enosys(X32_BIT | 19, &all_enosys);	/* readv */
+	check_enosys(X32_BIT | 20, &all_enosys);	/* writev */
+
+	/* Numbers with a bit set above bit 31, without and with the x32 bit. */
+	printf("[RUN]\tChecking numbers above 2^32-1\n");
+	check_enosys(1UL << 32, &all_enosys);
+	check_enosys((1UL << 32) | X32_BIT, &all_enosys);
+
+	if (all_enosys) {
+		printf("[OK]\tThey all returned -ENOSYS\n");
+		return;
+	}
+
+	/* At least one probe misbehaved: count the whole test as a single error. */
+	nerrs++;
+}
+
+int main(void)
+{
+	/*
+	 * Anyone diagnosing a failure will want to know whether the kernel
+	 * supports x32.  Tell them, by probing call 39 with the x32 bit set.
+	 */
+	printf("\tChecking for x32...");
+	fflush(stdout);
+	if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
+		printf(" supported\n");
+	} else if (errno == ENOSYS) {
+		printf(" not supported\n");
+	} else {
+		printf(" confused\n");
+	}
+
+	test_x32_without_x32_bit();
+
+	return nerrs ? 1 : 0;	/* non-zero exit status if any test recorded an error */
+}