Merge tag 'gpio-fixes-for-v5.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux

Pull gpio fixes from Bartosz Golaszewski:
 "A bunch of fixes for the GPIO subsystem. We have two regressions in
  the core code spotted right after the merge window, a series of fixes
  for ACPI GPIO and a subsequent fix for a related regression in
  gpio-pca953x + a minor tweak in .gitignore and a rework of handling of
  the gpio-line-names to remedy a regression in stm32mp151.

  Summary:

   - fix two regressions in core GPIO subsystem code: one NULL-pointer
     dereference and one list corruption

   - read GPIO line names from fwnode instead of using the generic
     device properties to fix a regression on stm32mp151

   - fixes to ACPI GPIO and gpio-pca953x to handle a regression in IRQ
     handling on Intel Galileo

   - update .gitignore in GPIO selftests"

* tag 'gpio-fixes-for-v5.12-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux:
  gpiolib: Read "gpio-line-names" from a firmware node
  gpio: pca953x: Set IRQ type when handle Intel Galileo Gen 2
  gpiolib: acpi: Allow to find GpioInt() resource by name and index
  gpiolib: acpi: Add ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER quirk
  gpiolib: acpi: Add missing IRQF_ONESHOT
  gpio: fix gpio-device list corruption
  gpio: fix NULL-deref-on-deregistration regression
  selftests: gpio: update .gitignore
diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 7f5912a..9e8f0cc 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -171,7 +171,7 @@
 #include <asm-generic/memory_model.h>
 #endif
 
-#define virt_addr_valid(kaddr)	((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
+#define virt_addr_valid(kaddr)	((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory)
 #define pfn_valid(pfn)		virt_addr_valid(pfn_to_virt(pfn))
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index 6bbe520..8d0f862 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -30,8 +30,8 @@
 #define page_to_pfn(page)	virt_to_pfn(page_to_virt(page))
 #define pfn_valid(pfn)	        ((pfn) < max_mapnr)
 
-#define	virt_addr_valid(kaddr)	(((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
-				((void *)(kaddr) < (void *)memory_end))
+#define	virt_addr_valid(kaddr)	(((unsigned long)(kaddr) >= PAGE_OFFSET) && \
+				((unsigned long)(kaddr) < memory_end))
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index e3946b0..3d70d15 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -14,6 +14,7 @@
 
 #include <asm/addrspace.h>
 #include <asm/unaligned.h>
+#include <asm-generic/vmlinux.lds.h>
 
 /*
  * These two variables specify the free mem region
@@ -120,6 +121,13 @@
 		/* last four bytes is always image size in little endian */
 		image_size = get_unaligned_le32((void *)&__image_end - 4);
 
+		/* The device tree's address must be properly aligned  */
+		image_size = ALIGN(image_size, STRUCT_ALIGNMENT);
+
+		puts("Copy device tree to address  ");
+		puthex(VMLINUX_LOAD_ADDRESS_ULL + image_size);
+		puts("\n");
+
 		/* copy dtb to where the booted kernel will expect it */
 		memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size,
 		       __appended_dtb, dtb_size);
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
index 8e1deaf..5e4105c 100644
--- a/arch/mips/crypto/Makefile
+++ b/arch/mips/crypto/Makefile
@@ -12,8 +12,8 @@
 obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
 poly1305-mips-y := poly1305-core.o poly1305-glue.o
 
-perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
-perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
+perlasm-flavour-$(CONFIG_32BIT) := o32
+perlasm-flavour-$(CONFIG_64BIT) := 64
 
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h
index 6aa8f126a..b710e76 100644
--- a/arch/mips/include/asm/traps.h
+++ b/arch/mips/include/asm/traps.h
@@ -24,8 +24,11 @@
 extern void (*board_cache_error_setup)(void);
 
 extern int register_nmi_notifier(struct notifier_block *nb);
+extern void reserve_exception_space(phys_addr_t addr, unsigned long size);
 extern char except_vec_nmi[];
 
+#define VECTORSPACING 0x100	/* for EI/VI mode */
+
 #define nmi_notifier(fn, pri)						\
 ({									\
 	static struct notifier_block fn##_nb = {			\
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 9a89637..b718920 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -26,6 +26,7 @@
 #include <asm/elf.h>
 #include <asm/pgtable-bits.h>
 #include <asm/spram.h>
+#include <asm/traps.h>
 #include <linux/uaccess.h>
 
 #include "fpu-probe.h"
@@ -1628,6 +1629,7 @@
 		c->cputype = CPU_BMIPS3300;
 		__cpu_name[cpu] = "Broadcom BMIPS3300";
 		set_elf_platform(cpu, "bmips3300");
+		reserve_exception_space(0x400, VECTORSPACING * 64);
 		break;
 	case PRID_IMP_BMIPS43XX: {
 		int rev = c->processor_id & PRID_REV_MASK;
@@ -1638,6 +1640,7 @@
 			__cpu_name[cpu] = "Broadcom BMIPS4380";
 			set_elf_platform(cpu, "bmips4380");
 			c->options |= MIPS_CPU_RIXI;
+			reserve_exception_space(0x400, VECTORSPACING * 64);
 		} else {
 			c->cputype = CPU_BMIPS4350;
 			__cpu_name[cpu] = "Broadcom BMIPS4350";
@@ -1654,6 +1657,7 @@
 			__cpu_name[cpu] = "Broadcom BMIPS5000";
 		set_elf_platform(cpu, "bmips5000");
 		c->options |= MIPS_CPU_ULRI | MIPS_CPU_RIXI;
+		reserve_exception_space(0x1000, VECTORSPACING * 64);
 		break;
 	}
 }
@@ -2133,6 +2137,8 @@
 	if (cpu == 0)
 		__ua_limit = ~((1ull << cpu_vmbits) - 1);
 #endif
+
+	reserve_exception_space(0, 0x1000);
 }
 
 void cpu_report(void)
diff --git a/arch/mips/kernel/cpu-r3k-probe.c b/arch/mips/kernel/cpu-r3k-probe.c
index abdbbe8..af65477 100644
--- a/arch/mips/kernel/cpu-r3k-probe.c
+++ b/arch/mips/kernel/cpu-r3k-probe.c
@@ -21,6 +21,7 @@
 #include <asm/fpu.h>
 #include <asm/mipsregs.h>
 #include <asm/elf.h>
+#include <asm/traps.h>
 
 #include "fpu-probe.h"
 
@@ -158,6 +159,8 @@
 		cpu_set_fpu_opts(c);
 	else
 		cpu_set_nofpu_opts(c);
+
+	reserve_exception_space(0, 0x400);
 }
 
 void cpu_report(void)
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index e035295..808b8b6 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2009,13 +2009,16 @@
 	nmi_exit();
 }
 
-#define VECTORSPACING 0x100	/* for EI/VI mode */
-
 unsigned long ebase;
 EXPORT_SYMBOL_GPL(ebase);
 unsigned long exception_handlers[32];
 unsigned long vi_handlers[64];
 
+void reserve_exception_space(phys_addr_t addr, unsigned long size)
+{
+	memblock_reserve(addr, size);
+}
+
 void __init *set_except_vector(int n, void *addr)
 {
 	unsigned long handler = (unsigned long) addr;
@@ -2367,10 +2370,7 @@
 
 	if (!cpu_has_mips_r2_r6) {
 		ebase = CAC_BASE;
-		ebase_pa = virt_to_phys((void *)ebase);
 		vec_size = 0x400;
-
-		memblock_reserve(ebase_pa, vec_size);
 	} else {
 		if (cpu_has_veic || cpu_has_vint)
 			vec_size = 0x200 + VECTORSPACING*64;
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index c1c345b..1234834 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -145,6 +145,7 @@
 	}
 
 #ifdef CONFIG_MIPS_ELF_APPENDED_DTB
+	STRUCT_ALIGN();
 	.appended_dtb : AT(ADDR(.appended_dtb) - LOAD_OFFSET) {
 		*(.appended_dtb)
 		KEEP(*(.appended_dtb))
@@ -172,6 +173,11 @@
 #endif
 
 #ifdef CONFIG_MIPS_RAW_APPENDED_DTB
+	.fill : {
+		FILL(0);
+		BYTE(0);
+		. = ALIGN(8);
+	}
 	__appended_dtb = .;
 	/* leave space for appended DTB */
 	. += 0x100000;
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 4e53ac4..afc3b8d 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -203,9 +203,12 @@
 	def_bool y
 	depends on PA8X00 || PA7200
 
+config PARISC_HUGE_KERNEL
+	def_bool y if !MODULES || UBSAN || FTRACE || COMPILE_TEST
+
 config MLONGCALLS
-	def_bool y if !MODULES || UBSAN || FTRACE
-	bool "Enable the -mlong-calls compiler option for big kernels" if MODULES && !UBSAN && !FTRACE
+	def_bool y if PARISC_HUGE_KERNEL
+	bool "Enable the -mlong-calls compiler option for big kernels" if !PARISC_HUGE_KERNEL
 	depends on PA8X00
 	help
 	  If you configure the kernel to include many drivers built-in instead
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 2127974..65de6c4 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -567,8 +567,6 @@
 };
 
 #ifdef CONFIG_64BIT
-#include <linux/compat.h>
-
 static int gpr32_get(struct task_struct *target,
 		     const struct user_regset *regset,
 		     struct membuf to)
diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h
index 7141cce..a920599 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -53,8 +53,8 @@
 #define mfdcr(rn)						\
 	({unsigned int rval;					\
 	if (__builtin_constant_p(rn) && rn < 1024)		\
-		asm volatile("mfdcr %0," __stringify(rn)	\
-		              : "=r" (rval));			\
+		asm volatile("mfdcr %0, %1" : "=r" (rval)	\
+			      : "n" (rn));			\
 	else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR)))	\
 		rval = mfdcrx(rn);				\
 	else							\
@@ -64,8 +64,8 @@
 #define mtdcr(rn, v)						\
 do {								\
 	if (__builtin_constant_p(rn) && rn < 1024)		\
-		asm volatile("mtdcr " __stringify(rn) ",%0"	\
-			      : : "r" (v)); 			\
+		asm volatile("mtdcr %0, %1"			\
+			      : : "n" (rn), "r" (v));		\
 	else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR)))	\
 		mtdcrx(rn, v);					\
 	else							\
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 80b27f5..607168b 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -228,7 +228,7 @@
 #define MMU_FTRS_ALWAYS		0
 #endif
 
-static inline bool early_mmu_has_feature(unsigned long feature)
+static __always_inline bool early_mmu_has_feature(unsigned long feature)
 {
 	if (MMU_FTRS_ALWAYS & feature)
 		return true;
@@ -286,7 +286,7 @@
 
 }
 
-static inline bool mmu_has_feature(unsigned long feature)
+static __always_inline bool mmu_has_feature(unsigned long feature)
 {
 	return early_mmu_has_feature(feature);
 }
diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h
index 0cf5274..721c0d6 100644
--- a/arch/powerpc/include/asm/vio.h
+++ b/arch/powerpc/include/asm/vio.h
@@ -113,7 +113,7 @@
 	const char *name;
 	const struct vio_device_id *id_table;
 	int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
-	int (*remove)(struct vio_dev *dev);
+	void (*remove)(struct vio_dev *dev);
 	/* A driver must have a get_desired_dma() function to
 	 * be loaded in a CMO environment if it uses DMA.
 	 */
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 727fdab..565e84e 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -457,11 +457,12 @@
 	cmplw	0,r1,r3
 #endif
 	mfspr	r2, SPRN_SDR1
-	li	r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+	li	r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER
 	rlwinm	r2, r2, 28, 0xfffff000
 #ifdef CONFIG_MODULES
 	bgt-	112f
 	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
+	li	r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
 	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
 #endif
 112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
@@ -520,10 +521,11 @@
 	lis	r1, TASK_SIZE@h		/* check if kernel address */
 	cmplw	0,r1,r3
 	mfspr	r2, SPRN_SDR1
-	li	r1, _PAGE_PRESENT | _PAGE_ACCESSED
+	li	r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER
 	rlwinm	r2, r2, 28, 0xfffff000
 	bgt-	112f
 	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
+	li	r1, _PAGE_PRESENT | _PAGE_ACCESSED
 	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
 112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
 	lwz	r2,0(r2)		/* get pmd entry */
@@ -597,10 +599,11 @@
 	lis	r1, TASK_SIZE@h		/* check if kernel address */
 	cmplw	0,r1,r3
 	mfspr	r2, SPRN_SDR1
-	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
+	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER
 	rlwinm	r2, r2, 28, 0xfffff000
 	bgt-	112f
 	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
+	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
 	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
 112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
 	lwz	r2,0(r2)		/* get pmd entry */
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 398cd86..2ef3c40 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -149,7 +149,7 @@
  * enabled when the interrupt handler returns (indicating a process-context /
  * synchronous interrupt) then irqs_enabled should be true.
  */
-static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri)
+static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
 {
 	/* This must be done with RI=1 because tracing may touch vmaps */
 	trace_hardirqs_on();
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index bb5c20d..c6aebc1 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -904,7 +904,7 @@
 	if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
 		return -EFAULT;
 
-	nr_vsx_regs = size / sizeof(__vector128);
+	nr_vsx_regs = max(1ul, size / sizeof(__vector128));
 	emulate_vsx_load(op, buf, mem, cross_endian);
 	preempt_disable();
 	if (reg < 32) {
@@ -951,7 +951,7 @@
 	if (!address_ok(regs, ea, size))
 		return -EFAULT;
 
-	nr_vsx_regs = size / sizeof(__vector128);
+	nr_vsx_regs = max(1ul, size / sizeof(__vector128));
 	preempt_disable();
 	if (reg < 32) {
 		/* FP regs + extensions */
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6817331..766f064 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -222,7 +222,7 @@
 	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
 		*addrp = mfspr(SPRN_SDAR);
 
-	if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
+	if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel)
 		*addrp = 0;
 }
 
@@ -507,7 +507,7 @@
 			 * addresses, hence include a check before filtering code
 			 */
 			if (!(ppmu->flags & PPMU_ARCH_31) &&
-				is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
+			    is_kernel_addr(addr) && event->attr.exclude_kernel)
 				continue;
 
 			/* Branches are read most recent first (ie. mfbhrb 0 is
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index b3ac245..6373003 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -4,6 +4,7 @@
  * Copyright 2006-2007 Michael Ellerman, IBM Corp.
  */
 
+#include <linux/crash_dump.h>
 #include <linux/device.h>
 #include <linux/irq.h>
 #include <linux/msi.h>
@@ -458,8 +459,28 @@
 			return hwirq;
 		}
 
-		virq = irq_create_mapping_affinity(NULL, hwirq,
-						   entry->affinity);
+		/*
+		 * Depending on the number of online CPUs in the original
+		 * kernel, it is likely for CPU #0 to be offline in a kdump
+		 * kernel. The associated IRQs in the affinity mappings
+		 * provided by irq_create_affinity_masks() are thus not
+		 * started by irq_startup(), as per-design for managed IRQs.
+		 * This can be a problem with multi-queue block devices driven
+		 * by blk-mq : such a non-started IRQ is very likely paired
+		 * with the single queue enforced by blk-mq during kdump (see
+		 * blk_mq_alloc_tag_set()). This causes the device to remain
+		 * silent and likely hangs the guest at some point.
+		 *
+		 * We don't really care for fine-grained affinity when doing
+		 * kdump actually : simply ignore the pre-computed affinity
+		 * masks in this case and let the default mask with all CPUs
+		 * be used when creating the IRQ mappings.
+		 */
+		if (is_kdump_kernel())
+			virq = irq_create_mapping(NULL, hwirq);
+		else
+			virq = irq_create_mapping_affinity(NULL, hwirq,
+							   entry->affinity);
 
 		if (!virq) {
 			pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index b2797cf..9cb4fc8 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1261,7 +1261,6 @@
 	struct vio_dev *viodev = to_vio_dev(dev);
 	struct vio_driver *viodrv = to_vio_driver(dev->driver);
 	struct device *devptr;
-	int ret = 1;
 
 	/*
 	 * Hold a reference to the device after the remove function is called
@@ -1270,13 +1269,13 @@
 	devptr = get_device(dev);
 
 	if (viodrv->remove)
-		ret = viodrv->remove(viodev);
+		viodrv->remove(viodev);
 
-	if (!ret && firmware_has_feature(FW_FEATURE_CMO))
+	if (firmware_has_feature(FW_FEATURE_CMO))
 		vio_cmo_bus_remove(viodev);
 
 	put_device(devptr);
-	return ret;
+	return 0;
 }
 
 /**
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index 7e078bc..8fb09ee 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -8,7 +8,6 @@
 
 #include <asm/ptrace.h>
 #include <asm/processor.h>
-#include <asm/extable_64.h>
 #include <asm/spitfire.h>
 #include <asm/adi.h>
 
diff --git a/arch/sparc/include/asm/extable_64.h b/arch/sparc/include/asm/extable.h
similarity index 92%
rename from arch/sparc/include/asm/extable_64.h
rename to arch/sparc/include/asm/extable.h
index 5a01719..554a9dc 100644
--- a/arch/sparc/include/asm/extable_64.h
+++ b/arch/sparc/include/asm/extable.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_EXTABLE64_H
-#define __ASM_EXTABLE64_H
+#ifndef __ASM_EXTABLE_H
+#define __ASM_EXTABLE_H
 /*
  * The exception table consists of pairs of addresses: the first is the
  * address of an instruction that is allowed to fault, and the second is
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index 3c4bc21..b6242f7 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -50,16 +50,12 @@
 	unsigned long   fsr;
 	unsigned long   fpqdepth;
 	struct fpq	fpqueue[16];
-	unsigned long flags;
 	mm_segment_t current_ds;
 };
 
-#define SPARC_FLAG_KTHREAD      0x1    /* task is a kernel thread */
-#define SPARC_FLAG_UNALIGNED    0x2    /* is allowed to do unaligned accesses */
-
 #define INIT_THREAD  { \
-	.flags = SPARC_FLAG_KTHREAD, \
 	.current_ds = KERNEL_DS, \
+	.kregs = (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \
 }
 
 /* Do necessary setup to start up a newly executed thread. */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 42cd4cd..8047a9c 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -118,6 +118,7 @@
 	.task		=	&tsk,			\
 	.current_ds	=	ASI_P,			\
 	.preempt_count	=	INIT_PREEMPT_COUNT,	\
+	.kregs		=	(struct pt_regs *)(init_stack+THREAD_SIZE)-1 \
 }
 
 /* how to get the thread information struct from C */
diff --git a/arch/sparc/include/asm/uaccess.h b/arch/sparc/include/asm/uaccess.h
index dd85bc2..3900942 100644
--- a/arch/sparc/include/asm/uaccess.h
+++ b/arch/sparc/include/asm/uaccess.h
@@ -1,6 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef ___ASM_SPARC_UACCESS_H
 #define ___ASM_SPARC_UACCESS_H
+
+#include <asm/extable.h>
+
 #if defined(__sparc__) && defined(__arch64__)
 #include <asm/uaccess_64.h>
 #else
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 0a2d3eb..4a12346 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -13,9 +13,6 @@
 
 #include <asm/processor.h>
 
-#define ARCH_HAS_SORT_EXTABLE
-#define ARCH_HAS_SEARCH_EXTABLE
-
 /* Sparc is not segmented, however we need to be able to fool access_ok()
  * when doing system calls from kernel mode legitimately.
  *
@@ -40,36 +37,6 @@
 #define __access_ok(addr, size) (__user_ok((addr) & get_fs().seg, (size)))
 #define access_ok(addr, size) __access_ok((unsigned long)(addr), size)
 
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path.  This means when everything is well,
- * we don't even have to jump over them.  Further, they do not intrude
- * on our cache or tlb entries.
- *
- * There is a special way how to put a range of potentially faulting
- * insns (like twenty ldd/std's with now intervening other instructions)
- * You specify address of first in insn and 0 in fixup and in the next
- * exception_table_entry you specify last potentially faulting insn + 1
- * and in fixup the routine which should handle the fault.
- * That fixup code will get
- * (faulting_insn_address - first_insn_in_the_range_address)/4
- * in %g2 (ie. index of the faulting instruction in the range).
- */
-
-struct exception_table_entry
-{
-        unsigned long insn, fixup;
-};
-
-/* Returns 0 if exception not found and fixup otherwise.  */
-unsigned long search_extables_range(unsigned long addr, unsigned long *g2);
-
 /* Uh, these should become the main single-value transfer routines..
  * They automatically use the right size if we just have the right
  * pointer type..
@@ -252,12 +219,7 @@
 	unsigned long ret;
 
 	__asm__ __volatile__ (
-		".section __ex_table,#alloc\n\t"
-		".align 4\n\t"
-		".word 1f,3\n\t"
-		".previous\n\t"
 		"mov %2, %%o1\n"
-		"1:\n\t"
 		"call __bzero\n\t"
 		" mov %1, %%o0\n\t"
 		"mov %%o0, %0\n"
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index 698cf69..30eb4c6 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -10,7 +10,6 @@
 #include <linux/string.h>
 #include <asm/asi.h>
 #include <asm/spitfire.h>
-#include <asm/extable_64.h>
 
 #include <asm/processor.h>
 
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
index be30c8d..6044b82 100644
--- a/arch/sparc/kernel/head_32.S
+++ b/arch/sparc/kernel/head_32.S
@@ -515,7 +515,7 @@
 
 		/* I want a kernel stack NOW! */
 		set	init_thread_union, %g1
-		set	(THREAD_SIZE - STACKFRAME_SZ), %g2
+		set	(THREAD_SIZE - STACKFRAME_SZ - TRACEREG_SZ), %g2
 		add	%g1, %g2, %sp
 		mov	0, %fp			/* And for good luck */
 
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index c5ff247..72a5bdc 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -706,7 +706,7 @@
 	wr	%g0, ASI_P, %asi
 	mov	1, %g1
 	sllx	%g1, THREAD_SHIFT, %g1
-	sub	%g1, (STACKFRAME_SZ + STACK_BIAS), %g1
+	sub	%g1, (STACKFRAME_SZ + STACK_BIAS + TRACEREG_SZ), %g1
 	add	%g6, %g1, %sp
 
 	/* Set per-cpu pointer initially to zero, this makes
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index b91e880..3b97949 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -216,16 +216,6 @@
 		clear_thread_flag(TIF_USEDFPU);
 #endif
 	}
-
-	/* This task is no longer a kernel thread. */
-	if (current->thread.flags & SPARC_FLAG_KTHREAD) {
-		current->thread.flags &= ~SPARC_FLAG_KTHREAD;
-
-		/* We must fixup kregs as well. */
-		/* XXX This was not fixed for ti for a while, worked. Unused? */
-		current->thread.kregs = (struct pt_regs *)
-		    (task_stack_page(current) + (THREAD_SIZE - TRACEREG_SZ));
-	}
 }
 
 static inline struct sparc_stackf __user *
@@ -313,7 +303,6 @@
 		extern int nwindows;
 		unsigned long psr;
 		memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
-		p->thread.flags |= SPARC_FLAG_KTHREAD;
 		p->thread.current_ds = KERNEL_DS;
 		ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8);
 		childregs->u_regs[UREG_G1] = sp; /* function */
@@ -325,7 +314,6 @@
 	}
 	memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ);
 	childregs->u_regs[UREG_FP] = sp;
-	p->thread.flags &= ~SPARC_FLAG_KTHREAD;
 	p->thread.current_ds = USER_DS;
 	ti->kpc = (((unsigned long) ret_from_fork) - 0x8);
 	ti->kpsr = current->thread.fork_kpsr | PSR_PIL;
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index eea43a1..c8e0dd9 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -266,7 +266,6 @@
 }
 
 struct tt_entry *sparc_ttable;
-static struct pt_regs fake_swapper_regs;
 
 /* Called from head_32.S - before we have setup anything
  * in the kernel. Be very careful with what you do here.
@@ -363,8 +362,6 @@
 		(*(linux_dbvec->teach_debugger))();
 	}
 
-	init_task.thread.kregs = &fake_swapper_regs;
-
 	/* Run-time patch instructions to match the cpu model */
 	per_cpu_patch();
 
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index d872441..48abee4 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -165,8 +165,6 @@
 
 char reboot_command[COMMAND_LINE_SIZE];
 
-static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };
-
 static void __init per_cpu_patch(void)
 {
 	struct cpuid_patch_entry *p;
@@ -661,8 +659,6 @@
 	rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
 #endif
 
-	task_thread_info(&init_task)->kregs = &fake_swapper_regs;
-
 #ifdef CONFIG_IP_PNP
 	if (!ic_set_manually) {
 		phandle chosen = prom_finddevice("/chosen");
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index 83db94c..ef5c520 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -16,6 +16,7 @@
 #include <linux/uaccess.h>
 #include <linux/smp.h>
 #include <linux/perf_event.h>
+#include <linux/extable.h>
 
 #include <asm/setup.h>
 
@@ -213,10 +214,10 @@
 
 static void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
 {
-	unsigned long g2 = regs->u_regs [UREG_G2];
-	unsigned long fixup = search_extables_range(regs->pc, &g2);
+	const struct exception_table_entry *entry;
 
-	if (!fixup) {
+	entry = search_exception_tables(regs->pc);
+	if (!entry) {
 		unsigned long address = compute_effective_address(regs, insn);
         	if(address < PAGE_SIZE) {
                 	printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference in mna handler");
@@ -232,9 +233,8 @@
 	        die_if_kernel("Oops", regs);
 		/* Not reached */
 	}
-	regs->pc = fixup;
+	regs->pc = entry->fixup;
 	regs->npc = regs->pc + 4;
-	regs->u_regs [UREG_G2] = g2;
 }
 
 asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
@@ -274,103 +274,9 @@
 	}
 }
 
-static inline int ok_for_user(struct pt_regs *regs, unsigned int insn,
-			      enum direction dir)
-{
-	unsigned int reg;
-	int size = ((insn >> 19) & 3) == 3 ? 8 : 4;
-
-	if ((regs->pc | regs->npc) & 3)
-		return 0;
-
-	/* Must access_ok() in all the necessary places. */
-#define WINREG_ADDR(regnum) \
-	((void __user *)(((unsigned long *)regs->u_regs[UREG_FP])+(regnum)))
-
-	reg = (insn >> 25) & 0x1f;
-	if (reg >= 16) {
-		if (!access_ok(WINREG_ADDR(reg - 16), size))
-			return -EFAULT;
-	}
-	reg = (insn >> 14) & 0x1f;
-	if (reg >= 16) {
-		if (!access_ok(WINREG_ADDR(reg - 16), size))
-			return -EFAULT;
-	}
-	if (!(insn & 0x2000)) {
-		reg = (insn & 0x1f);
-		if (reg >= 16) {
-			if (!access_ok(WINREG_ADDR(reg - 16), size))
-				return -EFAULT;
-		}
-	}
-#undef WINREG_ADDR
-	return 0;
-}
-
-static void user_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
+asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 {
 	send_sig_fault(SIGBUS, BUS_ADRALN,
 		       (void __user *)safe_compute_effective_address(regs, insn),
 		       0, current);
 }
-
-asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
-{
-	enum direction dir;
-
-	if(!(current->thread.flags & SPARC_FLAG_UNALIGNED) ||
-	   (((insn >> 30) & 3) != 3))
-		goto kill_user;
-	dir = decode_direction(insn);
-	if(!ok_for_user(regs, insn, dir)) {
-		goto kill_user;
-	} else {
-		int err, size = decode_access_size(insn);
-		unsigned long addr;
-
-		if(floating_point_load_or_store_p(insn)) {
-			printk("User FPU load/store unaligned unsupported.\n");
-			goto kill_user;
-		}
-
-		addr = compute_effective_address(regs, insn);
-		perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
-		switch(dir) {
-		case load:
-			err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
-							 regs),
-					  size, (unsigned long *) addr,
-					  decode_signedness(insn));
-			break;
-
-		case store:
-			err = do_int_store(((insn>>25)&0x1f), size,
-					   (unsigned long *) addr, regs);
-			break;
-
-		case both:
-			/*
-			 * This was supported in 2.4. However, we question
-			 * the value of SWAP instruction across word boundaries.
-			 */
-			printk("Unaligned SWAP unsupported.\n");
-			err = -EFAULT;
-			break;
-
-		default:
-			unaligned_panic("Impossible user unaligned trap.");
-			goto out;
-		}
-		if (err)
-			goto kill_user;
-		else
-			advance(regs);
-		goto out;
-	}
-
-kill_user:
-	user_mna_trap_fault(regs, insn);
-out:
-	;
-}
diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
index 7488d13..781e39b 100644
--- a/arch/sparc/lib/checksum_32.S
+++ b/arch/sparc/lib/checksum_32.S
@@ -155,13 +155,6 @@
         .text;                                  \
         .align  4
 
-#define EXT(start,end)				\
-        .section __ex_table,ALLOC;		\
-        .align  4;                              \
-        .word   start, 0, end, cc_fault;         \
-        .text;                                  \
-        .align  4
-
 	/* This aligned version executes typically in 8.5 superscalar cycles, this
 	 * is the best I can do.  I say 8.5 because the final add will pair with
 	 * the next ldd in the main unrolled loop.  Thus the pipe is always full.
@@ -169,20 +162,20 @@
 	 * please check the fixup code below as well.
 	 */
 #define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
-	ldd	[src + off + 0x00], t0;							\
-	ldd	[src + off + 0x08], t2;							\
+	EX(ldd	[src + off + 0x00], t0);						\
+	EX(ldd	[src + off + 0x08], t2);						\
 	addxcc	t0, sum, sum;								\
-	ldd	[src + off + 0x10], t4;							\
+	EX(ldd	[src + off + 0x10], t4);						\
 	addxcc	t1, sum, sum;								\
-	ldd	[src + off + 0x18], t6;							\
+	EX(ldd	[src + off + 0x18], t6);						\
 	addxcc	t2, sum, sum;								\
-	std	t0, [dst + off + 0x00];							\
+	EX(std	t0, [dst + off + 0x00]);						\
 	addxcc	t3, sum, sum;								\
-	std	t2, [dst + off + 0x08];							\
+	EX(std	t2, [dst + off + 0x08]);						\
 	addxcc	t4, sum, sum;								\
-	std	t4, [dst + off + 0x10];							\
+	EX(std	t4, [dst + off + 0x10]);						\
 	addxcc	t5, sum, sum;								\
-	std	t6, [dst + off + 0x18];							\
+	EX(std	t6, [dst + off + 0x18]);						\
 	addxcc	t6, sum, sum;								\
 	addxcc	t7, sum, sum;
 
@@ -191,39 +184,39 @@
 	 * Viking MXCC into streaming mode.  Ho hum...
 	 */
 #define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)	\
-	ldd	[src + off + 0x00], t0;						\
-	ldd	[src + off + 0x08], t2;						\
-	ldd	[src + off + 0x10], t4;						\
-	ldd	[src + off + 0x18], t6;						\
-	st	t0, [dst + off + 0x00];						\
+	EX(ldd	[src + off + 0x00], t0);					\
+	EX(ldd	[src + off + 0x08], t2);					\
+	EX(ldd	[src + off + 0x10], t4);					\
+	EX(ldd	[src + off + 0x18], t6);					\
+	EX(st	t0, [dst + off + 0x00]);					\
 	addxcc	t0, sum, sum;							\
-	st	t1, [dst + off + 0x04];						\
+	EX(st	t1, [dst + off + 0x04]);					\
 	addxcc	t1, sum, sum;							\
-	st	t2, [dst + off + 0x08];						\
+	EX(st	t2, [dst + off + 0x08]);					\
 	addxcc	t2, sum, sum;							\
-	st	t3, [dst + off + 0x0c];						\
+	EX(st	t3, [dst + off + 0x0c]);					\
 	addxcc	t3, sum, sum;							\
-	st	t4, [dst + off + 0x10];						\
+	EX(st	t4, [dst + off + 0x10]);					\
 	addxcc	t4, sum, sum;							\
-	st	t5, [dst + off + 0x14];						\
+	EX(st	t5, [dst + off + 0x14]);					\
 	addxcc	t5, sum, sum;							\
-	st	t6, [dst + off + 0x18];						\
+	EX(st	t6, [dst + off + 0x18]);					\
 	addxcc	t6, sum, sum;							\
-	st	t7, [dst + off + 0x1c];						\
+	EX(st	t7, [dst + off + 0x1c]);					\
 	addxcc	t7, sum, sum;
 
 	/* Yuck, 6 superscalar cycles... */
 #define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3)	\
-	ldd	[src - off - 0x08], t0;				\
-	ldd	[src - off - 0x00], t2;				\
+	EX(ldd	[src - off - 0x08], t0);			\
+	EX(ldd	[src - off - 0x00], t2);			\
 	addxcc	t0, sum, sum;					\
-	st	t0, [dst - off - 0x08];				\
+	EX(st	t0, [dst - off - 0x08]);			\
 	addxcc	t1, sum, sum;					\
-	st	t1, [dst - off - 0x04];				\
+	EX(st	t1, [dst - off - 0x04]);			\
 	addxcc	t2, sum, sum;					\
-	st	t2, [dst - off - 0x00];				\
+	EX(st	t2, [dst - off - 0x00]);			\
 	addxcc	t3, sum, sum;					\
-	st	t3, [dst - off + 0x04];
+	EX(st	t3, [dst - off + 0x04]);
 
 	/* Handle the end cruft code out of band for better cache patterns. */
 cc_end_cruft:
@@ -331,7 +324,6 @@
 	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
 	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
 	CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
-10:	EXT(5b, 10b)			! note for exception handling
 	sub	%g1, 128, %g1		! detract from length
 	addx	%g0, %g7, %g7		! add in last carry bit
 	andcc	%g1, 0xffffff80, %g0	! more to csum?
@@ -356,8 +348,7 @@
 	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
 	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
 	CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
-12:	EXT(cctbl, 12b)			! note for exception table handling
-	addx	%g0, %g7, %g7
+12:	addx	%g0, %g7, %g7
 	andcc	%o3, 0xf, %g0		! check for low bits set
 ccte:	bne	cc_end_cruft		! something left, handle it out of band
 	 andcc	%o3, 8, %g0		! begin checks for that code
@@ -367,7 +358,6 @@
 	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
 	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
 	CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
-11:	EXT(ccdbl, 11b)			! note for exception table handling
 	sub	%g1, 128, %g1		! detract from length
 	addx	%g0, %g7, %g7		! add in last carry bit
 	andcc	%g1, 0xffffff80, %g0	! more to csum?
diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S
index dc72f2b..954572c 100644
--- a/arch/sparc/lib/copy_user.S
+++ b/arch/sparc/lib/copy_user.S
@@ -21,98 +21,134 @@
 /* Work around cpp -rob */
 #define ALLOC #alloc
 #define EXECINSTR #execinstr
+
+#define EX_ENTRY(l1, l2)			\
+	.section __ex_table,ALLOC;		\
+	.align	4;				\
+	.word	l1, l2;				\
+	.text;
+
 #define EX(x,y,a,b) 				\
 98: 	x,y;					\
 	.section .fixup,ALLOC,EXECINSTR;	\
 	.align	4;				\
-99:	ba fixupretl;				\
-	 a, b, %g3;				\
-	.section __ex_table,ALLOC;		\
-	.align	4;				\
-	.word	98b, 99b;			\
-	.text;					\
-	.align	4
+99:	retl;					\
+	 a, b, %o0;				\
+	EX_ENTRY(98b, 99b)
 
 #define EX2(x,y,c,d,e,a,b) 			\
 98: 	x,y;					\
 	.section .fixup,ALLOC,EXECINSTR;	\
 	.align	4;				\
 99:	c, d, e;				\
-	ba fixupretl;				\
-	 a, b, %g3;				\
-	.section __ex_table,ALLOC;		\
-	.align	4;				\
-	.word	98b, 99b;			\
-	.text;					\
-	.align	4
+	retl;					\
+	 a, b, %o0;				\
+	EX_ENTRY(98b, 99b)
 
 #define EXO2(x,y) 				\
 98: 	x, y;					\
-	.section __ex_table,ALLOC;		\
-	.align	4;				\
-	.word	98b, 97f;			\
-	.text;					\
-	.align	4
+	EX_ENTRY(98b, 97f)
 
-#define EXT(start,end,handler)			\
-	.section __ex_table,ALLOC;		\
-	.align	4;				\
-	.word	start, 0, end, handler;		\
-	.text;					\
-	.align	4
+#define LD(insn, src, offset, reg, label)	\
+98:	insn [%src + (offset)], %reg;		\
+	.section .fixup,ALLOC,EXECINSTR;	\
+99:	ba	label;				\
+	 mov	offset, %g5;			\
+	EX_ENTRY(98b, 99b)
 
-/* Please do not change following macros unless you change logic used
- * in .fixup at the end of this file as well
- */
+#define ST(insn, dst, offset, reg, label)	\
+98:	insn %reg, [%dst + (offset)];		\
+	.section .fixup,ALLOC,EXECINSTR;	\
+99:	ba	label;				\
+	 mov	offset, %g5;			\
+	EX_ENTRY(98b, 99b)
 
 /* Both these macros have to start with exactly the same insn */
+/* left: g7 + (g1 % 128) - offset */
 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
-	ldd	[%src + (offset) + 0x00], %t0; \
-	ldd	[%src + (offset) + 0x08], %t2; \
-	ldd	[%src + (offset) + 0x10], %t4; \
-	ldd	[%src + (offset) + 0x18], %t6; \
-	st	%t0, [%dst + (offset) + 0x00]; \
-	st	%t1, [%dst + (offset) + 0x04]; \
-	st	%t2, [%dst + (offset) + 0x08]; \
-	st	%t3, [%dst + (offset) + 0x0c]; \
-	st	%t4, [%dst + (offset) + 0x10]; \
-	st	%t5, [%dst + (offset) + 0x14]; \
-	st	%t6, [%dst + (offset) + 0x18]; \
-	st	%t7, [%dst + (offset) + 0x1c];
+	LD(ldd, src, offset + 0x00, t0, bigchunk_fault)	\
+	LD(ldd, src, offset + 0x08, t2, bigchunk_fault)	\
+	LD(ldd, src, offset + 0x10, t4, bigchunk_fault)	\
+	LD(ldd, src, offset + 0x18, t6, bigchunk_fault)	\
+	ST(st, dst, offset + 0x00, t0, bigchunk_fault)	\
+	ST(st, dst, offset + 0x04, t1, bigchunk_fault)	\
+	ST(st, dst, offset + 0x08, t2, bigchunk_fault)	\
+	ST(st, dst, offset + 0x0c, t3, bigchunk_fault)	\
+	ST(st, dst, offset + 0x10, t4, bigchunk_fault)	\
+	ST(st, dst, offset + 0x14, t5, bigchunk_fault)	\
+	ST(st, dst, offset + 0x18, t6, bigchunk_fault)	\
+	ST(st, dst, offset + 0x1c, t7, bigchunk_fault)
 
+/* left: g7 + (g1 % 128) - offset */
 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
-	ldd	[%src + (offset) + 0x00], %t0; \
-	ldd	[%src + (offset) + 0x08], %t2; \
-	ldd	[%src + (offset) + 0x10], %t4; \
-	ldd	[%src + (offset) + 0x18], %t6; \
-	std	%t0, [%dst + (offset) + 0x00]; \
-	std	%t2, [%dst + (offset) + 0x08]; \
-	std	%t4, [%dst + (offset) + 0x10]; \
-	std	%t6, [%dst + (offset) + 0x18];
+	LD(ldd, src, offset + 0x00, t0, bigchunk_fault)	\
+	LD(ldd, src, offset + 0x08, t2, bigchunk_fault)	\
+	LD(ldd, src, offset + 0x10, t4, bigchunk_fault)	\
+	LD(ldd, src, offset + 0x18, t6, bigchunk_fault)	\
+	ST(std, dst, offset + 0x00, t0, bigchunk_fault)	\
+	ST(std, dst, offset + 0x08, t2, bigchunk_fault)	\
+	ST(std, dst, offset + 0x10, t4, bigchunk_fault)	\
+	ST(std, dst, offset + 0x18, t6, bigchunk_fault)
 
+	.section .fixup,#alloc,#execinstr
+bigchunk_fault:
+	sub	%g7, %g5, %o0
+	and	%g1, 127, %g1
+	retl
+	 add	%o0, %g1, %o0
+
+/* left: offset + 16 + (g1 % 16) */
 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
-	ldd	[%src - (offset) - 0x10], %t0; \
-	ldd	[%src - (offset) - 0x08], %t2; \
-	st	%t0, [%dst - (offset) - 0x10]; \
-	st	%t1, [%dst - (offset) - 0x0c]; \
-	st	%t2, [%dst - (offset) - 0x08]; \
-	st	%t3, [%dst - (offset) - 0x04];
+	LD(ldd, src, -(offset + 0x10), t0, lastchunk_fault)	\
+	LD(ldd, src, -(offset + 0x08), t2, lastchunk_fault)	\
+	ST(st, dst, -(offset + 0x10), t0, lastchunk_fault)	\
+	ST(st, dst, -(offset + 0x0c), t1, lastchunk_fault)	\
+	ST(st, dst, -(offset + 0x08), t2, lastchunk_fault)	\
+	ST(st, dst, -(offset + 0x04), t3, lastchunk_fault)
 
+	.section .fixup,#alloc,#execinstr
+lastchunk_fault:
+	and	%g1, 15, %g1
+	retl
+	 sub	%g1, %g5, %o0
+
+/* left: o3 + (o2 % 16) - offset */
 #define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
-	lduh	[%src + (offset) + 0x00], %t0; \
-	lduh	[%src + (offset) + 0x02], %t1; \
-	lduh	[%src + (offset) + 0x04], %t2; \
-	lduh	[%src + (offset) + 0x06], %t3; \
-	sth	%t0, [%dst + (offset) + 0x00]; \
-	sth	%t1, [%dst + (offset) + 0x02]; \
-	sth	%t2, [%dst + (offset) + 0x04]; \
-	sth	%t3, [%dst + (offset) + 0x06];
+	LD(lduh, src, offset + 0x00, t0, halfchunk_fault)	\
+	LD(lduh, src, offset + 0x02, t1, halfchunk_fault)	\
+	LD(lduh, src, offset + 0x04, t2, halfchunk_fault)	\
+	LD(lduh, src, offset + 0x06, t3, halfchunk_fault)	\
+	ST(sth, dst, offset + 0x00, t0, halfchunk_fault)	\
+	ST(sth, dst, offset + 0x02, t1, halfchunk_fault)	\
+	ST(sth, dst, offset + 0x04, t2, halfchunk_fault)	\
+	ST(sth, dst, offset + 0x06, t3, halfchunk_fault)
 
+/* left: o3 + (o2 % 16) + offset + 2 */
 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
-	ldub	[%src - (offset) - 0x02], %t0; \
-	ldub	[%src - (offset) - 0x01], %t1; \
-	stb	%t0, [%dst - (offset) - 0x02]; \
-	stb	%t1, [%dst - (offset) - 0x01];
+	LD(ldub, src, -(offset + 0x02), t0, halfchunk_fault)	\
+	LD(ldub, src, -(offset + 0x01), t1, halfchunk_fault)	\
+	ST(stb, dst, -(offset + 0x02), t0, halfchunk_fault)	\
+	ST(stb, dst, -(offset + 0x01), t1, halfchunk_fault)
+
+	.section .fixup,#alloc,#execinstr
+halfchunk_fault:
+	and	%o2, 15, %o2
+	sub	%o3, %g5, %o3
+	retl
+	 add	%o2, %o3, %o0
+
+/* left: offset + 2 + (o2 % 2) */
+#define MOVE_LAST_SHORTCHUNK(src, dst, offset, t0, t1) \
+	LD(ldub, src, -(offset + 0x02), t0, last_shortchunk_fault)	\
+	LD(ldub, src, -(offset + 0x01), t1, last_shortchunk_fault)	\
+	ST(stb, dst, -(offset + 0x02), t0, last_shortchunk_fault)	\
+	ST(stb, dst, -(offset + 0x01), t1, last_shortchunk_fault)
+
+	.section .fixup,#alloc,#execinstr
+last_shortchunk_fault:
+	and	%o2, 1, %o2
+	retl
+	 sub	%o2, %g5, %o0
 
 	.text
 	.align	4
@@ -182,8 +218,6 @@
 	MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-80:
-	EXT(5b, 80b, 50f)
 	subcc	%g7, 128, %g7
 	add	%o1, 128, %o1
 	bne	5b
@@ -201,7 +235,6 @@
 	jmpl	%o5 + %lo(copy_user_table_end), %g0
 	 add	%o0, %g7, %o0
 
-copy_user_table:
 	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
 	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
 	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
@@ -210,7 +243,6 @@
 	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
 	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
 copy_user_table_end:
-	EXT(copy_user_table, copy_user_table_end, 51f)
 	be	copy_user_last7
 	 andcc	%g1, 4, %g0
 
@@ -250,8 +282,6 @@
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
 	MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-81:
-	EXT(ldd_std, 81b, 52f)
 	subcc	%g7, 128, %g7
 	add	%o1, 128, %o1
 	bne	ldd_std
@@ -290,8 +320,6 @@
 10:
 	MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
 	MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
-82:
-	EXT(10b, 82b, 53f)
 	subcc	%o3, 0x10, %o3
 	add	%o1, 0x10, %o1
 	bne	10b
@@ -308,8 +336,6 @@
 	MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
 	MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
 	MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
-83:
-	EXT(byte_chunk, 83b, 54f)
 	subcc	%o3, 0x10, %o3
 	add	%o1, 0x10, %o1
 	bne	byte_chunk
@@ -325,16 +351,14 @@
 	add	%o1, %o3, %o1
 	jmpl	%o5 + %lo(short_table_end), %g0
 	 andcc	%o2, 1, %g0
-84:
-	MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
-	MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
-	MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
-	MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
-	MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
-	MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
-	MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
+	MOVE_LAST_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
+	MOVE_LAST_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
+	MOVE_LAST_SHORTCHUNK(o1, o0, 0x08, g2, g3)
+	MOVE_LAST_SHORTCHUNK(o1, o0, 0x06, g2, g3)
+	MOVE_LAST_SHORTCHUNK(o1, o0, 0x04, g2, g3)
+	MOVE_LAST_SHORTCHUNK(o1, o0, 0x02, g2, g3)
+	MOVE_LAST_SHORTCHUNK(o1, o0, 0x00, g2, g3)
 short_table_end:
-	EXT(84b, short_table_end, 55f)
 	be	1f
 	 nop
 	EX(ldub	[%o1], %g2, add %g0, 1)
@@ -363,123 +387,8 @@
 	.section .fixup,#alloc,#execinstr
 	.align	4
 97:
-	mov	%o2, %g3
-fixupretl:
 	retl
-	 mov	%g3, %o0
-
-/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
-50:
-/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
- * happens. This is derived from the amount ldd reads, st stores, etc.
- * x = g2 % 12;
- * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
- * o0 += (g2 / 12) * 32;
- */
-	cmp	%g2, 12
-	add	%o0, %g7, %o0
-	bcs	1f
-	 cmp	%g2, 24
-	bcs	2f
-	 cmp	%g2, 36
-	bcs	3f
-	 nop
-	sub	%g2, 12, %g2
-	sub	%g7, 32, %g7
-3:	sub	%g2, 12, %g2
-	sub	%g7, 32, %g7
-2:	sub	%g2, 12, %g2
-	sub	%g7, 32, %g7
-1:	cmp	%g2, 4
-	bcs,a	60f
-	 clr	%g2
-	sub	%g2, 4, %g2
-	sll	%g2, 2, %g2
-60:	and	%g1, 0x7f, %g3
-	sub	%o0, %g7, %o0
-	add	%g3, %g7, %g3
-	ba	fixupretl
-	 sub	%g3, %g2, %g3
-51:
-/* i = 41 - g2; j = i % 6;
- * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
- * o0 -= (i / 6) * 16 + 16;
- */
-	neg	%g2
-	and	%g1, 0xf, %g1
-	add	%g2, 41, %g2
-	add	%o0, %g1, %o0
-1:	cmp	%g2, 6
-	bcs,a	2f
-	 cmp	%g2, 4
-	add	%g1, 16, %g1
-	b	1b
-	 sub	%g2, 6, %g2
-2:	bcc,a	2f
-	 mov	16, %g2
-	inc	%g2
-	sll	%g2, 2, %g2
-2:	add	%g1, %g2, %g3
-	ba	fixupretl
-	 sub	%o0, %g3, %o0
-52:
-/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
-   o0 += (g2 / 8) * 32 */
-	andn	%g2, 7, %g4
-	add	%o0, %g7, %o0
-	andcc	%g2, 4, %g0
-	and	%g2, 3, %g2
-	sll	%g4, 2, %g4
-	sll	%g2, 3, %g2
-	bne	60b
-	 sub	%g7, %g4, %g7
-	ba	60b
-	 clr	%g2
-53:
-/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
-   o0 += (g2 & 8) */
-	and	%g2, 3, %g4
-	andcc	%g2, 4, %g0
-	and	%g2, 8, %g2
-	sll	%g4, 1, %g4
-	be	1f
-	 add	%o0, %g2, %o0
-	add	%g2, %g4, %g2
-1:	and	%o2, 0xf, %g3
-	add	%g3, %o3, %g3
-	ba	fixupretl
-	 sub	%g3, %g2, %g3
-54:
-/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
-   o0 += (g2 / 4) * 2 */
-	srl	%g2, 2, %o4
-	and	%g2, 1, %o5
-	srl	%g2, 1, %g2
-	add	%o4, %o4, %o4
-	and	%o5, %g2, %o5
-	and	%o2, 0xf, %o2
-	add	%o0, %o4, %o0
-	sub	%o3, %o5, %o3
-	sub	%o2, %o4, %o2
-	ba	fixupretl
-	 add	%o2, %o3, %g3
-55:
-/* i = 27 - g2;
-   g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
-   o0 -= i / 4 * 2 + 1 */
-	neg	%g2
-	and	%o2, 1, %o2
-	add	%g2, 27, %g2
-	srl	%g2, 2, %o5
-	andcc	%g2, 3, %g0
-	mov	1, %g2
-	add	%o5, %o5, %o5
-	be,a	1f
-	 clr	%g2
-1:	add	%g2, %o5, %g3
-	sub	%o0, %g3, %o0
-	ba	fixupretl
-	 add	%g3, %o2, %g3
+	 mov	%o2, %o0
 
 	.globl  __copy_user_end
 __copy_user_end:
diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S
index f427f34..eaff682 100644
--- a/arch/sparc/lib/memset.S
+++ b/arch/sparc/lib/memset.S
@@ -19,7 +19,7 @@
 98: 	x,y;					\
 	.section .fixup,ALLOC,EXECINSTR;	\
 	.align	4;				\
-99:	ba 30f;					\
+99:	retl;					\
 	 a, b, %o0;				\
 	.section __ex_table,ALLOC;		\
 	.align	4;				\
@@ -27,35 +27,44 @@
 	.text;					\
 	.align	4
 
-#define EXT(start,end,handler) 			\
+#define STORE(source, base, offset, n)		\
+98: 	std source, [base + offset + n];	\
+	.section .fixup,ALLOC,EXECINSTR;	\
+	.align	4;				\
+99:	ba 30f;					\
+	 sub %o3, n - offset, %o3;		\
 	.section __ex_table,ALLOC;		\
 	.align	4;				\
-	.word	start, 0, end, handler;		\
+	.word	98b, 99b;			\
 	.text;					\
-	.align	4
+	.align	4;
+
+#define STORE_LAST(source, base, offset, n)	\
+	EX(std source, [base - offset - n],	\
+	   add %o1, offset + n);
 
 /* Please don't change these macros, unless you change the logic
  * in the .fixup section below as well.
  * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
-#define ZERO_BIG_BLOCK(base, offset, source)    \
-	std	source, [base + offset + 0x00]; \
-	std	source, [base + offset + 0x08]; \
-	std	source, [base + offset + 0x10]; \
-	std	source, [base + offset + 0x18]; \
-	std	source, [base + offset + 0x20]; \
-	std	source, [base + offset + 0x28]; \
-	std	source, [base + offset + 0x30]; \
-	std	source, [base + offset + 0x38];
+#define ZERO_BIG_BLOCK(base, offset, source)	\
+	STORE(source, base, offset, 0x00);	\
+	STORE(source, base, offset, 0x08);	\
+	STORE(source, base, offset, 0x10);	\
+	STORE(source, base, offset, 0x18);	\
+	STORE(source, base, offset, 0x20);	\
+	STORE(source, base, offset, 0x28);	\
+	STORE(source, base, offset, 0x30);	\
+	STORE(source, base, offset, 0x38);
 
 #define ZERO_LAST_BLOCKS(base, offset, source)	\
-	std	source, [base - offset - 0x38]; \
-	std	source, [base - offset - 0x30]; \
-	std	source, [base - offset - 0x28]; \
-	std	source, [base - offset - 0x20]; \
-	std	source, [base - offset - 0x18]; \
-	std	source, [base - offset - 0x10]; \
-	std	source, [base - offset - 0x08]; \
-	std	source, [base - offset - 0x00];
+	STORE_LAST(source, base, offset, 0x38);	\
+	STORE_LAST(source, base, offset, 0x30); \
+	STORE_LAST(source, base, offset, 0x28);	\
+	STORE_LAST(source, base, offset, 0x20);	\
+	STORE_LAST(source, base, offset, 0x18);	\
+	STORE_LAST(source, base, offset, 0x10);	\
+	STORE_LAST(source, base, offset, 0x08);	\
+	STORE_LAST(source, base, offset, 0x00);
 
 	.text
 	.align 4
@@ -68,8 +77,6 @@
 	.globl	memset
 	EXPORT_SYMBOL(__bzero)
 	EXPORT_SYMBOL(memset)
-	.globl	__memset_start, __memset_end
-__memset_start:
 memset:
 	mov	%o0, %g1
 	mov	1, %g4
@@ -122,8 +129,6 @@
 	ZERO_BIG_BLOCK(%o0, 0x00, %g2)
 	subcc	%o3, 128, %o3
 	ZERO_BIG_BLOCK(%o0, 0x40, %g2)
-11:
-	EXT(10b, 11b, 20f)
 	bne	10b
 	 add	%o0, 128, %o0
 
@@ -138,11 +143,9 @@
 	jmp	%o4
 	 add	%o0, %o2, %o0
 
-12:
 	ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
 	ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
 13:
-	EXT(12b, 13b, 21f)
 	be	8f
 	 andcc	%o1, 4, %g0
 
@@ -182,37 +185,13 @@
 5:
 	retl
 	 clr	%o0
-__memset_end:
 
 	.section .fixup,#alloc,#execinstr
 	.align	4
-20:
-	cmp	%g2, 8
-	bleu	1f
-	 and	%o1, 0x7f, %o1
-	sub	%g2, 9, %g2
-	add	%o3, 64, %o3
-1:
-	sll	%g2, 3, %g2
-	add	%o3, %o1, %o0
-	b 30f
-	 sub	%o0, %g2, %o0
-21:
-	mov	8, %o0
-	and	%o1, 7, %o1
-	sub	%o0, %g2, %o0
-	sll	%o0, 3, %o0
-	b 30f
-	 add	%o0, %o1, %o0
 30:
-/* %o4 is faulting address, %o5 is %pc where fault occurred */
-	save	%sp, -104, %sp
-	mov	%i5, %o0
-	mov	%i7, %o1
-	call	lookup_fault
-	 mov	%i4, %o2
-	ret
-	 restore
+	and	%o1, 0x7f, %o1
+	retl
+	 add	%o3, %o1, %o0
 
 	.globl __bzero_end
 __bzero_end:
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile
index 68db1f8..871354a 100644
--- a/arch/sparc/mm/Makefile
+++ b/arch/sparc/mm/Makefile
@@ -8,7 +8,7 @@
 obj-$(CONFIG_SPARC64)   += ultra.o tlb.o tsb.o
 obj-y                   += fault_$(BITS).o
 obj-y                   += init_$(BITS).o
-obj-$(CONFIG_SPARC32)   += extable.o srmmu.o iommu.o io-unit.o
+obj-$(CONFIG_SPARC32)   += srmmu.o iommu.o io-unit.o
 obj-$(CONFIG_SPARC32)   += srmmu_access.o
 obj-$(CONFIG_SPARC32)   += hypersparc.o viking.o tsunami.o swift.o
 obj-$(CONFIG_SPARC32)   += leon_mm.o
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
deleted file mode 100644
index 241b406..0000000
--- a/arch/sparc/mm/extable.c
+++ /dev/null
@@ -1,107 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/arch/sparc/mm/extable.c
- */
-
-#include <linux/module.h>
-#include <linux/extable.h>
-#include <linux/uaccess.h>
-
-void sort_extable(struct exception_table_entry *start,
-		  struct exception_table_entry *finish)
-{
-}
-
-/* Caller knows they are in a range if ret->fixup == 0 */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *base,
-	       const size_t num,
-	       unsigned long value)
-{
-	int i;
-
-	/* Single insn entries are encoded as:
-	 *	word 1:	insn address
-	 *	word 2:	fixup code address
-	 *
-	 * Range entries are encoded as:
-	 *	word 1: first insn address
-	 *	word 2: 0
-	 *	word 3: last insn address + 4 bytes
-	 *	word 4: fixup code address
-	 *
-	 * Deleted entries are encoded as:
-	 *	word 1: unused
-	 *	word 2: -1
-	 *
-	 * See asm/uaccess.h for more details.
-	 */
-
-	/* 1. Try to find an exact match. */
-	for (i = 0; i < num; i++) {
-		if (base[i].fixup == 0) {
-			/* A range entry, skip both parts. */
-			i++;
-			continue;
-		}
-
-		/* A deleted entry; see trim_init_extable */
-		if (base[i].fixup == -1)
-			continue;
-
-		if (base[i].insn == value)
-			return &base[i];
-	}
-
-	/* 2. Try to find a range match. */
-	for (i = 0; i < (num - 1); i++) {
-		if (base[i].fixup)
-			continue;
-
-		if (base[i].insn <= value && base[i + 1].insn > value)
-			return &base[i];
-
-		i++;
-	}
-
-        return NULL;
-}
-
-#ifdef CONFIG_MODULES
-/* We could memmove them around; easier to mark the trimmed ones. */
-void trim_init_extable(struct module *m)
-{
-	unsigned int i;
-	bool range;
-
-	for (i = 0; i < m->num_exentries; i += range ? 2 : 1) {
-		range = m->extable[i].fixup == 0;
-
-		if (within_module_init(m->extable[i].insn, m)) {
-			m->extable[i].fixup = -1;
-			if (range)
-				m->extable[i+1].fixup = -1;
-		}
-		if (range)
-			i++;
-	}
-}
-#endif /* CONFIG_MODULES */
-
-/* Special extable search, which handles ranges.  Returns fixup */
-unsigned long search_extables_range(unsigned long addr, unsigned long *g2)
-{
-	const struct exception_table_entry *entry;
-
-	entry = search_exception_tables(addr);
-	if (!entry)
-		return 0;
-
-	/* Inside range?  Fix g2 and return correct fixup */
-	if (!entry->fixup) {
-		*g2 = (addr - entry->insn) / 4;
-		return (entry + 1)->fixup;
-	}
-
-	return entry->fixup;
-}
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 40ce087..de2031c 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/kdebug.h>
 #include <linux/uaccess.h>
+#include <linux/extable.h>
 
 #include <asm/page.h>
 #include <asm/openprom.h>
@@ -54,54 +55,6 @@
 	die_if_kernel("Oops", regs);
 }
 
-asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc,
-			    unsigned long address)
-{
-	struct pt_regs regs;
-	unsigned long g2;
-	unsigned int insn;
-	int i;
-
-	i = search_extables_range(ret_pc, &g2);
-	switch (i) {
-	case 3:
-		/* load & store will be handled by fixup */
-		return 3;
-
-	case 1:
-		/* store will be handled by fixup, load will bump out */
-		/* for _to_ macros */
-		insn = *((unsigned int *) pc);
-		if ((insn >> 21) & 1)
-			return 1;
-		break;
-
-	case 2:
-		/* load will be handled by fixup, store will bump out */
-		/* for _from_ macros */
-		insn = *((unsigned int *) pc);
-		if (!((insn >> 21) & 1) || ((insn>>19)&0x3f) == 15)
-			return 2;
-		break;
-
-	default:
-		break;
-	}
-
-	memset(&regs, 0, sizeof(regs));
-	regs.pc = pc;
-	regs.npc = pc + 4;
-	__asm__ __volatile__(
-		"rd %%psr, %0\n\t"
-		"nop\n\t"
-		"nop\n\t"
-		"nop\n" : "=r" (regs.psr));
-	unhandled_fault(address, current, &regs);
-
-	/* Not reached */
-	return 0;
-}
-
 static inline void
 show_signal_msg(struct pt_regs *regs, int sig, int code,
 		unsigned long address, struct task_struct *tsk)
@@ -162,8 +115,6 @@
 	struct vm_area_struct *vma;
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
-	unsigned int fixup;
-	unsigned long g2;
 	int from_user = !(regs->psr & PSR_PS);
 	int code;
 	vm_fault_t fault;
@@ -281,30 +232,19 @@
 
 	/* Is this in ex_table? */
 no_context:
-	g2 = regs->u_regs[UREG_G2];
 	if (!from_user) {
-		fixup = search_extables_range(regs->pc, &g2);
-		/* Values below 10 are reserved for other things */
-		if (fixup > 10) {
-			extern const unsigned int __memset_start[];
-			extern const unsigned int __memset_end[];
+		const struct exception_table_entry *entry;
 
+		entry = search_exception_tables(regs->pc);
 #ifdef DEBUG_EXCEPTIONS
-			printk("Exception: PC<%08lx> faddr<%08lx>\n",
-			       regs->pc, address);
-			printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n",
-				regs->pc, fixup, g2);
+		printk("Exception: PC<%08lx> faddr<%08lx>\n",
+		       regs->pc, address);
+		printk("EX_TABLE: insn<%08lx> fixup<%08x>\n",
+			regs->pc, entry->fixup);
 #endif
-			if ((regs->pc >= (unsigned long)__memset_start &&
-			     regs->pc < (unsigned long)__memset_end)) {
-				regs->u_regs[UREG_I4] = address;
-				regs->u_regs[UREG_I5] = regs->pc;
-			}
-			regs->u_regs[UREG_G2] = g2;
-			regs->pc = fixup;
-			regs->npc = regs->pc + 4;
-			return;
-		}
+		regs->pc = entry->fixup;
+		regs->npc = regs->pc + 4;
+		return;
 	}
 
 	unhandled_fault(address, tsk, regs);
diff --git a/arch/sparc/mm/mm_32.h b/arch/sparc/mm/mm_32.h
index ce750a9..ee55f10 100644
--- a/arch/sparc/mm/mm_32.h
+++ b/arch/sparc/mm/mm_32.h
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* fault_32.c - visible as they are called from assembler */
-asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc,
-                            unsigned long address);
 asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
                                unsigned long address);
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 15c9c28..5809cc1 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -767,7 +767,7 @@
 
 config CRYPTO_POLY1305_MIPS
 	tristate "Poly1305 authenticator algorithm (MIPS optimized)"
-	depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
+	depends on MIPS
 	select CRYPTO_ARCH_HAVE_LIB_POLY1305
 
 config CRYPTO_MD4
diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c
index 8038a8a..f4949b6 100644
--- a/drivers/char/hw_random/pseries-rng.c
+++ b/drivers/char/hw_random/pseries-rng.c
@@ -54,10 +54,9 @@
 	return hwrng_register(&pseries_rng);
 }
 
-static int pseries_rng_remove(struct vio_dev *dev)
+static void pseries_rng_remove(struct vio_dev *dev)
 {
 	hwrng_unregister(&pseries_rng);
-	return 0;
 }
 
 static const struct vio_device_id pseries_rng_driver_ids[] = {
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 994385b..9036047 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -343,7 +343,7 @@
  *
  * Return: Always 0.
  */
-static int tpm_ibmvtpm_remove(struct vio_dev *vdev)
+static void tpm_ibmvtpm_remove(struct vio_dev *vdev)
 {
 	struct tpm_chip *chip = dev_get_drvdata(&vdev->dev);
 	struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
@@ -372,8 +372,6 @@
 	kfree(ibmvtpm);
 	/* For tpm_ibmvtpm_get_desired_dma */
 	dev_set_drvdata(&vdev->dev, NULL);
-
-	return 0;
 }
 
 /**
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index 2de5e36..cc8dd30 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -1042,7 +1042,7 @@
 	return ret;
 }
 
-static int nx842_remove(struct vio_dev *viodev)
+static void nx842_remove(struct vio_dev *viodev)
 {
 	struct nx842_devdata *old_devdata;
 	unsigned long flags;
@@ -1063,8 +1063,6 @@
 	if (old_devdata)
 		kfree(old_devdata->counters);
 	kfree(old_devdata);
-
-	return 0;
 }
 
 static const struct vio_device_id nx842_vio_driver_ids[] = {
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index 0d2dc5b..1d0e8a1 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -783,7 +783,7 @@
 	return nx_register_algs();
 }
 
-static int nx_remove(struct vio_dev *viodev)
+static void nx_remove(struct vio_dev *viodev)
 {
 	dev_dbg(&viodev->dev, "entering nx_remove for UA 0x%x\n",
 		viodev->unit_address);
@@ -811,8 +811,6 @@
 		nx_unregister_skcipher(&nx_ecb_aes_alg, NX_FC_AES,
 				       NX_MODE_AES_ECB);
 	}
-
-	return 0;
 }
 
 
diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c
index 2d778d0..c0fe329 100644
--- a/drivers/misc/ibmvmc.c
+++ b/drivers/misc/ibmvmc.c
@@ -2288,15 +2288,13 @@
 	return -EPERM;
 }
 
-static int ibmvmc_remove(struct vio_dev *vdev)
+static void ibmvmc_remove(struct vio_dev *vdev)
 {
 	struct crq_server_adapter *adapter = dev_get_drvdata(&vdev->dev);
 
 	dev_info(adapter->dev, "Entering remove for UA 0x%x\n",
 		 vdev->unit_address);
 	ibmvmc_release_crq_queue(adapter);
-
-	return 0;
 }
 
 static struct vio_device_id ibmvmc_device_table[] = {
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index b09bed5..bcd31f4 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -94,7 +94,7 @@
 	select CRYPTO_BLAKE2S_ARM if ARM
 	select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON
 	select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2
-	select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
+	select CRYPTO_POLY1305_MIPS if MIPS
 	help
 	  WireGuard is a secure, fast, and easy to use replacement for IPSec
 	  that uses modern cryptography and clever networking tricks. It's
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index c3ec9ce..7fea9ae 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1758,7 +1758,7 @@
 	return 0;
 }
 
-static int ibmveth_remove(struct vio_dev *dev)
+static void ibmveth_remove(struct vio_dev *dev)
 {
 	struct net_device *netdev = dev_get_drvdata(&dev->dev);
 	struct ibmveth_adapter *adapter = netdev_priv(netdev);
@@ -1771,8 +1771,6 @@
 
 	free_netdev(netdev);
 	dev_set_drvdata(&dev->dev, NULL);
-
-	return 0;
 }
 
 static struct attribute veth_active_attr;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 118a4bd..fe3201b 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -78,7 +78,6 @@
 MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
 
 static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
-static int ibmvnic_remove(struct vio_dev *);
 static void release_sub_crqs(struct ibmvnic_adapter *, bool);
 static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
@@ -5396,7 +5395,7 @@
 	return rc;
 }
 
-static int ibmvnic_remove(struct vio_dev *dev)
+static void ibmvnic_remove(struct vio_dev *dev)
 {
 	struct net_device *netdev = dev_get_drvdata(&dev->dev);
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -5437,8 +5436,6 @@
 	device_remove_file(&dev->dev, &dev_attr_failover);
 	free_netdev(netdev);
 	dev_set_drvdata(&dev->dev, NULL);
-
-	return 0;
 }
 
 static ssize_t failover_store(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 755313b..e663085 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -6038,7 +6038,7 @@
  * Return value:
  * 	0
  **/
-static int ibmvfc_remove(struct vio_dev *vdev)
+static void ibmvfc_remove(struct vio_dev *vdev)
 {
 	struct ibmvfc_host *vhost = dev_get_drvdata(&vdev->dev);
 	LIST_HEAD(purge);
@@ -6070,7 +6070,6 @@
 	spin_unlock(&ibmvfc_driver_lock);
 	scsi_host_put(vhost->host);
 	LEAVE;
-	return 0;
 }
 
 /**
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 29fcc44..77fafb1 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -2335,7 +2335,7 @@
 	return -1;
 }
 
-static int ibmvscsi_remove(struct vio_dev *vdev)
+static void ibmvscsi_remove(struct vio_dev *vdev)
 {
 	struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev);
 
@@ -2356,8 +2356,6 @@
 	spin_unlock(&ibmvscsi_driver_lock);
 
 	scsi_host_put(hostdata->host);
-
-	return 0;
 }
 
 /**
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index cc3908c..9abd9e2 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -3595,7 +3595,7 @@
 	return rc;
 }
 
-static int ibmvscsis_remove(struct vio_dev *vdev)
+static void ibmvscsis_remove(struct vio_dev *vdev)
 {
 	struct scsi_info *vscsi = dev_get_drvdata(&vdev->dev);
 
@@ -3622,8 +3622,6 @@
 	list_del(&vscsi->list);
 	spin_unlock_bh(&ibmvscsis_dev_lock);
 	kfree(vscsi);
-
-	return 0;
 }
 
 static ssize_t system_id_show(struct device *dev,
diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c
index c908489..9afa1dc 100644
--- a/drivers/tty/hvc/hvcs.c
+++ b/drivers/tty/hvc/hvcs.c
@@ -317,7 +317,6 @@
 
 static int hvcs_probe(struct vio_dev *dev,
 		const struct vio_device_id *id);
-static int hvcs_remove(struct vio_dev *dev);
 static int __init hvcs_module_init(void);
 static void __exit hvcs_module_exit(void);
 static int hvcs_initialize(void);
@@ -819,7 +818,7 @@
 	return 0;
 }
 
-static int hvcs_remove(struct vio_dev *dev)
+static void hvcs_remove(struct vio_dev *dev)
 {
 	struct hvcs_struct *hvcsd = dev_get_drvdata(&dev->dev);
 	unsigned long flags;
@@ -849,7 +848,6 @@
 
 	printk(KERN_INFO "HVCS: vty-server@%X removed from the"
 			" vio bus.\n", dev->unit_address);
-	return 0;
 };
 
 static struct vio_driver hvcs_vio_driver = {
diff --git a/lib/extable.c b/lib/extable.c
index c3e59ca..9c9f40b 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -21,7 +21,6 @@
 }
 #endif
 
-#ifndef ARCH_HAS_SORT_EXTABLE
 #ifndef ARCH_HAS_RELATIVE_EXTABLE
 #define swap_ex		NULL
 #else
@@ -88,9 +87,6 @@
 		m->num_exentries--;
 }
 #endif /* CONFIG_MODULES */
-#endif /* !ARCH_HAS_SORT_EXTABLE */
-
-#ifndef ARCH_HAS_SEARCH_EXTABLE
 
 static int cmp_ex_search(const void *key, const void *elt)
 {
@@ -120,4 +116,3 @@
 	return bsearch(&value, base, num,
 		       sizeof(struct exception_table_entry), cmp_ex_search);
 }
-#endif
diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h
index 543dd70..ad64d67 100644
--- a/tools/arch/s390/include/uapi/asm/ptrace.h
+++ b/tools/arch/s390/include/uapi/asm/ptrace.h
@@ -179,8 +179,9 @@
 #define ACR_SIZE	4
 
 
-#define PTRACE_OLDSETOPTIONS	     21
-
+#define PTRACE_OLDSETOPTIONS		21
+#define PTRACE_SYSEMU			31
+#define PTRACE_SYSEMU_SINGLESTEP	32
 #ifndef __ASSEMBLY__
 #include <linux/stddef.h>
 #include <linux/types.h>
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 84b8878..cc96e26 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS			19	   /* N 32-bit words worth of info */
+#define NCAPINTS			20	   /* N 32-bit words worth of info */
 #define NBUGINTS			1	   /* N 32-bit bug flags */
 
 /*
@@ -96,7 +96,7 @@
 #define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_SME_COHERENT	( 3*32+17) /* "" AMD hardware-enforced cache coherency */
+/* FREE!                                ( 3*32+17) */
 #define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -201,7 +201,7 @@
 #define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
+/* FREE!                                ( 7*32+10) */
 #define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_RETPOLINE		( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD	( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
@@ -211,7 +211,7 @@
 #define X86_FEATURE_SSBD		( 7*32+17) /* Speculative Store Bypass Disable */
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW		( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_SEV			( 7*32+20) /* AMD Secure Encrypted Virtualization */
+/* FREE!                                ( 7*32+20) */
 #define X86_FEATURE_USE_IBPB		( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 #define X86_FEATURE_USE_IBRS_FW		( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
@@ -236,8 +236,6 @@
 #define X86_FEATURE_EPT_AD		( 8*32+17) /* Intel Extended Page Table access-dirty bit */
 #define X86_FEATURE_VMCALL		( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL		( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
-#define X86_FEATURE_SEV_ES		( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_VM_PAGE_FLUSH	( 8*32+21) /* "" VM Page Flush MSR is supported */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -294,6 +292,7 @@
 #define X86_FEATURE_PER_THREAD_MBA	(11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
@@ -337,6 +336,7 @@
 #define X86_FEATURE_AVIC		(15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD	(15*32+15) /* Virtual VMSAVE VMLOAD */
 #define X86_FEATURE_VGIF		(15*32+16) /* Virtual GIF */
+#define X86_FEATURE_SVME_ADDR_CHK	(15*32+28) /* "" SVME addr check */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
 #define X86_FEATURE_AVX512VBMI		(16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
@@ -385,6 +385,13 @@
 #define X86_FEATURE_CORE_CAPABILITIES	(18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
 #define X86_FEATURE_SPEC_CTRL_SSBD	(18*32+31) /* "" Speculative Store Bypass Disable */
 
+/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
+#define X86_FEATURE_SME			(19*32+ 0) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_SEV			(19*32+ 1) /* AMD Secure Encrypted Virtualization */
+#define X86_FEATURE_VM_PAGE_FLUSH	(19*32+ 2) /* "" VM Page Flush MSR is supported */
+#define X86_FEATURE_SEV_ES		(19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SME_COHERENT	(19*32+10) /* "" AMD hardware-enforced cache coherency */
+
 /*
  * BUG word(s)
  */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 8e76d37..5a3022c 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -112,6 +112,7 @@
 #define KVM_NR_IRQCHIPS          3
 
 #define KVM_RUN_X86_SMM		 (1 << 0)
+#define KVM_RUN_X86_BUS_LOCK     (1 << 1)
 
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index ada955c..b8e650a 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -89,6 +89,7 @@
 #define EXIT_REASON_XRSTORS             64
 #define EXIT_REASON_UMWAIT              67
 #define EXIT_REASON_TPAUSE              68
+#define EXIT_REASON_BUS_LOCK            74
 
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -150,7 +151,8 @@
 	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
 	{ EXIT_REASON_XRSTORS,               "XRSTORS" }, \
 	{ EXIT_REASON_UMWAIT,                "UMWAIT" }, \
-	{ EXIT_REASON_TPAUSE,                "TPAUSE" }
+	{ EXIT_REASON_TPAUSE,                "TPAUSE" }, \
+	{ EXIT_REASON_BUS_LOCK,              "BUS_LOCK" }
 
 #define VMX_EXIT_REASON_FLAGS \
 	{ VMX_EXIT_REASONS_FAILED_VMENTRY,	"FAILED_VMENTRY" }
diff --git a/tools/build/Makefile b/tools/build/Makefile
index bae48e6..5ed41b9 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -30,12 +30,18 @@
 
 all: $(OUTPUT)fixdep
 
+# Make sure there's anything to clean,
+# feature contains check for existing OUTPUT
+TMP_O := $(if $(OUTPUT),$(OUTPUT)/feature,./)
+
 clean:
 	$(call QUIET_CLEAN, fixdep)
 	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)rm -f $(OUTPUT)fixdep
 	$(call QUIET_CLEAN, feature-detect)
-	$(Q)$(MAKE) -C feature/ clean >/dev/null
+ifneq ($(wildcard $(TMP_O)),)
+	$(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null
+endif
 
 $(OUTPUT)fixdep-in.o: FORCE
 	$(Q)$(MAKE) $(build)=fixdep
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
index b0e35ee..4ac5c08 100644
--- a/tools/include/linux/coresight-pmu.h
+++ b/tools/include/linux/coresight-pmu.h
@@ -10,17 +10,27 @@
 #define CORESIGHT_ETM_PMU_NAME "cs_etm"
 #define CORESIGHT_ETM_PMU_SEED  0x10
 
-/* ETMv3.5/PTM's ETMCR config bit */
-#define ETM_OPT_CYCACC  12
-#define ETM_OPT_CTXTID	14
-#define ETM_OPT_TS      28
-#define ETM_OPT_RETSTK	29
+/*
+ * Below are the definition of bit offsets for perf option, and works as
+ * arbitrary values for all ETM versions.
+ *
+ * Most of them are orignally from ETMv3.5/PTM's ETMCR config, therefore,
+ * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and
+ * directly use below macros as config bits.
+ */
+#define ETM_OPT_CYCACC		12
+#define ETM_OPT_CTXTID		14
+#define ETM_OPT_CTXTID2		15
+#define ETM_OPT_TS		28
+#define ETM_OPT_RETSTK		29
 
 /* ETMv4 CONFIGR programming bits for the ETM OPTs */
 #define ETM4_CFG_BIT_CYCACC	4
 #define ETM4_CFG_BIT_CTXTID	6
+#define ETM4_CFG_BIT_VMID	7
 #define ETM4_CFG_BIT_TS		11
 #define ETM4_CFG_BIT_RETSTK	12
+#define ETM4_CFG_BIT_VMID_OPT	15
 
 static inline int coresight_get_trace_id(int cpu)
 {
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 808b48a..0827037 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -1,11 +1,10 @@
-/**
- * \file drm.h
+/*
  * Header for the Direct Rendering Manager
  *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
+ * Author: Rickard E. (Rik) Faith <faith@valinux.com>
  *
- * \par Acknowledgments:
- * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg.
+ * Acknowledgments:
+ * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic cmpxchg.
  */
 
 /*
@@ -85,7 +84,7 @@
 typedef unsigned int drm_drawable_t;
 typedef unsigned int drm_magic_t;
 
-/**
+/*
  * Cliprect.
  *
  * \warning: If you change this structure, make sure you change
@@ -101,7 +100,7 @@
 	unsigned short y2;
 };
 
-/**
+/*
  * Drawable information.
  */
 struct drm_drawable_info {
@@ -109,7 +108,7 @@
 	struct drm_clip_rect *rects;
 };
 
-/**
+/*
  * Texture region,
  */
 struct drm_tex_region {
@@ -120,7 +119,7 @@
 	unsigned int age;
 };
 
-/**
+/*
  * Hardware lock.
  *
  * The lock structure is a simple cache-line aligned integer.  To avoid
@@ -132,7 +131,7 @@
 	char padding[60];			/**< Pad to cache line */
 };
 
-/**
+/*
  * DRM_IOCTL_VERSION ioctl argument type.
  *
  * \sa drmGetVersion().
@@ -149,7 +148,7 @@
 	char __user *desc;	  /**< User-space buffer to hold desc */
 };
 
-/**
+/*
  * DRM_IOCTL_GET_UNIQUE ioctl argument type.
  *
  * \sa drmGetBusid() and drmSetBusId().
@@ -168,7 +167,7 @@
 	int unused;
 };
 
-/**
+/*
  * DRM_IOCTL_CONTROL ioctl argument type.
  *
  * \sa drmCtlInstHandler() and drmCtlUninstHandler().
@@ -183,7 +182,7 @@
 	int irq;
 };
 
-/**
+/*
  * Type of memory to map.
  */
 enum drm_map_type {
@@ -195,7 +194,7 @@
 	_DRM_CONSISTENT = 5	  /**< Consistent memory for PCI DMA */
 };
 
-/**
+/*
  * Memory mapping flags.
  */
 enum drm_map_flags {
@@ -214,7 +213,7 @@
 	void *handle;		 /**< Handle of map */
 };
 
-/**
+/*
  * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls
  * argument type.
  *
@@ -231,7 +230,7 @@
 	/*   Private data */
 };
 
-/**
+/*
  * DRM_IOCTL_GET_CLIENT ioctl argument type.
  */
 struct drm_client {
@@ -263,7 +262,7 @@
 	    /* Add to the *END* of the list */
 };
 
-/**
+/*
  * DRM_IOCTL_GET_STATS ioctl argument type.
  */
 struct drm_stats {
@@ -274,7 +273,7 @@
 	} data[15];
 };
 
-/**
+/*
  * Hardware locking flags.
  */
 enum drm_lock_flags {
@@ -289,7 +288,7 @@
 	_DRM_HALT_CUR_QUEUES = 0x20  /**< Halt all current queues */
 };
 
-/**
+/*
  * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type.
  *
  * \sa drmGetLock() and drmUnlock().
@@ -299,7 +298,7 @@
 	enum drm_lock_flags flags;
 };
 
-/**
+/*
  * DMA flags
  *
  * \warning
@@ -328,7 +327,7 @@
 	_DRM_DMA_LARGER_OK = 0x40     /**< Larger-than-requested buffers OK */
 };
 
-/**
+/*
  * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type.
  *
  * \sa drmAddBufs().
@@ -351,7 +350,7 @@
 				  */
 };
 
-/**
+/*
  * DRM_IOCTL_INFO_BUFS ioctl argument type.
  */
 struct drm_buf_info {
@@ -359,7 +358,7 @@
 	struct drm_buf_desc __user *list;
 };
 
-/**
+/*
  * DRM_IOCTL_FREE_BUFS ioctl argument type.
  */
 struct drm_buf_free {
@@ -367,7 +366,7 @@
 	int __user *list;
 };
 
-/**
+/*
  * Buffer information
  *
  * \sa drm_buf_map.
@@ -379,7 +378,7 @@
 	void __user *address;	       /**< Address of buffer */
 };
 
-/**
+/*
  * DRM_IOCTL_MAP_BUFS ioctl argument type.
  */
 struct drm_buf_map {
@@ -392,7 +391,7 @@
 	struct drm_buf_pub __user *list;	/**< Buffer information */
 };
 
-/**
+/*
  * DRM_IOCTL_DMA ioctl argument type.
  *
  * Indices here refer to the offset into the buffer list in drm_buf_get.
@@ -417,7 +416,7 @@
 	_DRM_CONTEXT_2DONLY = 0x02
 };
 
-/**
+/*
  * DRM_IOCTL_ADD_CTX ioctl argument type.
  *
  * \sa drmCreateContext() and drmDestroyContext().
@@ -427,7 +426,7 @@
 	enum drm_ctx_flags flags;
 };
 
-/**
+/*
  * DRM_IOCTL_RES_CTX ioctl argument type.
  */
 struct drm_ctx_res {
@@ -435,14 +434,14 @@
 	struct drm_ctx __user *contexts;
 };
 
-/**
+/*
  * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type.
  */
 struct drm_draw {
 	drm_drawable_t handle;
 };
 
-/**
+/*
  * DRM_IOCTL_UPDATE_DRAW ioctl argument type.
  */
 typedef enum {
@@ -456,14 +455,14 @@
 	unsigned long long data;
 };
 
-/**
+/*
  * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type.
  */
 struct drm_auth {
 	drm_magic_t magic;
 };
 
-/**
+/*
  * DRM_IOCTL_IRQ_BUSID ioctl argument type.
  *
  * \sa drmGetInterruptFromBusID().
@@ -505,7 +504,7 @@
 	long tval_usec;
 };
 
-/**
+/*
  * DRM_IOCTL_WAIT_VBLANK ioctl argument type.
  *
  * \sa drmWaitVBlank().
@@ -518,7 +517,7 @@
 #define _DRM_PRE_MODESET 1
 #define _DRM_POST_MODESET 2
 
-/**
+/*
  * DRM_IOCTL_MODESET_CTL ioctl argument type
  *
  * \sa drmModesetCtl().
@@ -528,7 +527,7 @@
 	__u32 cmd;
 };
 
-/**
+/*
  * DRM_IOCTL_AGP_ENABLE ioctl argument type.
  *
  * \sa drmAgpEnable().
@@ -537,7 +536,7 @@
 	unsigned long mode;	/**< AGP mode */
 };
 
-/**
+/*
  * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type.
  *
  * \sa drmAgpAlloc() and drmAgpFree().
@@ -549,7 +548,7 @@
 	unsigned long physical;	/**< Physical used by i810 */
 };
 
-/**
+/*
  * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type.
  *
  * \sa drmAgpBind() and drmAgpUnbind().
@@ -559,7 +558,7 @@
 	unsigned long offset;	/**< In bytes -- will round to page boundary */
 };
 
-/**
+/*
  * DRM_IOCTL_AGP_INFO ioctl argument type.
  *
  * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(),
@@ -580,7 +579,7 @@
 	unsigned short id_device;
 };
 
-/**
+/*
  * DRM_IOCTL_SG_ALLOC ioctl argument type.
  */
 struct drm_scatter_gather {
@@ -588,7 +587,7 @@
 	unsigned long handle;	/**< Used for mapping / unmapping */
 };
 
-/**
+/*
  * DRM_IOCTL_SET_VERSION ioctl argument type.
  */
 struct drm_set_version {
@@ -598,14 +597,14 @@
 	int drm_dd_minor;
 };
 
-/** DRM_IOCTL_GEM_CLOSE ioctl argument type */
+/* DRM_IOCTL_GEM_CLOSE ioctl argument type */
 struct drm_gem_close {
 	/** Handle of the object to be closed. */
 	__u32 handle;
 	__u32 pad;
 };
 
-/** DRM_IOCTL_GEM_FLINK ioctl argument type */
+/* DRM_IOCTL_GEM_FLINK ioctl argument type */
 struct drm_gem_flink {
 	/** Handle for the object being named */
 	__u32 handle;
@@ -614,7 +613,7 @@
 	__u32 name;
 };
 
-/** DRM_IOCTL_GEM_OPEN ioctl argument type */
+/* DRM_IOCTL_GEM_OPEN ioctl argument type */
 struct drm_gem_open {
 	/** Name of object being opened */
 	__u32 name;
@@ -652,7 +651,7 @@
 #define DRM_CAP_SYNCOBJ		0x13
 #define DRM_CAP_SYNCOBJ_TIMELINE	0x14
 
-/** DRM_IOCTL_GET_CAP ioctl argument type */
+/* DRM_IOCTL_GET_CAP ioctl argument type */
 struct drm_get_cap {
 	__u64 capability;
 	__u64 value;
@@ -678,7 +677,9 @@
 /**
  * DRM_CLIENT_CAP_ATOMIC
  *
- * If set to 1, the DRM core will expose atomic properties to userspace
+ * If set to 1, the DRM core will expose atomic properties to userspace. This
+ * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and
+ * &DRM_CLIENT_CAP_ASPECT_RATIO.
  */
 #define DRM_CLIENT_CAP_ATOMIC	3
 
@@ -698,7 +699,7 @@
  */
 #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS	5
 
-/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
+/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
 struct drm_set_client_cap {
 	__u64 capability;
 	__u64 value;
@@ -950,7 +951,7 @@
 
 #define DRM_IOCTL_MODE_GETFB2		DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
 
-/**
+/*
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
  * Generic IOCTLS restart at 0xA0.
@@ -961,7 +962,7 @@
 #define DRM_COMMAND_BASE                0x40
 #define DRM_COMMAND_END			0xA0
 
-/**
+/*
  * Header for events written back to userspace on the drm fd.  The
  * type defines the type of event, the length specifies the total
  * length of the event (including the header), and user_data is
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index fa1f3d6..1987e2e 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -177,8 +177,9 @@
 #define I915_PMU_REQUESTED_FREQUENCY	__I915_PMU_OTHER(1)
 #define I915_PMU_INTERRUPTS		__I915_PMU_OTHER(2)
 #define I915_PMU_RC6_RESIDENCY		__I915_PMU_OTHER(3)
+#define I915_PMU_SOFTWARE_GT_AWAKE_TIME	__I915_PMU_OTHER(4)
 
-#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
 
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index abb89bb..8b281f7 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -216,6 +216,20 @@
 	} u;
 };
 
+struct kvm_xen_exit {
+#define KVM_EXIT_XEN_HCALL          1
+	__u32 type;
+	union {
+		struct {
+			__u32 longmode;
+			__u32 cpl;
+			__u64 input;
+			__u64 result;
+			__u64 params[6];
+		} hcall;
+	} u;
+};
+
 #define KVM_S390_GET_SKEYS_NONE   1
 #define KVM_S390_SKEYS_MAX        1048576
 
@@ -252,6 +266,8 @@
 #define KVM_EXIT_X86_WRMSR        30
 #define KVM_EXIT_DIRTY_RING_FULL  31
 #define KVM_EXIT_AP_RESET_HOLD    32
+#define KVM_EXIT_X86_BUS_LOCK     33
+#define KVM_EXIT_XEN              34
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -428,6 +444,8 @@
 			__u32 index; /* kernel -> user */
 			__u64 data; /* kernel <-> user */
 		} msr;
+		/* KVM_EXIT_XEN */
+		struct kvm_xen_exit xen;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -1058,6 +1076,7 @@
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
 #define KVM_CAP_SYS_HYPERV_CPUID 191
 #define KVM_CAP_DIRTY_LOG_RING 192
+#define KVM_CAP_X86_BUS_LOCK_EXIT 193
 #define KVM_CAP_PPC_DAWR1 194
 
 #ifdef KVM_CAP_IRQ_ROUTING
@@ -1132,6 +1151,10 @@
 #endif
 
 #ifdef KVM_CAP_XEN_HVM
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR	(1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL	(1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO		(1 << 2)
+
 struct kvm_xen_hvm_config {
 	__u32 flags;
 	__u32 msr;
@@ -1566,6 +1589,45 @@
 /* Available with KVM_CAP_DIRTY_LOG_RING */
 #define KVM_RESET_DIRTY_RINGS		_IO(KVMIO, 0xc7)
 
+/* Per-VM Xen attributes */
+#define KVM_XEN_HVM_GET_ATTR	_IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr)
+#define KVM_XEN_HVM_SET_ATTR	_IOW(KVMIO,  0xc9, struct kvm_xen_hvm_attr)
+
+struct kvm_xen_hvm_attr {
+	__u16 type;
+	__u16 pad[3];
+	union {
+		__u8 long_mode;
+		__u8 vector;
+		struct {
+			__u64 gfn;
+		} shared_info;
+		__u64 pad[8];
+	} u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE		0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO		0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR		0x2
+
+/* Per-vCPU Xen attributes */
+#define KVM_XEN_VCPU_GET_ATTR	_IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
+#define KVM_XEN_VCPU_SET_ATTR	_IOW(KVMIO,  0xcb, struct kvm_xen_vcpu_attr)
+
+struct kvm_xen_vcpu_attr {
+	__u16 type;
+	__u16 pad[3];
+	union {
+		__u64 gpa;
+		__u64 pad[8];
+	} u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO	0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO	0x1
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */
@@ -1594,6 +1656,8 @@
 	KVM_SEV_DBG_ENCRYPT,
 	/* Guest certificates commands */
 	KVM_SEV_CERT_EXPORT,
+	/* Attestation report */
+	KVM_SEV_GET_ATTESTATION_REPORT,
 
 	KVM_SEV_NR_MAX,
 };
@@ -1646,6 +1710,12 @@
 	__u32 len;
 };
 
+struct kvm_sev_attestation_report {
+	__u8 mnonce[16];
+	__u64 uaddr;
+	__u32 len;
+};
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX	(1 << 2)
@@ -1767,4 +1837,7 @@
 	__u64 offset;
 };
 
+#define KVM_BUS_LOCK_DETECTION_OFF             (1 << 0)
+#define KVM_BUS_LOCK_DETECTION_EXIT            (1 << 1)
+
 #endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index dd8306e..e6524ea 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -1,6 +1,8 @@
 #ifndef _UAPI_LINUX_MOUNT_H
 #define _UAPI_LINUX_MOUNT_H
 
+#include <linux/types.h>
+
 /*
  * These are the fs-independent mount-flags: up to 32 flags are supported
  *
@@ -117,5 +119,19 @@
 #define MOUNT_ATTR_NOATIME	0x00000010 /* - Do not update access times. */
 #define MOUNT_ATTR_STRICTATIME	0x00000020 /* - Always perform atime updates */
 #define MOUNT_ATTR_NODIRATIME	0x00000080 /* Do not update directory access times */
+#define MOUNT_ATTR_IDMAP	0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+
+/*
+ * mount_setattr()
+ */
+struct mount_attr {
+	__u64 attr_set;
+	__u64 attr_clr;
+	__u64 propagation;
+	__u64 userns_fd;
+};
+
+/* List of all mount_attr versions. */
+#define MOUNT_ATTR_SIZE_VER0	32 /* sizeof first published struct */
 
 #endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h
index 58b1eb7..a5feb76 100644
--- a/tools/include/uapi/linux/openat2.h
+++ b/tools/include/uapi/linux/openat2.h
@@ -35,5 +35,9 @@
 #define RESOLVE_IN_ROOT		0x10 /* Make all jumps to "/" and ".."
 					be scoped inside the dirfd
 					(similar to chroot(2)). */
+#define RESOLVE_CACHED		0x20 /* Only complete if resolution can be
+					completed through cached lookup. May
+					return -EAGAIN if that's not
+					possible. */
 
 #endif /* _UAPI_LINUX_OPENAT2_H */
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 17465d4..a0aaf38 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -26,13 +26,10 @@
 
 void perf_evlist__init(struct perf_evlist *evlist)
 {
-	int i;
-
-	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
-		INIT_HLIST_HEAD(&evlist->heads[i]);
 	INIT_LIST_HEAD(&evlist->entries);
 	evlist->nr_entries = 0;
 	fdarray__init(&evlist->pollfd, 64);
+	perf_evlist__reset_id_hash(evlist);
 }
 
 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
@@ -237,6 +234,14 @@
 	hlist_add_head(&sid->node, &evlist->heads[hash]);
 }
 
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist)
+{
+	int i;
+
+	for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+		INIT_HLIST_HEAD(&evlist->heads[i]);
+}
+
 void perf_evlist__id_add(struct perf_evlist *evlist,
 			 struct perf_evsel *evsel,
 			 int cpu, int thread, u64 id)
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 2d0fa02..212c290 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -124,4 +124,6 @@
 			   struct perf_evsel *evsel,
 			   int cpu, int thread, int fd);
 
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
+
 #endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index c0a6640..9af8b8d 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -29,7 +29,7 @@
 	Show just the sample frequency used for each event.
 
 -v::
---verbose=::
+--verbose::
 	Show all fields.
 
 -g::
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 1e91121..6e82b7c 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -28,8 +28,8 @@
 	specified: function_graph or function.
 
 -v::
---verbose=::
-        Verbosity level.
+--verbose::
+        Increase the verbosity level.
 
 -F::
 --funcs::
diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt
index f3c6209..c97527d 100644
--- a/tools/perf/Documentation/perf-kallsyms.txt
+++ b/tools/perf/Documentation/perf-kallsyms.txt
@@ -20,5 +20,5 @@
 OPTIONS
 -------
 -v::
---verbose=::
+--verbose::
 	Increase verbosity level, showing details about symbol table loading, etc.
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index abc9b5d..f0da8cf 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -97,8 +97,8 @@
 	Filter out events for these pids and for 'trace' itself (comma separated list).
 
 -v::
---verbose=::
-        Verbosity level.
+--verbose::
+        Increase the verbosity level.
 
 --no-inherit::
 	Child tasks do not inherit counters.
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 5345ac7..f6e6096 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -607,7 +607,7 @@
 arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
 
 $(arch_errno_name_array): $(arch_errno_tbl)
-	$(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@
+	$(Q)$(SHELL) '$(arch_errno_tbl)' '$(patsubst -%,,$(CC))' $(arch_errno_hdr_dir) > $@
 
 sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
 sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
@@ -1001,14 +1001,6 @@
 
 ### Cleaning rules
 
-#
-# This is here, not in Makefile.config, because Makefile.config does
-# not get included for the clean target:
-#
-config-clean:
-	$(call QUIET_CLEAN, config)
-	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
-
 python-clean:
 	$(python-clean)
 
@@ -1048,7 +1040,7 @@
 bpf-skel-clean:
 	$(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
 
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean bpf-skel-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean
 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
 	$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)$(RM) $(OUTPUT).config-detected
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index bd446ab..c25c878 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -156,6 +156,10 @@
 	return err;
 }
 
+#define ETM_SET_OPT_CTXTID	(1 << 0)
+#define ETM_SET_OPT_TS		(1 << 1)
+#define ETM_SET_OPT_MASK	(ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS)
+
 static int cs_etm_set_option(struct auxtrace_record *itr,
 			     struct evsel *evsel, u32 option)
 {
@@ -169,17 +173,17 @@
 		    !cpu_map__has(online_cpus, i))
 			continue;
 
-		if (option & ETM_OPT_CTXTID) {
+		if (option & ETM_SET_OPT_CTXTID) {
 			err = cs_etm_set_context_id(itr, evsel, i);
 			if (err)
 				goto out;
 		}
-		if (option & ETM_OPT_TS) {
+		if (option & ETM_SET_OPT_TS) {
 			err = cs_etm_set_timestamp(itr, evsel, i);
 			if (err)
 				goto out;
 		}
-		if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS))
+		if (option & ~(ETM_SET_OPT_MASK))
 			/* Nothing else is currently supported */
 			goto out;
 	}
@@ -406,7 +410,7 @@
 		evsel__set_sample_bit(cs_etm_evsel, CPU);
 
 		err = cs_etm_set_option(itr, cs_etm_evsel,
-					ETM_OPT_CTXTID | ETM_OPT_TS);
+					ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS);
 		if (err)
 			goto out;
 	}
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index f744eb5..0b2480c 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -9,9 +9,7 @@
 #
 0	nospu	restart_syscall			sys_restart_syscall
 1	nospu	exit				sys_exit
-2	32	fork				ppc_fork			sys_fork
-2	64	fork				sys_fork
-2	spu	fork				sys_ni_syscall
+2	nospu	fork				sys_fork
 3	common	read				sys_read
 4	common	write				sys_write
 5	common	open				sys_open			compat_sys_open
@@ -160,9 +158,7 @@
 119	32	sigreturn			sys_sigreturn			compat_sys_sigreturn
 119	64	sigreturn			sys_ni_syscall
 119	spu	sigreturn			sys_ni_syscall
-120	32	clone				ppc_clone			sys_clone
-120	64	clone				sys_clone
-120	spu	clone				sys_ni_syscall
+120	nospu	clone				sys_clone
 121	common	setdomainname			sys_setdomainname
 122	common	uname				sys_newuname
 123	common	modify_ldt			sys_ni_syscall
@@ -244,9 +240,7 @@
 186	spu	sendfile			sys_sendfile64
 187	common	getpmsg				sys_ni_syscall
 188	common 	putpmsg				sys_ni_syscall
-189	32	vfork				ppc_vfork			sys_vfork
-189	64	vfork				sys_vfork
-189	spu	vfork				sys_ni_syscall
+189	nospu	vfork				sys_vfork
 190	common	ugetrlimit			sys_getrlimit			compat_sys_getrlimit
 191	common	readahead			sys_readahead			compat_sys_readahead
 192	32	mmap2				sys_mmap2			compat_sys_mmap2
@@ -322,9 +316,7 @@
 248	32	clock_nanosleep			sys_clock_nanosleep_time32
 248	64	clock_nanosleep			sys_clock_nanosleep
 248	spu	clock_nanosleep			sys_clock_nanosleep
-249	32	swapcontext			ppc_swapcontext			compat_sys_swapcontext
-249	64	swapcontext			sys_swapcontext
-249	spu	swapcontext			sys_ni_syscall
+249	nospu	swapcontext			sys_swapcontext			compat_sys_swapcontext
 250	common	tgkill				sys_tgkill
 251	32	utimes				sys_utimes_time32
 251	64	utimes				sys_utimes
@@ -522,12 +514,11 @@
 432	common	fsmount				sys_fsmount
 433	common	fspick				sys_fspick
 434	common	pidfd_open			sys_pidfd_open
-435	32	clone3				ppc_clone3			sys_clone3
-435	64	clone3				sys_clone3
-435	spu	clone3				sys_ni_syscall
+435	nospu	clone3				sys_clone3
 436	common	close_range			sys_close_range
 437	common	openat2				sys_openat2
 438	common	pidfd_getfd			sys_pidfd_getfd
 439	common	faccessat2			sys_faccessat2
 440	common	process_madvise			sys_process_madvise
 441	common	epoll_pwait2			sys_epoll_pwait2		compat_sys_epoll_pwait2
+442	common	mount_setattr			sys_mount_setattr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index d443423..3abef21 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -444,3 +444,4 @@
 439  common	faccessat2		sys_faccessat2			sys_faccessat2
 440  common	process_madvise		sys_process_madvise		sys_process_madvise
 441  common	epoll_pwait2		sys_epoll_pwait2		compat_sys_epoll_pwait2
+442  common	mount_setattr		sys_mount_setattr		sys_mount_setattr
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 8cc6642..5a9f9a7 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -10,10 +10,11 @@
 # Syscall table generation
 #
 
-out    := $(OUTPUT)arch/x86/include/generated/asm
-header := $(out)/syscalls_64.c
-sys    := $(srctree)/tools/perf/arch/x86/entry/syscalls
-systbl := $(sys)/syscalltbl.sh
+generated := $(OUTPUT)arch/x86/include/generated
+out       := $(generated)/asm
+header    := $(out)/syscalls_64.c
+sys       := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl    := $(sys)/syscalltbl.sh
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
@@ -22,6 +23,6 @@
 	$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
 
 clean::
-	$(call QUIET_CLEAN, x86) $(RM) $(header)
+	$(call QUIET_CLEAN, x86) $(RM) -r $(header) $(generated)
 
 archheaders: $(header)
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 7867212..7bf01cb 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -363,6 +363,7 @@
 439	common	faccessat2		sys_faccessat2
 440	common	process_madvise		sys_process_madvise
 441	common	epoll_pwait2		sys_epoll_pwait2
+442	common	mount_setattr		sys_mount_setattr
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 6a54b94..0e20f3d 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -10,6 +10,7 @@
 int test__insn_x86(struct test *test __maybe_unused, int subtest);
 int test__intel_pt_pkt_decoder(struct test *test, int subtest);
 int test__bp_modify(struct test *test, int subtest);
+int test__x86_sample_parsing(struct test *test, int subtest);
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 36d4f24..28d7933 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,5 +3,6 @@
 
 perf-y += arch-tests.o
 perf-y += rdpmc.o
+perf-y += sample-parsing.o
 perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
 perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index bc25d72..71aa673 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -31,6 +31,10 @@
 	},
 #endif
 	{
+		.desc = "x86 Sample parsing",
+		.func = test__x86_sample_parsing,
+	},
+	{
 		.func = NULL,
 	},
 
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index f782ef8..4f75ae9 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/types.h>
-#include "../../../../arch/x86/include/asm/insn.h"
 #include <string.h>
 
 #include "debug.h"
 #include "tests/tests.h"
 #include "arch-tests.h"
+#include "../../../../arch/x86/include/asm/insn.h"
 
 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
 
diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
new file mode 100644
index 0000000..c92db87
--- /dev/null
+++ b/tools/perf/arch/x86/tests/sample-parsing.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdbool.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "event.h"
+#include "evsel.h"
+#include "debug.h"
+#include "util/synthetic-events.h"
+
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define COMP(m) do {					\
+	if (s1->m != s2->m) {				\
+		pr_debug("Samples differ at '"#m"'\n");	\
+		return false;				\
+	}						\
+} while (0)
+
+static bool samples_same(const struct perf_sample *s1,
+			 const struct perf_sample *s2,
+			 u64 type)
+{
+	if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+		COMP(ins_lat);
+
+	return true;
+}
+
+static int do_test(u64 sample_type)
+{
+	struct evsel evsel = {
+		.needs_swap = false,
+		.core = {
+			. attr = {
+				.sample_type = sample_type,
+				.read_format = 0,
+			},
+		},
+	};
+	union perf_event *event;
+	struct perf_sample sample = {
+		.weight		= 101,
+		.ins_lat        = 102,
+	};
+	struct perf_sample sample_out;
+	size_t i, sz, bufsz;
+	int err, ret = -1;
+
+	sz = perf_event__sample_event_size(&sample, sample_type, 0);
+	bufsz = sz + 4096; /* Add a bit for overrun checking */
+	event = malloc(bufsz);
+	if (!event) {
+		pr_debug("malloc failed\n");
+		return -1;
+	}
+
+	memset(event, 0xff, bufsz);
+	event->header.type = PERF_RECORD_SAMPLE;
+	event->header.misc = 0;
+	event->header.size = sz;
+
+	err = perf_event__synthesize_sample(event, sample_type, 0, &sample);
+	if (err) {
+		pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+			 "perf_event__synthesize_sample", sample_type, err);
+		goto out_free;
+	}
+
+	/* The data does not contain 0xff so we use that to check the size */
+	for (i = bufsz; i > 0; i--) {
+		if (*(i - 1 + (u8 *)event) != 0xff)
+			break;
+	}
+	if (i != sz) {
+		pr_debug("Event size mismatch: actual %zu vs expected %zu\n",
+			 i, sz);
+		goto out_free;
+	}
+
+	evsel.sample_size = __evsel__sample_size(sample_type);
+
+	err = evsel__parse_sample(&evsel, event, &sample_out);
+	if (err) {
+		pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+			 "evsel__parse_sample", sample_type, err);
+		goto out_free;
+	}
+
+	if (!samples_same(&sample, &sample_out, sample_type)) {
+		pr_debug("parsing failed for sample_type %#"PRIx64"\n",
+			 sample_type);
+		goto out_free;
+	}
+
+	ret = 0;
+out_free:
+	free(event);
+
+	return ret;
+}
+
+/**
+ * test__x86_sample_parsing - test X86 specific sample parsing
+ *
+ * This function implements a test that synthesizes a sample event, parses it
+ * and then checks that the parsed sample matches the original sample. If the
+ * test passes %0 is returned, otherwise %-1 is returned.
+ *
+ * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type.
+ * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type.
+ */
+int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+	return do_test(PERF_SAMPLE_WEIGHT_STRUCT);
+}
diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c
index 3e67915..34d600c 100644
--- a/tools/perf/arch/x86/util/archinsn.c
+++ b/tools/perf/arch/x86/util/archinsn.c
@@ -1,10 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "../../../../arch/x86/include/asm/insn.h"
 #include "archinsn.h"
 #include "event.h"
 #include "machine.h"
 #include "thread.h"
 #include "symbol.h"
+#include "../../../../arch/x86/include/asm/insn.h"
 
 void arch_fetch_insn(struct perf_sample *sample,
 		     struct thread *thread,
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 11726ec..20b87e2 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -344,18 +344,22 @@
 
 static void bind_to_memnode(int node)
 {
-	unsigned long nodemask;
+	struct bitmask *node_mask;
 	int ret;
 
 	if (node == NUMA_NO_NODE)
 		return;
 
-	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
-	nodemask = 1L << node;
+	node_mask = numa_allocate_nodemask();
+	BUG_ON(!node_mask);
 
-	ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
-	dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
+	numa_bitmask_clearall(node_mask);
+	numa_bitmask_setbit(node_mask, node);
 
+	ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1);
+	dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret);
+
+	numa_bitmask_free(node_mask);
 	BUG_ON(ret);
 }
 
@@ -876,8 +880,6 @@
 	prctl(0, bytes_worked);
 }
 
-#define MAX_NR_NODES	64
-
 /*
  * Count the number of nodes a process's threads
  * are spread out on.
@@ -888,10 +890,15 @@
  */
 static int count_process_nodes(int process_nr)
 {
-	char node_present[MAX_NR_NODES] = { 0, };
+	char *node_present;
 	int nodes;
 	int n, t;
 
+	node_present = (char *)malloc(g->p.nr_nodes * sizeof(char));
+	BUG_ON(!node_present);
+	for (nodes = 0; nodes < g->p.nr_nodes; nodes++)
+		node_present[nodes] = 0;
+
 	for (t = 0; t < g->p.nr_threads; t++) {
 		struct thread_data *td;
 		int task_nr;
@@ -901,17 +908,20 @@
 		td = g->threads + task_nr;
 
 		node = numa_node_of_cpu(td->curr_cpu);
-		if (node < 0) /* curr_cpu was likely still -1 */
+		if (node < 0) /* curr_cpu was likely still -1 */ {
+			free(node_present);
 			return 0;
+		}
 
 		node_present[node] = 1;
 	}
 
 	nodes = 0;
 
-	for (n = 0; n < MAX_NR_NODES; n++)
+	for (n = 0; n < g->p.nr_nodes; n++)
 		nodes += node_present[n];
 
+	free(node_present);
 	return nodes;
 }
 
@@ -980,7 +990,7 @@
 {
 	unsigned int loops_done_min, loops_done_max;
 	int process_groups;
-	int nodes[MAX_NR_NODES];
+	int *nodes;
 	int distance;
 	int nr_min;
 	int nr_max;
@@ -994,6 +1004,8 @@
 	if (!g->p.show_convergence && !g->p.measure_convergence)
 		return;
 
+	nodes = (int *)malloc(g->p.nr_nodes * sizeof(int));
+	BUG_ON(!nodes);
 	for (node = 0; node < g->p.nr_nodes; node++)
 		nodes[node] = 0;
 
@@ -1035,8 +1047,10 @@
 
 	BUG_ON(sum > g->p.nr_tasks);
 
-	if (0 && (sum < g->p.nr_tasks))
+	if (0 && (sum < g->p.nr_tasks)) {
+		free(nodes);
 		return;
+	}
 
 	/*
 	 * Count the number of distinct process groups present
@@ -1088,6 +1102,8 @@
 		}
 		tprintf("\n");
 	}
+
+	free(nodes);
 }
 
 static void show_summary(double runtime_ns_max, int l, double *convergence)
@@ -1413,7 +1429,7 @@
 	g->p.nr_nodes = numa_max_node() + 1;
 
 	/* char array in count_process_nodes(): */
-	BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
+	BUG_ON(g->p.nr_nodes < 0);
 
 	if (g->p.show_quiet && !g->p.show_details)
 		g->p.show_details = -1;
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index cecce93..488f6e6 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -309,11 +309,11 @@
 		       num_groups, num_groups * 2 * num_fds,
 		       thread_mode ? "threads" : "processes");
 		printf(" %14s: %lu.%03lu [sec]\n", "Total time",
-		       diff.tv_sec,
+		       (unsigned long) diff.tv_sec,
 		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
 		break;
 	case BENCH_FORMAT_SIMPLE:
-		printf("%lu.%03lu\n", diff.tv_sec,
+		printf("%lu.%03lu\n", (unsigned long) diff.tv_sec,
 		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
 		break;
 	default:
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 3c88d1f..a960e7a 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -156,7 +156,7 @@
 		result_usec += diff.tv_usec;
 
 		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
-		       diff.tv_sec,
+		       (unsigned long) diff.tv_sec,
 		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
 
 		printf(" %14lf usecs/op\n",
@@ -168,7 +168,7 @@
 
 	case BENCH_FORMAT_SIMPLE:
 		printf("%lu.%03lu\n",
-		       diff.tv_sec,
+		       (unsigned long) diff.tv_sec,
 		       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
 		break;
 
diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c
index 5fe621c..9b75101 100644
--- a/tools/perf/bench/syscall.c
+++ b/tools/perf/bench/syscall.c
@@ -54,7 +54,7 @@
 		result_usec += diff.tv_usec;
 
 		printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
-		       diff.tv_sec,
+		       (unsigned long) diff.tv_sec,
 		       (unsigned long) (diff.tv_usec/1000));
 
 		printf(" %14lf usecs/op\n",
@@ -66,7 +66,7 @@
 
 	case BENCH_FORMAT_SIMPLE:
 		printf("%lu.%03lu\n",
-		       diff.tv_sec,
+		       (unsigned long) diff.tv_sec,
 		       (unsigned long) (diff.tv_usec / 1000));
 		break;
 
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index 617feaf..ace8772 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -161,7 +161,7 @@
 	struct daemon_session *session;
 	char name[100];
 
-	if (get_session_name(var, name, sizeof(name)))
+	if (get_session_name(var, name, sizeof(name) - 1))
 		return -EINVAL;
 
 	var = strchr(var, '.');
@@ -373,12 +373,12 @@
 	dup2(fd, 2);
 	close(fd);
 
-	if (mkfifo(SESSION_CONTROL, O_RDWR) && errno != EEXIST) {
+	if (mkfifo(SESSION_CONTROL, 0600) && errno != EEXIST) {
 		perror("failed: create control fifo");
 		return -1;
 	}
 
-	if (mkfifo(SESSION_ACK, O_RDWR) && errno != EEXIST) {
+	if (mkfifo(SESSION_ACK, 0600) && errno != EEXIST) {
 		perror("failed: create ack fifo");
 		return -1;
 	}
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8f6c784c..878e04b 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1236,7 +1236,8 @@
 
  out_delete:
 	data__for_each_file(i, d) {
-		perf_session__delete(d->session);
+		if (!IS_ERR(d->session))
+			perf_session__delete(d->session);
 		data__free(d);
 	}
 
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 85b6a46..7ec18ff 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3964,9 +3964,6 @@
 
 	evlist__config(evlist, &trace->opts, &callchain_param);
 
-	signal(SIGCHLD, sig_handler);
-	signal(SIGINT, sig_handler);
-
 	if (forks) {
 		err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
 		if (err < 0) {
@@ -4827,6 +4824,8 @@
 
 	signal(SIGSEGV, sighandler_dump_stack);
 	signal(SIGFPE, sighandler_dump_stack);
+	signal(SIGCHLD, sig_handler);
+	signal(SIGINT, sig_handler);
 
 	trace.evlist = evlist__new();
 	trace.sctbl = syscalltbl__new();
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 0cfb3e2..133f0ed 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -20,9 +20,8 @@
 fi
 
 BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
-NOBUILDID=0000000000000000000000000000000000000000
 
-perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS
+perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " > $BUILDIDS
 if [ ! -s $BUILDIDS ] ; then
 	echo "perf archive: no build-ids found"
 	rm $BUILDIDS || true
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index ec972e0..dd39ce9 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -182,14 +182,20 @@
 	struct stat st;
 	char path_perf[PATH_MAX];
 	char path_dir[PATH_MAX];
+	char *exec_path;
 
 	/* First try development tree tests. */
 	if (!lstat("./tests", &st))
 		return run_dir("./tests", "./perf");
 
+	exec_path = get_argv_exec_path();
+	if (exec_path == NULL)
+		return -1;
+
 	/* Then installed path. */
-	snprintf(path_dir,  PATH_MAX, "%s/tests", get_argv_exec_path());
+	snprintf(path_dir,  PATH_MAX, "%s/tests", exec_path);
 	snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);
+	free(exec_path);
 
 	if (!lstat(path_dir, &st) &&
 	    !lstat(path_perf, &st))
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 280f034..2fdc7b2 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -706,13 +706,9 @@
 out_put:
 	thread__put(thread);
 out_err:
-
-	if (evlist) {
-		evlist__delete(evlist);
-	} else {
-		perf_cpu_map__put(cpus);
-		perf_thread_map__put(threads);
-	}
+	evlist__delete(evlist);
+	perf_cpu_map__put(cpus);
+	perf_thread_map__put(threads);
 	machine__delete_threads(machine);
 	machine__delete(machine);
 
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 29c793a..0472b11 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -106,6 +106,8 @@
 		return -1;
 
 	cpu_map__snprint(map, buf, sizeof(buf));
+	perf_cpu_map__put(map);
+
 	return !strcmp(buf, str);
 }
 
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index e6f1b2a..a0438b0 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -154,10 +154,9 @@
 	if (evlist) {
 		evlist__disable(evlist);
 		evlist__delete(evlist);
-	} else {
-		perf_cpu_map__put(cpus);
-		perf_thread_map__put(threads);
 	}
+	perf_cpu_map__put(cpus);
+	perf_thread_map__put(threads);
 
 	return err;
 }
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 57093ae..73ae8f7 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -158,8 +158,6 @@
 
 out_delete_evlist:
 	evlist__delete(evlist);
-	cpus	= NULL;
-	threads = NULL;
 out_free_cpus:
 	perf_cpu_map__put(cpus);
 out_free_threads:
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index 7cff026..680c3cf 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -167,6 +167,8 @@
 
 out_err:
 	evlist__delete(evlist);
+	perf_cpu_map__put(cpus);
+	perf_thread_map__put(threads);
 	return err;
 }
 
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 0dbe3aa..8fd8a4e 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -129,9 +129,6 @@
 	if (type & PERF_SAMPLE_WEIGHT)
 		COMP(weight);
 
-	if (type & PERF_SAMPLE_WEIGHT_STRUCT)
-		COMP(ins_lat);
-
 	if (type & PERF_SAMPLE_DATA_SRC)
 		COMP(data_src);
 
@@ -245,7 +242,6 @@
 		.cgroup		= 114,
 		.data_page_size = 115,
 		.code_page_size = 116,
-		.ins_lat        = 117,
 		.aux_sample	= {
 			.size	= sizeof(aux_data),
 			.data	= (void *)aux_data,
diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh
index e5b824d..5ad3ca8 100755
--- a/tools/perf/tests/shell/daemon.sh
+++ b/tools/perf/tests/shell/daemon.sh
@@ -140,10 +140,10 @@
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 
 [session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
 EOF
 
 	sed -i -e "s|BASE|${base}|" ${config}
@@ -159,14 +159,14 @@
 	# check 1st session
 	# pid:size:-e cpu-clock:base/size:base/size/output:base/size/control:base/size/ack:0
 	local line=`perf daemon --config ${config} -x: | head -2 | tail -1`
-	check_line_other "${line}" size "-e cpu-clock" ${base}/session-size \
+	check_line_other "${line}" size "-e cpu-clock -m 1 sleep 10" ${base}/session-size \
 			 ${base}/session-size/output ${base}/session-size/control \
 			 ${base}/session-size/ack "0"
 
 	# check 2nd session
 	# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
 	local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
-	check_line_other "${line}" time "-e task-clock" ${base}/session-time \
+	check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
 			 ${base}/session-time/output ${base}/session-time/control \
 			 ${base}/session-time/ack "0"
 
@@ -190,10 +190,10 @@
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 
 [session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
 EOF
 
 	sed -i -e "s|BASE|${base}|" ${config}
@@ -204,7 +204,7 @@
 	# check 2nd session
 	# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
 	local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
-	check_line_other "${line}" time "-e task-clock" ${base}/session-time \
+	check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
 			 ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
 	local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
 
@@ -215,10 +215,10 @@
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 
 [session-time]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 EOF
 
 	# TEST 1 - change config
@@ -238,7 +238,7 @@
 	# check reconfigured 2nd session
 	# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
 	local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
-	check_line_other "${line}" time "-e cpu-clock" ${base}/session-time \
+	check_line_other "${line}" time "-e cpu-clock -m 1 sleep 10" ${base}/session-time \
 			 ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
 
 	# TEST 2 - empty config
@@ -309,10 +309,10 @@
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 
 [session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
 EOF
 
 	sed -i -e "s|BASE|${base}|" ${config}
@@ -361,7 +361,7 @@
 base=BASE
 
 [session-test]
-run = -e cpu-clock --switch-output
+run = -e cpu-clock --switch-output -m 1 sleep 10
 EOF
 
 	sed -i -e "s|BASE|${base}|" ${config}
@@ -400,10 +400,10 @@
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 
 [session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
 EOF
 
 	sed -i -e "s|BASE|${base}|" ${config}
@@ -439,7 +439,7 @@
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 EOF
 
 	sed -i -e "s|BASE|${base}|" ${config}
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index a49c9e2..7498884 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -42,8 +42,8 @@
 		.disabled = 1,
 		.freq = 1,
 	};
-	struct perf_cpu_map *cpus;
-	struct perf_thread_map *threads;
+	struct perf_cpu_map *cpus = NULL;
+	struct perf_thread_map *threads = NULL;
 	struct mmap *md;
 
 	attr.sample_freq = 500;
@@ -66,14 +66,11 @@
 	if (!cpus || !threads) {
 		err = -ENOMEM;
 		pr_debug("Not enough memory to create thread/cpu maps\n");
-		goto out_free_maps;
+		goto out_delete_evlist;
 	}
 
 	perf_evlist__set_maps(&evlist->core, cpus, threads);
 
-	cpus	= NULL;
-	threads = NULL;
-
 	if (evlist__open(evlist)) {
 		const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate";
 
@@ -129,10 +126,9 @@
 		err = -1;
 	}
 
-out_free_maps:
+out_delete_evlist:
 	perf_cpu_map__put(cpus);
 	perf_thread_map__put(threads);
-out_delete_evlist:
 	evlist__delete(evlist);
 	return err;
 }
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 15a2ab7..3ebaa75 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -574,10 +574,9 @@
 	if (evlist) {
 		evlist__disable(evlist);
 		evlist__delete(evlist);
-	} else {
-		perf_cpu_map__put(cpus);
-		perf_thread_map__put(threads);
 	}
+	perf_cpu_map__put(cpus);
+	perf_thread_map__put(threads);
 
 	return err;
 
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index bbf94e4..4c2969d 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -75,14 +75,11 @@
 	if (!cpus || !threads) {
 		err = -ENOMEM;
 		pr_debug("Not enough memory to create thread/cpu maps\n");
-		goto out_free_maps;
+		goto out_delete_evlist;
 	}
 
 	perf_evlist__set_maps(&evlist->core, cpus, threads);
 
-	cpus	= NULL;
-	threads = NULL;
-
 	err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal);
 	if (err < 0) {
 		pr_debug("Couldn't run the workload!\n");
@@ -137,7 +134,7 @@
 		if (retry_count++ > 1000) {
 			pr_debug("Failed after retrying 1000 times\n");
 			err = -1;
-			goto out_free_maps;
+			goto out_delete_evlist;
 		}
 
 		goto retry;
@@ -148,10 +145,9 @@
 		err = -1;
 	}
 
-out_free_maps:
+out_delete_evlist:
 	perf_cpu_map__put(cpus);
 	perf_thread_map__put(threads);
-out_delete_evlist:
 	evlist__delete(evlist);
 	return err;
 }
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index 28f51c4..d1e208b 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -102,6 +102,7 @@
 	TEST_ASSERT_VAL("failed to synthesize map",
 		!perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL));
 
+	perf_thread_map__put(threads);
 	return 0;
 }
 
@@ -109,12 +110,12 @@
 {
 	struct perf_thread_map *threads;
 	char *str;
-	int i;
 
 	TEST_ASSERT_VAL("failed to allocate map string",
 			asprintf(&str, "%d,%d", getpid(), getppid()) >= 0);
 
 	threads = thread_map__new_str(str, NULL, 0, false);
+	free(str);
 
 	TEST_ASSERT_VAL("failed to allocate thread_map",
 			threads);
@@ -141,9 +142,6 @@
 	TEST_ASSERT_VAL("failed to not remove thread",
 			thread_map__remove(threads, 0));
 
-	for (i = 0; i < threads->nr; i++)
-		zfree(&threads->map[i].comm);
-
-	free(threads);
+	perf_thread_map__put(threads);
 	return 0;
 }
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 5121b4d..882cd1f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1306,6 +1306,7 @@
 		perf_evsel__free_fd(&evsel->core);
 		perf_evsel__free_id(&evsel->core);
 	}
+	perf_evlist__reset_id_hash(&evlist->core);
 }
 
 static int evlist__create_syswide_maps(struct evlist *evlist)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1bf7686..7ecbc8e 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -46,6 +46,7 @@
 #include "string2.h"
 #include "memswap.h"
 #include "util.h"
+#include "hashmap.h"
 #include "../perf-sys.h"
 #include "util/parse-branch-options.h"
 #include <internal/xyarray.h>
@@ -1390,7 +1391,9 @@
 	zfree(&evsel->group_name);
 	zfree(&evsel->name);
 	zfree(&evsel->pmu_name);
-	zfree(&evsel->per_pkg_mask);
+	evsel__zero_per_pkg(evsel);
+	hashmap__free(evsel->per_pkg_mask);
+	evsel->per_pkg_mask = NULL;
 	zfree(&evsel->metric_events);
 	perf_evsel__object.fini(evsel);
 }
@@ -2781,3 +2784,16 @@
 
 	return store_evsel_ids(evsel, evlist);
 }
+
+void evsel__zero_per_pkg(struct evsel *evsel)
+{
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	if (evsel->per_pkg_mask) {
+		hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
+			free((char *)cur->key);
+
+		hashmap__clear(evsel->per_pkg_mask);
+	}
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4e8e49f..6026487 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -19,6 +19,7 @@
 union perf_event;
 struct bpf_counter_ops;
 struct target;
+struct hashmap;
 
 typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
 
@@ -112,7 +113,7 @@
 	bool			merged_stat;
 	bool			reset_group;
 	bool			errored;
-	unsigned long		*per_pkg_mask;
+	struct hashmap		*per_pkg_mask;
 	struct evsel		*leader;
 	struct list_head	config_terms;
 	int			err;
@@ -433,4 +434,5 @@
 
 int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
 
+void evsel__zero_per_pkg(struct evsel *evsel);
 #endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4fe9e2a..20effdf 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1618,8 +1618,8 @@
 
 	fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
 	fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
-		    tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec,
-		    clockid_ns.tv_sec, clockid_ns.tv_nsec,
+		    tstr, (long) tod_ns.tv_sec, (int) tod_ns.tv_usec,
+		    (long) clockid_ns.tv_sec, clockid_ns.tv_nsec,
 		    clockid_name(clockid));
 }
 
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 692e56d..fbc40a2 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -77,8 +77,7 @@
 	if (strstarts(filename, "/system/lib/")) {
 		char *ndk, *app;
 		const char *arch;
-		size_t ndk_length;
-		size_t app_length;
+		int ndk_length, app_length;
 
 		ndk = getenv("NDK_ROOT");
 		app = getenv("APP_PLATFORM");
@@ -106,8 +105,8 @@
 		if (new_length > PATH_MAX)
 			return false;
 		snprintf(newfilename, new_length,
-			"%s/platforms/%s/arch-%s/usr/lib/%s",
-			ndk, app, arch, libname);
+			"%.*s/platforms/%.*s/arch-%s/usr/lib/%s",
+			ndk_length, ndk, app_length, app, arch, libname);
 
 		return true;
 	}
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index d5b6aff..d57ac86 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -89,6 +89,7 @@
 %type <str> PE_EVENT_NAME
 %type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
 %type <str> PE_DRV_CFG_TERM
+%type <str> event_pmu_name
 %destructor { free ($$); } <str>
 %type <term> event_term
 %destructor { parse_events_term__delete ($$); } <term>
@@ -272,8 +273,11 @@
 	   event_legacy_raw sep_dc |
 	   event_bpf_file
 
+event_pmu_name:
+PE_NAME | PE_PMU_EVENT_PRE
+
 event_pmu:
-PE_NAME opt_pmu_config
+event_pmu_name opt_pmu_config
 {
 	struct parse_events_state *parse_state = _parse_state;
 	struct parse_events_error *error = parse_state->error;
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 71b7535..845dd46 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -36,3 +36,4 @@
 util/units.c
 util/affinity.c
 util/rwsem.c
+util/hashmap.c
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 0d5ad42..552b590 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -3140,7 +3140,7 @@
 		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
-		if (sort__mode != SORT_MODE__MEMORY)
+		if (sort__mode != SORT_MODE__BRANCH)
 			return -EINVAL;
 
 		return __sort_dimension__add_output(list, sd);
@@ -3152,7 +3152,7 @@
 		if (strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
-		if (sort__mode != SORT_MODE__BRANCH)
+		if (sort__mode != SORT_MODE__MEMORY)
 			return -EINVAL;
 
 		return __sort_dimension__add_output(list, sd);
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index cce7a76..7f09cda 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -983,7 +983,7 @@
 	if (config->interval_clear)
 		puts(CONSOLE_CLEAR);
 
-	sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep);
+	sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
 
 	if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) {
 		switch (config->aggr_mode) {
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 5d8af29..c400f8d 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -13,6 +13,7 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
+#include "hashmap.h"
 #include <linux/zalloc.h>
 
 void update_stats(struct stats *stats, u64 val)
@@ -277,18 +278,29 @@
 	}
 }
 
-static void zero_per_pkg(struct evsel *counter)
+static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
 {
-	if (counter->per_pkg_mask)
-		memset(counter->per_pkg_mask, 0, cpu__max_cpu());
+	uint64_t *key = (uint64_t *) __key;
+
+	return *key & 0xffffffff;
+}
+
+static bool pkg_id_equal(const void *__key1, const void *__key2,
+			 void *ctx __maybe_unused)
+{
+	uint64_t *key1 = (uint64_t *) __key1;
+	uint64_t *key2 = (uint64_t *) __key2;
+
+	return *key1 == *key2;
 }
 
 static int check_per_pkg(struct evsel *counter,
 			 struct perf_counts_values *vals, int cpu, bool *skip)
 {
-	unsigned long *mask = counter->per_pkg_mask;
+	struct hashmap *mask = counter->per_pkg_mask;
 	struct perf_cpu_map *cpus = evsel__cpus(counter);
-	int s;
+	int s, d, ret = 0;
+	uint64_t *key;
 
 	*skip = false;
 
@@ -299,7 +311,7 @@
 		return 0;
 
 	if (!mask) {
-		mask = zalloc(cpu__max_cpu());
+		mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
 		if (!mask)
 			return -ENOMEM;
 
@@ -321,8 +333,25 @@
 	if (s < 0)
 		return -1;
 
-	*skip = test_and_set_bit(s, mask) == 1;
-	return 0;
+	/*
+	 * On multi-die system, die_id > 0. On no-die system, die_id = 0.
+	 * We use hashmap(socket, die) to check the used socket+die pair.
+	 */
+	d = cpu_map__get_die(cpus, cpu, NULL).die;
+	if (d < 0)
+		return -1;
+
+	key = malloc(sizeof(*key));
+	if (!key)
+		return -ENOMEM;
+
+	*key = (uint64_t)d << 32 | s;
+	if (hashmap__find(mask, (void *)key, NULL))
+		*skip = true;
+	else
+		ret = hashmap__add(mask, (void *)key, (void *)1);
+
+	return ret;
 }
 
 static int
@@ -422,7 +451,7 @@
 	}
 
 	if (counter->per_pkg)
-		zero_per_pkg(counter);
+		evsel__zero_per_pkg(counter);
 
 	ret = process_counter_maps(config, counter);
 	if (ret)
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index f507dff..8a01af7 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -361,6 +361,7 @@
 		pr_debug("error reading saved cmdlines\n");
 		goto out;
 	}
+	buf[ret] = '\0';
 
 	parse_saved_cmdline(pevent, buf, size);
 	ret = 0;