Merge local branch 'x86-codegen'

Merge trivial x86 code generation annoyances

 - Introduce helper macros for clang asm input problems

 - use said macros to improve trivially stupid code generation issues in
   bitops and array_index_mask_nospec

 - also improve codegen with 32-bit array index comparisons

None of these really matter, but I look at code generation and profiles
fairly regularly, and these misfeatures caused the generated code to
look really odd and distract from the real issues.

* branch 'x86-codegen' of local tree:
  x86: improve bitop code generation with clang
  x86: improve array_index_mask_nospec() code generation
  clang: work around asm input constraint problems
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 63bdc6b8..7b44b3c 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -33,20 +33,16 @@
  * Returns:
  *     0 - (index < size)
  */
-static __always_inline unsigned long array_index_mask_nospec(unsigned long index,
-		unsigned long size)
-{
-	unsigned long mask;
-
-	asm volatile ("cmp %1,%2; sbb %0,%0;"
-			:"=r" (mask)
-			:"g"(size),"r" (index)
-			:"cc");
-	return mask;
-}
-
-/* Override the default implementation from linux/nospec.h. */
-#define array_index_mask_nospec array_index_mask_nospec
+#define array_index_mask_nospec(idx,sz) ({	\
+	typeof((idx)+(sz)) __idx = (idx);	\
+	typeof(__idx) __sz = (sz);		\
+	unsigned long __mask;			\
+	asm volatile ("cmp %1,%2; sbb %0,%0"	\
+			:"=r" (__mask)		\
+			:ASM_INPUT_G (__sz),	\
+			 "r" (__idx)		\
+			:"cc");			\
+	__mask; })
 
 /* Prevent speculative execution past this barrier. */
 #define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 990eb68..b96d459 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -250,7 +250,7 @@
 {
 	asm("rep; bsf %1,%0"
 		: "=r" (word)
-		: "rm" (word));
+		: ASM_INPUT_RM (word));
 	return word;
 }
 
@@ -297,7 +297,7 @@
 
 	asm("bsr %1,%0"
 	    : "=r" (word)
-	    : "rm" (word));
+	    : ASM_INPUT_RM (word));
 	return word;
 }
 
@@ -320,7 +320,7 @@
 	 */
 	asm("bsfl %1,%0"
 	    : "=r" (r)
-	    : "rm" (x), "0" (-1));
+	    : ASM_INPUT_RM (x), "0" (-1));
 #elif defined(CONFIG_X86_CMOV)
 	asm("bsfl %1,%0\n\t"
 	    "cmovzl %2,%0"
@@ -377,7 +377,7 @@
 	 */
 	asm("bsrl %1,%0"
 	    : "=r" (r)
-	    : "rm" (x), "0" (-1));
+	    : ASM_INPUT_RM (x), "0" (-1));
 #elif defined(CONFIG_X86_CMOV)
 	asm("bsrl %1,%0\n\t"
 	    "cmovzl %2,%0"
@@ -416,7 +416,7 @@
 	 */
 	asm("bsrq %1,%q0"
 	    : "+r" (bitpos)
-	    : "rm" (x));
+	    : ASM_INPUT_RM (x));
 	return bitpos + 1;
 }
 #else
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 49feac0..4c1a39d 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -118,3 +118,13 @@
 
 #define __diag_ignore_all(option, comment) \
 	__diag_clang(13, ignore, option)
+
+/*
+ * clang has horrible behavior with "g" or "rm" constraints for asm
+ * inputs, turning them into something worse than "m". Avoid using
+ * constraints with multiple possible uses (but "ir" seems to be ok):
+ *
+ *	https://github.com/llvm/llvm-project/issues/20571
+ */
+#define ASM_INPUT_G "ir"
+#define ASM_INPUT_RM "r"
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index d1a9dbb..93600de 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -409,6 +409,15 @@
 #define asm_goto_output(x...) asm volatile goto(x)
 #endif
 
+/*
+ * Clang has trouble with constraints with multiple
+ * alternative behaviors (mainly "g" and "rm").
+ */
+#ifndef ASM_INPUT_G
+  #define ASM_INPUT_G "g"
+  #define ASM_INPUT_RM "rm"
+#endif
+
 #ifdef CONFIG_CC_HAS_ASM_INLINE
 #define asm_inline asm __inline
 #else