| From 8214899342981dbd49ae24aadbbd19e9e7830684 Mon Sep 17 00:00:00 2001 |
| From: Piotr Luc <piotr.luc@intel.com> |
| Date: Tue, 18 Oct 2016 17:01:11 +0200 |
| Subject: [PATCH] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features |
| |
| commit 8214899342981dbd49ae24aadbbd19e9e7830684 upstream. |
| |
| AVX512_4VNNIW - Vector instructions for deep learning enhanced word |
| variable precision. |
| AVX512_4FMAPS - Vector instructions for deep learning floating-point |
| single precision. |
| |
| These new instructions are to be used in future Intel Xeon & Xeon Phi |
| processors. Bits 2 and 3 of CPUID[level:0x07, EDX] indicate whether the |
| new instructions are supported by a processor. |
| |
| The spec can be found in the Intel Software Developer Manual (SDM) or in |
| the Instruction Set Extensions Programming Reference (ISE). |
| |
| Define new feature flags to enumerate the new instructions in /proc/cpuinfo |
| according to the CPUID bits, and add the xsave extensions that are |
| required for proper operation. |
| |
| Signed-off-by: Piotr Luc <piotr.luc@intel.com> |
| Cc: Denys Vlasenko <dvlasenk@redhat.com> |
| Cc: Peter Zijlstra <peterz@infradead.org> |
| Cc: Brian Gerst <brgerst@gmail.com> |
| Cc: Dave Hansen <dave.hansen@intel.com> |
| Cc: Borislav Petkov <bp@alien8.de> |
| Cc: Andy Lutomirski <luto@kernel.org> |
| Cc: Josh Poimboeuf <jpoimboe@redhat.com> |
| Cc: Linus Torvalds <torvalds@linux-foundation.org> |
| Link: http://lkml.kernel.org/r/20161018150111.29926-1-piotr.luc@intel.com |
| Signed-off-by: Thomas Gleixner <tglx@linutronix.de> |
| |
| diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h |
| index 1188bc849ee3..a39629206864 100644 |
| --- a/arch/x86/include/asm/cpufeatures.h |
| +++ b/arch/x86/include/asm/cpufeatures.h |
| @@ -194,6 +194,8 @@ |
| #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ |
| |
| #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ |
| +#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ |
| +#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ |
| |
| /* Virtualization flags: Linux defined, word 8 */ |
| #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ |
| diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c |
| index 8cb57df9398d..1db8dc490b66 100644 |
| --- a/arch/x86/kernel/cpu/scattered.c |
| +++ b/arch/x86/kernel/cpu/scattered.c |
| @@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) |
| |
| static const struct cpuid_bit cpuid_bits[] = { |
| { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, |
| + { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, |
| + { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, |
| { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, |
| { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, |
| { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, |
| diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c |
| index 124aa5c593f8..095ef7ddd6ae 100644 |
| --- a/arch/x86/kernel/fpu/xstate.c |
| +++ b/arch/x86/kernel/fpu/xstate.c |
| @@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void) |
| setup_clear_cpu_cap(X86_FEATURE_MPX); |
| setup_clear_cpu_cap(X86_FEATURE_XGETBV1); |
| setup_clear_cpu_cap(X86_FEATURE_PKU); |
| + setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); |
| + setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); |
| } |
| |
| /* |
| diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h |
| index 1188bc849ee3..a39629206864 100644 |
| --- a/tools/arch/x86/include/asm/cpufeatures.h |
| +++ b/tools/arch/x86/include/asm/cpufeatures.h |
| @@ -194,6 +194,8 @@ |
| #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ |
| |
| #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ |
| +#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ |
| +#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ |
| |
| /* Virtualization flags: Linux defined, word 8 */ |
| #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ |
| -- |
| 2.15.0 |
| |