Add AVX-VNNI runtime detection and a gemmology AVX-VNNI kernel.

Review notes for this revision of the patch:
  * SSE.cpp (MSVC): has_cpuid_bits() is left untouched, so existing leaf-7
    checks such as AVX2 keep reading sub-leaf 0. A separate
    has_cpuid_bits_ex() built on __cpuidex() reads sub-leaf 1.
  * SSE.cpp (inline asm): the sub-leaf is loaded with "movl $1, %ecx".
    Without the '$' the operand is a memory reference to address 1.
  * SSE.cpp: avxvnni_enabled is gated on has_avx(), like avx2_enabled.
  * SSE.h: the __AVXVNNI__ comment now names the right instruction set.
  * The post-image blob ids on the "index" lines of SSE.cpp, SSE.h and the new
    kernel file predate these fixes and are informational only.

diff --git a/build/moz.configure/toolchain.configure b/build/moz.configure/toolchain.configure
index e1bf3a20a2bd..9eefb8a73212 100644
--- a/build/moz.configure/toolchain.configure
+++ b/build/moz.configure/toolchain.configure
@@ -3244,3 +3244,8 @@ set_config("SSSE3_FLAGS", ["-mssse3"])
 set_config("SSE4_2_FLAGS", ["-msse4.2"])
 set_config("FMA_FLAGS", ["-mfma"])
 set_config("AVX2_FLAGS", ["-mavx2"])
+set_config(
+    "AVXVNNI_FLAGS",
+    ["-mavxvnni"],
+    try_compile(check_msg="for -mavxvnni support", flags=["-mavxvnni"]),
+)
diff --git a/js/src/intgemm/IntegerGemmIntrinsic.cpp b/js/src/intgemm/IntegerGemmIntrinsic.cpp
index 800e6153b793..feb4e3787296 100644
--- a/js/src/intgemm/IntegerGemmIntrinsic.cpp
+++ b/js/src/intgemm/IntegerGemmIntrinsic.cpp
@@ -20,7 +20,10 @@
 #include "wasm/WasmInstance.h"
 #include "wasm/WasmLog.h"
 
-#if defined(USE_AVX2)
+#if defined(USE_AVXVNNI)
+#  define SUPPORTED_ARCHS \
+    xsimd::arch_list<xsimd::avxvnni, xsimd::avx2, xsimd::ssse3, xsimd::sse2>
+#elif defined(USE_AVX2)
 #  define SUPPORTED_ARCHS \
     xsimd::arch_list<xsimd::avx2, xsimd::ssse3, xsimd::sse2>
 #elif defined(USE_SSSE3)
diff --git a/js/src/intgemm/moz.build b/js/src/intgemm/moz.build
index cac050738729..653670a02f84 100644
--- a/js/src/intgemm/moz.build
+++ b/js/src/intgemm/moz.build
@@ -37,6 +37,12 @@ if CONFIG["INTEL_ARCHITECTURE"]:
     SOURCES[
         "/third_party/gemmology/kernels/GemmologyEngineAVX2.cpp"
     ].flags += CONFIG["AVX2_FLAGS"]
+    if CONFIG["AVXVNNI_FLAGS"]:
+        DEFINES["USE_AVXVNNI"] = True
+        SOURCES += ["/third_party/gemmology/kernels/GemmologyEngineAVXVNNI.cpp"]
+        SOURCES[
+            "/third_party/gemmology/kernels/GemmologyEngineAVXVNNI.cpp"
+        ].flags += CONFIG["AVXVNNI_FLAGS"]
 
 if CONFIG["TARGET_CPU"] == "aarch64":
     DEFINES["USE_NEON"] = True
diff --git a/mozglue/misc/SSE.cpp b/mozglue/misc/SSE.cpp
index ca0b4c3c86de..74f3917788d0 100644
--- a/mozglue/misc/SSE.cpp
+++ b/mozglue/misc/SSE.cpp
@@ -40,6 +40,22 @@ static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
   return (regs[reg] & bits) == bits;
 }
 
+// Like has_cpuid_bits(), but queries sub-leaf 1 (ECX = 1) of |level|. Used
+// for feature bits that are reported there, e.g. AVX-VNNI in leaf 7.
+static bool has_cpuid_bits_ex(unsigned int level, CPUIDRegister reg,
+                              unsigned int bits) {
+  unsigned int regs[4];
+  unsigned int eax, ebx, ecx, edx;
+  unsigned max = __get_cpuid_max(level & 0x80000000u, nullptr);
+  if (level > max) return false;
+  __cpuid_count(level, 1, eax, ebx, ecx, edx);
+  regs[0] = eax;
+  regs[1] = ebx;
+  regs[2] = ecx;
+  regs[3] = edx;
+  return (regs[reg] & bits) == bits;
+}
+
 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
 
 enum CPUIDRegister { eax = 0, ebx = 1, ecx = 2, edx = 3 };
@@ -48,12 +64,26 @@ static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
                            unsigned int bits) {
   // Check that the level in question is supported.
   int regs[4];
   __cpuid(regs, level & 0x80000000u);
   if (unsigned(regs[0]) < level) return false;
 
   // "The __cpuid intrinsic clears the ECX register before calling the cpuid
   // instruction."
   __cpuid(regs, level);
   return (unsigned(regs[reg]) & bits) == bits;
 }
+
+// Like has_cpuid_bits(), but queries sub-leaf 1 (ECX = 1) of |level|.
+static bool has_cpuid_bits_ex(unsigned int level, CPUIDRegister reg,
+                              unsigned int bits) {
+  // Check that the level in question is supported.
+  int regs[4];
+  __cpuid(regs, level & 0x80000000u);
+  if (unsigned(regs[0]) < level) return false;
+
+  // __cpuid always clears ECX, so the sub-leaf has to be passed explicitly
+  // through __cpuidex.
+  __cpuidex(regs, level, 1);
+  return (unsigned(regs[reg]) & bits) == bits;
+}
 
@@ -77,6 +107,21 @@ static void moz_cpuid(int CPUInfo[4], int InfoType) {
         "D"(CPUInfo)    // %edi
       : "%ecx", "%edx", "%esi");
 }
+// Same as moz_cpuid(), but requests sub-leaf 1 (ECX = 1).
+static void moz_cpuid_ex(int CPUInfo[4], int InfoType) {
+  asm("xchg %esi, %ebx\n"
+      "movl $1, %ecx\n"  // ecx is the sub-leaf; '$' makes 1 an immediate
+      "cpuid\n"
+      "movl %eax, (%edi)\n"
+      "movl %ebx, 4(%edi)\n"
+      "movl %ecx, 8(%edi)\n"
+      "movl %edx, 12(%edi)\n"
+      "xchg %esi, %ebx\n"
+      :
+      : "a"(InfoType),  // %eax
+        "D"(CPUInfo)    // %edi
+      : "%ecx", "%edx", "%esi");
+}
 #  else
 static void moz_cpuid(int CPUInfo[4], int InfoType) {
   asm("xchg %rsi, %rbx\n"
@@ -92,6 +137,21 @@ static void moz_cpuid(int CPUInfo[4], int InfoType) {
         "D"(CPUInfo)    // %rdi
       : "%ecx", "%edx", "%rsi");
 }
+// Same as moz_cpuid(), but requests sub-leaf 1 (ECX = 1).
+static void moz_cpuid_ex(int CPUInfo[4], int InfoType) {
+  asm("xchg %rsi, %rbx\n"
+      "movl $1, %ecx\n"  // ecx is the sub-leaf; '$' makes 1 an immediate
+      "cpuid\n"
+      "movl %eax, (%rdi)\n"
+      "movl %ebx, 4(%rdi)\n"
+      "movl %ecx, 8(%rdi)\n"
+      "movl %edx, 12(%rdi)\n"
+      "xchg %rsi, %rbx\n"
+      :
+      : "a"(InfoType),  // %eax
+        "D"(CPUInfo)    // %rdi
+      : "%ecx", "%edx", "%rsi");
+}
 #  endif
 
 static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
@@ -105,6 +165,18 @@ static bool has_cpuid_bits(unsigned int level, CPUIDRegister reg,
   return (unsigned(regs[reg]) & bits) == bits;
 }
 
+// Like has_cpuid_bits(), but queries sub-leaf 1 (ECX = 1) of |level|.
+static bool has_cpuid_bits_ex(unsigned int level, CPUIDRegister reg,
+                              unsigned int bits) {
+  // Check that the level in question is supported.
+  volatile int regs[4];
+  moz_cpuid_ex((int*)regs, level & 0x80000000u);
+  if (unsigned(regs[0]) < level) return false;
+
+  moz_cpuid_ex((int*)regs, level);
+  return (unsigned(regs[reg]) & bits) == bits;
+}
+
 #endif  // end CPUID declarations
 
 }  // namespace
@@ -179,6 +251,11 @@ bool avx_enabled = has_avx();
 bool avx2_enabled = has_avx() && has_cpuid_bits(7u, ebx, (1u << 5));
 #  endif
 
+#  if !defined(MOZILLA_PRESUME_AVXVNNI)
+// Gated on has_avx() like avx2_enabled above, since AVX-VNNI builds on AVX.
+bool avxvnni_enabled = has_avx() && has_cpuid_bits_ex(7u, eax, (1u << 4));
+#  endif
+
 #  if !defined(MOZILLA_PRESUME_AES)
 bool aes_enabled = has_cpuid_bits(1u, ecx, (1u << 25));
 #  endif
diff --git a/mozglue/misc/SSE.h b/mozglue/misc/SSE.h
index 0b87366a8043..d7c7e4ae973a 100644
--- a/mozglue/misc/SSE.h
+++ b/mozglue/misc/SSE.h
@@ -138,6 +138,10 @@
 // It's ok to use AVX instructions based on the -march option.
 #    define MOZILLA_PRESUME_AVX2 1
 #  endif
+#  ifdef __AVXVNNI__
+// It's ok to use AVX-VNNI instructions based on the -march option.
+#    define MOZILLA_PRESUME_AVXVNNI 1
+#  endif
 #  ifdef __AES__
 // It's ok to use AES instructions based on the -march option.
 #    define MOZILLA_PRESUME_AES 1
@@ -224,6 +228,9 @@ extern bool MFBT_DATA avx_enabled;
 #  if !defined(MOZILLA_PRESUME_AVX2)
 extern bool MFBT_DATA avx2_enabled;
 #  endif
+#  if !defined(MOZILLA_PRESUME_AVXVNNI)
+extern bool MFBT_DATA avxvnni_enabled;
+#  endif
 #  if !defined(MOZILLA_PRESUME_AES)
 extern bool MFBT_DATA aes_enabled;
 #  endif
@@ -350,6 +357,16 @@ inline bool supports_avx2() { return sse_private::avx2_enabled; }
 inline bool supports_avx2() { return false; }
 #endif
 
+#if defined(MOZILLA_PRESUME_AVXVNNI)
+#  define MOZILLA_MAY_SUPPORT_AVXVNNI 1
+inline bool supports_avxvnni() { return true; }
+#elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
+#  define MOZILLA_MAY_SUPPORT_AVXVNNI 1
+inline bool supports_avxvnni() { return sse_private::avxvnni_enabled; }
+#else
+inline bool supports_avxvnni() { return false; }
+#endif
+
 #if defined(MOZILLA_PRESUME_AES)
 #  define MOZILLA_MAY_SUPPORT_AES 1
 inline bool supports_aes() { return true; }
diff --git a/third_party/gemmology/kernels/GemmologyEngineAVXVNNI.cpp b/third_party/gemmology/kernels/GemmologyEngineAVXVNNI.cpp
new file mode 100644
index 000000000000..c0a057346b9b
--- /dev/null
+++ b/third_party/gemmology/kernels/GemmologyEngineAVXVNNI.cpp
@@ -0,0 +1,19 @@
+/* -*- mode: c++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this file,
+ * You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <gemmology.h>
+
+namespace gemmology {
+template struct Engine<xsimd::avxvnni>;
+template void Engine<xsimd::avxvnni>::SelectColumnsB(int8_t const*, int8_t*,
+                                                     size_t, uint32_t const*,
+                                                     uint32_t const*);
+template void Engine<xsimd::avxvnni>::Shift::Multiply(
+    uint8_t const*, int8_t const*, size_t, size_t, size_t,
+    gemmology::callbacks::UnquantizeAndAddBiasAndWrite);
+template void Engine<xsimd::avxvnni>::Shift::PrepareBias(
+    int8_t const*, size_t, size_t,
+    gemmology::callbacks::UnquantizeAndAddBiasAndWrite);
+}  // namespace gemmology