Go to:
Gentoo Home
Documentation
Forums
Lists
Bugs
Planet
Store
Wiki
Get Gentoo!
Gentoo's Bugzilla – Attachment 895782 Details for
Bug 933827
www-client/chromium-125.0.6422.112[system-toolchain]: "LLVM ERROR: Do not know how to split this operator's operand!" with sys-devel/clang-18.1.6
Home
|
New
–
[Ex]
|
Browse
|
Search
|
Privacy Policy
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
chromium-125.0.6422.112-skia-xnnpack-disable-avx512.patch
chromium-125.0.6422.112-skia-xnnpack-disable-avx512.patch (text/plain), 89.87 KB, created by
Matt Whitlock
on 2024-06-13 20:23:44 UTC
(
hide
)
Description:
chromium-125.0.6422.112-skia-xnnpack-disable-avx512.patch
Filename:
MIME Type:
Creator:
Matt Whitlock
Created:
2024-06-13 20:23:44 UTC
Size:
89.87 KB
patch
obsolete
>diff '--color=auto' -Naur a/skia/BUILD.gn b/skia/BUILD.gn >--- a/skia/BUILD.gn 2024-05-23 22:39:28.813508300 +0000 >+++ b/skia/BUILD.gn 2024-06-08 15:09:02.000000000 +0000 >@@ -769,32 +769,11 @@ > } > visibility = [ ":skcms" ] > } >- skia_source_set("skcms_TransformSkx") { >- sources = skcms_TransformSkx >- if (!is_win) { >- cflags = [ >- "-w", >- "-mavx512f", >- "-mavx512dq", >- "-mavx512cd", >- "-mavx512bw", >- "-mavx512vl", >- "-std=c11", >- ] >- } else { >- cflags = [ "/arch:AVX512" ] >- } >- visibility = [ ":skcms" ] >- } > } else { > skia_source_set("skcms_TransformHsw") { > sources = [] > visibility = [ ":skcms" ] > } >- skia_source_set("skcms_TransformSkx") { >- sources = [] >- visibility = [ ":skcms" ] >- } > } > > source_set("skcms_TransformBaseline_and_public") { >@@ -823,7 +802,6 @@ > deps = [ > ":skcms_TransformBaseline_and_public", > ":skcms_TransformHsw", >- ":skcms_TransformSkx", > ] > public = > rebase_path(skcms_public_headers, ".", "//third_party/skia/modules/skcms") >diff '--color=auto' -Naur a/third_party/skia/modules/skcms/skcms.cc b/third_party/skia/modules/skcms/skcms.cc >--- a/third_party/skia/modules/skcms/skcms.cc 2024-05-23 22:40:15.318143800 +0000 >+++ b/third_party/skia/modules/skcms/skcms.cc 2024-06-08 15:09:02.000000000 +0000 >@@ -2787,11 +2787,6 @@ > auto run = baseline::run_program; > switch (cpu_type()) { > case CpuType::SKX: >- #if !defined(SKCMS_DISABLE_SKX) >- run = skx::run_program; >- break; >- #endif >- > case CpuType::HSW: > #if !defined(SKCMS_DISABLE_HSW) > run = hsw::run_program; >diff '--color=auto' -Naur a/third_party/xnnpack/BUILD.gn b/third_party/xnnpack/BUILD.gn >--- a/third_party/xnnpack/BUILD.gn 2024-05-23 22:39:52.779897200 +0000 >+++ b/third_party/xnnpack/BUILD.gn 2024-06-08 15:09:02.000000000 +0000 >@@ -61,13 +61,7 @@ > if (current_cpu == "x64" || current_cpu == "x86") { > xnnpack_deps = [ > ":amalgam_avx-no-avx2-no-f16c-no-fma", >- ":amalgam_avx2-avxvnni-f16c-fma", >- ":amalgam_avx512f", > ":amalgam_f16c-fma-avx2", >- ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl", >- ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vbmi", >- ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni", >- ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni", > ":amalgam_f16c-fma-no-avx2", > ":amalgam_f16c-no-avx2-no-fma", > ":amalgam_sse2-no-sse3", >@@ -83,13 +77,7 @@ > > xnnpack_standalone_deps = [ > ":amalgam_avx-no-avx2-no-f16c-no-fma_standalone", >- ":amalgam_avx2-avxvnni-f16c-fma_standalone", >- ":amalgam_avx512f_standalone", > ":amalgam_f16c-fma-avx2_standalone", >- ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vbmi_standalone", >- ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni_standalone", >- ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni_standalone", >- ":amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl_standalone", > ":amalgam_f16c-fma-no-avx2_standalone", > ":amalgam_f16c-no-avx2-no-fma_standalone", > ":amalgam_sse2-no-sse3_standalone", >@@ -304,102 +292,6 @@ > } > } > >- source_set("amalgam_avx2-avxvnni-f16c-fma") { >- cflags = [ >- "-mavx2", >- "-mavxvnni", >- "-mf16c", >- "-mfma", >- ] >- >- sources = [ "src/src/amalgam/gen/avxvnni.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- } >- >- # This is a target that cannot depend on //base. >- source_set("amalgam_avx2-avxvnni-f16c-fma_standalone") { >- cflags = [ >- "-mavx2", >- "-mavxvnni", >- "-mf16c", >- "-mfma", >- ] >- >- sources = [ "src/src/amalgam/gen/avxvnni.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool:pthreadpool_standalone", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- >- if (!(is_android && use_order_profiling)) { >- assert_no_deps = [ "//base" ] >- } >- } >- >- source_set("amalgam_avx512f") { >- cflags = [ "-mavx512f" ] >- >- sources = [ "src/src/amalgam/gen/avx512f.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- } >- >- # This is a target that cannot depend on //base. >- source_set("amalgam_avx512f_standalone") { >- cflags = [ "-mavx512f" ] >- >- sources = [ "src/src/amalgam/gen/avx512f.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool:pthreadpool_standalone", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- >- if (!(is_android && use_order_profiling)) { >- assert_no_deps = [ "//base" ] >- } >- } >- > source_set("amalgam_f16c-fma-avx2") { > cflags = [ > "-mavx2", >@@ -435,257 +327,6 @@ > > configs -= [ "//build/config/compiler:chromium_code" ] > configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool:pthreadpool_standalone", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- >- if (!(is_android && use_order_profiling)) { >- assert_no_deps = [ "//base" ] >- } >- } >- >- source_set("amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl") { >- cflags = [ >- "-mavx512bw", >- "-mavx512cd", >- "-mavx512dq", >- "-mavx512f", >- "-mavx512vl", >- "-mf16c", >- "-mfma", >- ] >- >- sources = [ "src/src/amalgam/gen/avx512skx.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- } >- >- # This is a target that cannot depend on //base. >- source_set( >- "amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl_standalone") { >- cflags = [ >- "-mavx512bw", >- "-mavx512cd", >- "-mavx512dq", >- "-mavx512f", >- "-mavx512vl", >- "-mf16c", >- "-mfma", >- ] >- >- sources = [ "src/src/amalgam/gen/avx512skx.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool:pthreadpool_standalone", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- >- if (!(is_android && use_order_profiling)) { >- assert_no_deps = [ "//base" ] >- } >- } >- >- source_set( >- "amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vbmi") { >- cflags = [ >- "-mavx512bw", >- "-mavx512cd", >- "-mavx512dq", >- "-mavx512f", >- "-mavx512vbmi", >- "-mavx512vl", >- "-mf16c", >- "-mfma", >- ] >- >- sources = [ "src/src/amalgam/gen/avx512vbmi.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- } >- >- # This is a target that cannot depend on //base. >- source_set( >- "amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vbmi_standalone") { >- cflags = [ >- "-mavx512bw", >- "-mavx512cd", >- "-mavx512dq", >- "-mavx512f", >- "-mavx512vbmi", >- "-mavx512vl", >- "-mf16c", >- "-mfma", >- ] >- >- sources = [ "src/src/amalgam/gen/avx512vbmi.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool:pthreadpool_standalone", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- >- if (!(is_android && use_order_profiling)) { >- assert_no_deps = [ "//base" ] >- } >- } >- >- source_set( >- "amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni") { >- cflags = [ >- "-mavx512bw", >- "-mavx512cd", >- "-mavx512dq", >- "-mavx512f", >- "-mavx512vl", >- "-mavx512vnni", >- "-mf16c", >- "-mfma", >- ] >- >- sources = [ "src/src/amalgam/gen/avx512vnni.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- } >- >- # This is a target that cannot depend on //base. >- source_set( >- "amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni_standalone") { >- cflags = [ >- "-mavx512bw", >- "-mavx512cd", >- "-mavx512dq", >- "-mavx512f", >- "-mavx512vl", >- "-mavx512vnni", >- "-mf16c", >- "-mfma", >- ] >- >- sources = [ "src/src/amalgam/gen/avx512vnni.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool:pthreadpool_standalone", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- >- if (!(is_android && use_order_profiling)) { >- assert_no_deps = [ "//base" ] >- } >- } >- >- source_set( >- "amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni") { >- cflags = [ >- "-mavx512bw", >- "-mavx512cd", >- "-mavx512dq", >- "-mavx512f", >- "-mavx512vl", >- "-mavx512vnni", >- "-mf16c", >- "-mfma", >- "-mgfni", >- ] >- >- sources = [ "src/src/amalgam/gen/avx512vnnigfni.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] >- configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] >- >- deps = [ >- "//third_party/cpuinfo", >- "//third_party/fp16", >- "//third_party/fxdiv", >- "//third_party/pthreadpool", >- ] >- >- public_configs = [ ":xnnpack_config" ] >- } >- >- # This is a target that cannot depend on //base. >- source_set( >- "amalgam_f16c-fma-avx512f-avx512cd-avx512bw-avx512dq-avx512vl-avx512vnni-gfni_standalone") { >- cflags = [ >- "-mavx512bw", >- "-mavx512cd", >- "-mavx512dq", >- "-mavx512f", >- "-mavx512vl", >- "-mavx512vnni", >- "-mf16c", >- "-mfma", >- "-mgfni", >- ] >- >- sources = [ "src/src/amalgam/gen/avx512vnnigfni.c" ] >- >- configs -= [ "//build/config/compiler:chromium_code" ] >- configs += [ "//build/config/compiler:no_chromium_code" ] > configs += [ "//build/config/sanitizers:cfi_icall_generalize_pointers" ] > > deps = [ >diff '--color=auto' -Naur a/third_party/xnnpack/src/src/configs/binary-elementwise-config.c b/third_party/xnnpack/src/src/configs/binary-elementwise-config.c >--- a/third_party/xnnpack/src/src/configs/binary-elementwise-config.c 2024-05-23 22:40:15.450157000 +0000 >+++ b/third_party/xnnpack/src/src/configs/binary-elementwise-config.c 2024-06-08 15:11:58.000000000 +0000 >@@ -338,13 +338,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__avx512f_u32; >- f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__avx512f_u32; >- f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__avx512f_u32; >- f32_vadd_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; >- f32_vadd_config.minmax.element_tile = 32; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vadd_minmax_ukernel__avx_u16; > f32_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__avx_u16; > f32_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vaddc_minmax_ukernel__avx_u16; >@@ -429,13 +423,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__avx512f_u32; >- f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__avx512f_u32; >- f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__avx512f_u32; >- f32_vdiv_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; >- f32_vdiv_config.minmax.element_tile = 32; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_vdiv_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdiv_minmax_ukernel__avx_u16; > f32_vdiv_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vdivc_minmax_ukernel__avx_u16; > f32_vdiv_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrdivc_minmax_ukernel__avx_u16; >@@ -517,12 +505,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__avx512f_u32; >- f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__avx512f_u32; >- f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__avx512f_u32; >- f32_vmax_config.minmax.element_tile = 32; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_vmax_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmax_ukernel__avx_u16; > f32_vmax_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__avx_u16; > f32_vmax_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmaxc_ukernel__avx_u16; >@@ -590,12 +573,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__avx512f_u32; >- f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__avx512f_u32; >- f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__avx512f_u32; >- f32_vmin_config.minmax.element_tile = 32; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_vmin_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmin_ukernel__avx_u16; > f32_vmin_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__avx_u16; > f32_vmin_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vminc_ukernel__avx_u16; >@@ -666,13 +644,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__avx512f_u32; >- f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__avx512f_u32; >- f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__avx512f_u32; >- f32_vmul_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; >- f32_vmul_config.minmax.element_tile = 32; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_vmul_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmul_minmax_ukernel__avx_u16; > f32_vmul_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__avx_u16; > f32_vmul_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vmulc_minmax_ukernel__avx_u16; >@@ -757,13 +729,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__avx512f_u32; >- f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__avx512f_u32; >- f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__avx512f_u32; >- f32_vsub_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; >- f32_vsub_config.minmax.element_tile = 32; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_vsub_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsub_minmax_ukernel__avx_u16; > f32_vsub_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsubc_minmax_ukernel__avx_u16; > f32_vsub_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vrsubc_minmax_ukernel__avx_u16; >@@ -845,12 +811,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__avx512f_u32; >- f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__avx512f_u32; >- f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__avx512f_u32; >- f32_vsqrdiff_config.minmax.element_tile = 32; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_vsqrdiff_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiff_ukernel__avx_u16; > f32_vsqrdiff_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__avx_u16; > f32_vsqrdiff_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_f32_vsqrdiffc_ukernel__avx_u16; >@@ -907,13 +868,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__avx512skx_mul32_ld128_u16; >- qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_u16; >- qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_u16; >- qs8_vadd_config.init.qs8_add = xnn_init_qs8_add_minmax_avx512_params; >- qs8_vadd_config.minmax.element_tile = 16; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qs8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_u16; > qs8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_u16; > qs8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_u16; >@@ -1045,13 +1000,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__avx512skx_mul32_ld128_u16; >- qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_u16; >- qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__avx512skx_mul32_ld128_u16; >- qu8_vadd_config.init.qu8_add = xnn_init_qu8_add_minmax_avx512_params; >- qu8_vadd_config.minmax.element_tile = 16; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qu8_vadd_config.minmax.op_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vadd_minmax_ukernel__avx2_mul32_ld64_u16; > qu8_vadd_config.minmax.opc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_u16; > qu8_vadd_config.minmax.ropc_ukernel = (xnn_vbinary_ukernel_fn) xnn_qu8_vaddc_minmax_ukernel__avx2_mul32_ld64_u16; >diff '--color=auto' -Naur a/third_party/xnnpack/src/src/configs/dwconv-config.c b/third_party/xnnpack/src/src/configs/dwconv-config.c >--- a/third_party/xnnpack/src/src/configs/dwconv-config.c 2024-05-23 22:40:15.450157000 +0000 >+++ b/third_party/xnnpack/src/src/configs/dwconv-config.c 2024-06-08 15:12:51.000000000 +0000 >@@ -301,48 +301,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_dwconv_config[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_f32_dwconv_minmax_ukernel_3p16c__avx512f; >- f32_dwconv_config[0].init.f32 = xnn_init_f32_minmax_scalar_params; >- f32_dwconv_config[0].channel_tile = 16; >- f32_dwconv_config[0].channel_subtile = 16; >- f32_dwconv_config[0].channel_round = 1; >- f32_dwconv_config[0].primary_tile = 3; >- >- f32_dwconv_config[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_f32_dwconv_minmax_ukernel_4p16c__avx512f; >- f32_dwconv_config[1].init.f32 = xnn_init_f32_minmax_scalar_params; >- f32_dwconv_config[1].channel_tile = 16; >- f32_dwconv_config[1].channel_subtile = 16; >- f32_dwconv_config[1].channel_round = 1; >- f32_dwconv_config[1].primary_tile = 4; >- >- f32_dwconv_config[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_f32_dwconv_minmax_ukernel_9p16c__avx512f; >- f32_dwconv_config[2].init.f32 = xnn_init_f32_minmax_scalar_params; >- f32_dwconv_config[2].channel_tile = 16; >- f32_dwconv_config[2].channel_subtile = 16; >- f32_dwconv_config[2].channel_round = 1; >- f32_dwconv_config[2].primary_tile = 9; >- >- // Multipass microkernel "acc" value should match unipass and also match across different hardware config. >- // Accumulation (FMA) can produce different results, which results in tests only failing on certain platforms. >- #if XNN_ENABLE_DWCONV_MULTIPASS >- f32_dwconv_config[3].minmax.multipass = (xnn_dwconv_multipass_ukernel_fn) xnn_f32_dwconv_minmax_ukernel_5f5m5l32c16s1r__avx512f; >- f32_dwconv_config[3].init.f32 = xnn_init_f32_minmax_scalar_params; >- f32_dwconv_config[3].channel_tile = 32; >- f32_dwconv_config[3].channel_subtile = 16; >- f32_dwconv_config[3].channel_round = 1; >- f32_dwconv_config[3].primary_tile = 5; >- f32_dwconv_config[3].middle_tile = 5; >- f32_dwconv_config[3].last_tile = 5; >- #else >- f32_dwconv_config[3].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_f32_dwconv_minmax_ukernel_25p16c__avx512f; >- f32_dwconv_config[3].init.f32 = xnn_init_f32_minmax_scalar_params; >- f32_dwconv_config[3].channel_tile = 16; >- f32_dwconv_config[3].channel_subtile = 16; >- f32_dwconv_config[3].channel_round = 1; >- f32_dwconv_config[3].primary_tile = 25; >- #endif // XNN_ENABLE_DWCONV_MULTIPASS >- } else if (hardware_config->use_x86_fma3) { >+ if (hardware_config->use_x86_fma3) { > f32_dwconv_config[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_f32_dwconv_minmax_ukernel_3p16c__fma3; > f32_dwconv_config[0].init.f32 = xnn_init_f32_minmax_avx_params; > f32_dwconv_config[0].channel_tile = 16; >@@ -768,23 +727,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qs8_qc8w_dwconv_config[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qs8_qc8w_dwconv_minmax_fp32_ukernel_3p32c__avx512skx_mul32; >- qs8_qc8w_dwconv_config[0].init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_avx512_params; >- qs8_qc8w_dwconv_config[0].channel_tile = 32; >- qs8_qc8w_dwconv_config[0].channel_subtile = 32; >- qs8_qc8w_dwconv_config[0].channel_round = 1; >- qs8_qc8w_dwconv_config[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qs8_qc8w_dwconv_minmax_fp32_ukernel_9p32c__avx512skx_mul32; >- qs8_qc8w_dwconv_config[1].init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_avx512_params; >- qs8_qc8w_dwconv_config[1].channel_tile = 32; >- qs8_qc8w_dwconv_config[1].channel_subtile = 32; >- qs8_qc8w_dwconv_config[1].channel_round = 1; >- qs8_qc8w_dwconv_config[2].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qs8_qc8w_dwconv_minmax_fp32_ukernel_25p32c__avx512skx_mul32; >- qs8_qc8w_dwconv_config[2].init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_avx512_params; >- qs8_qc8w_dwconv_config[2].channel_tile = 32; >- qs8_qc8w_dwconv_config[2].channel_subtile = 32; >- qs8_qc8w_dwconv_config[2].channel_round = 1; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qs8_qc8w_dwconv_config[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qs8_qc8w_dwconv_minmax_fp32_ukernel_3p16c__avx2_mul32; > qs8_qc8w_dwconv_config[0].init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_avx2_params; > qs8_qc8w_dwconv_config[0].channel_tile = 16; >@@ -982,18 +925,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qs8_dwconv_config[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qs8_dwconv_minmax_fp32_ukernel_9p32c__avx512skx_mul32; >- qs8_dwconv_config[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx512_params; >- qs8_dwconv_config[0].channel_tile = 32; >- qs8_dwconv_config[0].channel_subtile = 32; >- qs8_dwconv_config[0].channel_round = 1; >- qs8_dwconv_config[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qs8_dwconv_minmax_fp32_ukernel_25p32c__avx512skx_mul32; >- qs8_dwconv_config[1].init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx512_params; >- qs8_dwconv_config[1].channel_tile = 32; >- qs8_dwconv_config[1].channel_subtile = 32; >- qs8_dwconv_config[1].channel_round = 1; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qs8_dwconv_config[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qs8_dwconv_minmax_fp32_ukernel_9p16c__avx2_mul32; > qs8_dwconv_config[0].init.qs8 = xnn_init_qs8_conv_minmax_fp32_avx2_params; > qs8_dwconv_config[0].channel_tile = 16; >@@ -1146,18 +1078,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qu8_dwconv_config[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qu8_dwconv_minmax_fp32_ukernel_9p32c__avx512skx_mul32; >- qu8_dwconv_config[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx512_params; >- qu8_dwconv_config[0].channel_tile = 32; >- qu8_dwconv_config[0].channel_subtile = 32; >- qu8_dwconv_config[0].channel_round = 1; >- qu8_dwconv_config[1].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qu8_dwconv_minmax_fp32_ukernel_25p32c__avx512skx_mul32; >- qu8_dwconv_config[1].init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx512_params; >- qu8_dwconv_config[1].channel_tile = 32; >- qu8_dwconv_config[1].channel_subtile = 32; >- qu8_dwconv_config[1].channel_round = 1; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qu8_dwconv_config[0].minmax.unipass = (xnn_dwconv_unipass_ukernel_fn) xnn_qu8_dwconv_minmax_fp32_ukernel_9p16c__avx2_mul32; > qu8_dwconv_config[0].init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx2_params; > qu8_dwconv_config[0].channel_tile = 16; >diff '--color=auto' -Naur a/third_party/xnnpack/src/src/configs/gemm-config.c b/third_party/xnnpack/src/src/configs/gemm-config.c >--- a/third_party/xnnpack/src/src/configs/gemm-config.c 2024-05-23 22:40:15.474159500 +0000 >+++ b/third_party/xnnpack/src/src/configs/gemm-config.c 2024-06-08 15:14:37.000000000 +0000 >@@ -768,17 +768,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast); >- f32_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast); >- f32_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast); >- f32_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast); >- f32_gemm_config.init.f32 = xnn_init_f32_minmax_scalar_params; >- f32_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_f32_gemm_gio_w; >- f32_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_x32_packw_gemm_goi_ukernel_x16__avx512f_u4_prfm; >- f32_gemm_config.mr = 7; >- f32_gemm_config.nr = 16; >- } else if (hardware_config->use_x86_fma3) { >+ if (hardware_config->use_x86_fma3) { > switch (cpuinfo_get_core(0)->uarch) { > case cpuinfo_uarch_zen: > case cpuinfo_uarch_dhyana: >@@ -1143,14 +1133,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- f32_qc4w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_qc4w_gemm_minmax_ukernel_1x32__avx512skx_broadcast); >- f32_qc4w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_qc4w_gemm_minmax_ukernel_7x32__avx512skx_broadcast); >- f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_avx512_params; >- f32_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_f32_qc4w_gemm_goi_w; >- f32_qc4w_gemm_config.mr = 7; >- f32_qc4w_gemm_config.nr = 32; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > f32_qc4w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_qc4w_gemm_minmax_ukernel_1x16__avx2_broadcast); > f32_qc4w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_qc4w_gemm_minmax_ukernel_3x16__avx2_broadcast); > f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_avx_params; >@@ -1303,15 +1286,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- f32_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_qc8w_gemm_minmax_ukernel_1x32__avx512skx_broadcast); >- f32_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_qc8w_gemm_minmax_ukernel_7x32__avx512skx_broadcast); >- f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_scalar_params; >- f32_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_f32_qs8w_gemm_gio_w; >- f32_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_x8_packw_gemm_goi_ukernel_x32__scalar_int_u2; >- f32_qc8w_gemm_config.mr = 7; >- f32_qc8w_gemm_config.nr = 32; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > f32_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_qc8w_gemm_minmax_ukernel_1x16__avx2_broadcast); > f32_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_f32_qc8w_gemm_minmax_ukernel_5x16__avx2_broadcast); > f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_avx_params; >@@ -1498,49 +1473,7 @@ > #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnnigfni) { >- qd8_f16_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx512vnnigfni_prfm); >- qd8_f16_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx512vnnigfni_prfm); >- qd8_f16_qc4w_gemm_config.init.f16_qc4w = xnn_init_f16_qc4w_minmax_avxvnni_params; >- qd8_f16_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w; >- qd8_f16_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w; >- qd8_f16_qc4w_gemm_config.mr = 7; >- qd8_f16_qc4w_gemm_config.nr = 8; >- qd8_f16_qc4w_gemm_config.log2_kr = 3; >- qd8_f16_qc4w_gemm_config.planes = 2; >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnni) { >- qd8_f16_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx512vnni_prfm); >- qd8_f16_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc4w_gemm_minmax_ukernel_7x8c8__avx512vnni_prfm); >- qd8_f16_qc4w_gemm_config.init.f16_qc4w = xnn_init_f16_qc4w_minmax_avxvnni_params; >- qd8_f16_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w; >- qd8_f16_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w; >- qd8_f16_qc4w_gemm_config.mr = 7; >- qd8_f16_qc4w_gemm_config.nr = 8; >- qd8_f16_qc4w_gemm_config.log2_kr = 3; >- qd8_f16_qc4w_gemm_config.planes = 2; >- #if XNN_ENABLE_AVXVNNI >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avxvnni) { >- qd8_f16_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni_prfm); >- qd8_f16_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni_prfm); >- qd8_f16_qc4w_gemm_config.init.f16_qc4w = xnn_init_f16_qc4w_minmax_avxvnni_params; >- qd8_f16_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w; >- qd8_f16_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w; >- qd8_f16_qc4w_gemm_config.mr = 5; >- qd8_f16_qc4w_gemm_config.nr = 8; >- qd8_f16_qc4w_gemm_config.log2_kr = 3; >- qd8_f16_qc4w_gemm_config.planes = 2; >- #endif >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qd8_f16_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx512skx); >- qd8_f16_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc4w_gemm_minmax_ukernel_5x8c8__avx512skx); >- qd8_f16_qc4w_gemm_config.init.f16_qc4w = xnn_init_f16_qc4w_minmax_avx_params; >- qd8_f16_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w; >- qd8_f16_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w; >- qd8_f16_qc4w_gemm_config.mr = 5; >- qd8_f16_qc4w_gemm_config.nr = 8; >- qd8_f16_qc4w_gemm_config.log2_kr = 3; >- qd8_f16_qc4w_gemm_config.planes = 2; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qd8_f16_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc4w_gemm_minmax_ukernel_1x8c8__avx2); > qd8_f16_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc4w_gemm_minmax_ukernel_3x8c8__avx2); > qd8_f16_qc4w_gemm_config.init.f16_qc4w = xnn_init_f16_qc4w_minmax_avx_params; >@@ -1643,69 +1576,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnnigfni) { >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnnigfni_prfm); >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnnigfni_prfm); >- qd8_f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_avx512vnni_params; >- qd8_f32_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w; >- qd8_f32_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w; >- qd8_f32_qc4w_gemm_config.mr = 7; >- qd8_f32_qc4w_gemm_config.nr = 16; >- qd8_f32_qc4w_gemm_config.log2_kr = 3; >- qd8_f32_qc4w_gemm_config.planes = 2; >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnni) { >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512vnni_prfm); >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512vnni_prfm); >- qd8_f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_avx512vnni_params; >- qd8_f32_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w; >- qd8_f32_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w; >- qd8_f32_qc4w_gemm_config.mr = 7; >- qd8_f32_qc4w_gemm_config.nr = 16; >- qd8_f32_qc4w_gemm_config.log2_kr = 3; >- qd8_f32_qc4w_gemm_config.planes = 2; >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnnigfni) { >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx512vnnigfni_prfm); >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx512vnnigfni_prfm); >- qd8_f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_avx512vnni_params; >- qd8_f32_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w; >- qd8_f32_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w; >- qd8_f32_qc4w_gemm_config.mr = 7; >- qd8_f32_qc4w_gemm_config.nr = 8; >- qd8_f32_qc4w_gemm_config.log2_kr = 3; >- qd8_f32_qc4w_gemm_config.planes = 2; >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnni) { >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx512vnni_prfm); >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x8c8__avx512vnni_prfm); >- qd8_f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_avx512vnni_params; >- qd8_f32_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w; >- qd8_f32_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w; >- qd8_f32_qc4w_gemm_config.mr = 7; >- qd8_f32_qc4w_gemm_config.nr = 8; >- qd8_f32_qc4w_gemm_config.log2_kr = 3; >- qd8_f32_qc4w_gemm_config.planes = 2; >- #if XNN_ENABLE_AVXVNNI >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avxvnni) { >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avxvnni_prfm); >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_5x8c8__avxvnni_prfm); >- qd8_f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_avxvnni_params; >- qd8_f32_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w; >- qd8_f32_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w; >- qd8_f32_qc4w_gemm_config.mr = 5; >- qd8_f32_qc4w_gemm_config.nr = 8; >- qd8_f32_qc4w_gemm_config.log2_kr = 3; >- qd8_f32_qc4w_gemm_config.planes = 2; >- #endif >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x16c8__avx512skx_prfm); >- qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_7x16c8__avx512skx_prfm); >- qd8_f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_scalar_params; >- qd8_f32_qc4w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_qc4w_gemm_gio_w; >- qd8_f32_qc4w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_qc4w_gemm_goi_w; >- qd8_f32_qc4w_gemm_config.mr = 7; >- qd8_f32_qc4w_gemm_config.nr = 16; >- qd8_f32_qc4w_gemm_config.log2_kr = 3; >- qd8_f32_qc4w_gemm_config.planes = 2; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_1x8c8__avx2); > qd8_f32_qc4w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc4w_gemm_minmax_ukernel_3x8c8__avx2); > qd8_f32_qc4w_gemm_config.init.f32_qc4w = xnn_init_f32_qc4w_minmax_avx_params; >@@ -2093,43 +1964,7 @@ > #elif (XNN_ARCH_X86 || XNN_ARCH_X86_64) && !XNN_PLATFORM_MOBILE > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnni) { >- qd8_f16_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc8w_gemm_minmax_ukernel_1x8c8__avx512vnni_prfm); >- qd8_f16_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc8w_gemm_minmax_ukernel_7x8c8__avx512vnni_prfm); >- qd8_f16_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f16_qc8w_igemm_minmax_ukernel_1x8c8__avx512vnni_prfm); >- qd8_f16_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f16_qc8w_igemm_minmax_ukernel_7x8c8__avx512vnni_prfm); >- qd8_f16_qc8w_gemm_config.init.f16 = xnn_init_f16_minmax_avxvnni_params; >- qd8_f16_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w; >- qd8_f16_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w; >- qd8_f16_qc8w_gemm_config.mr = 7; >- qd8_f16_qc8w_gemm_config.nr = 8; >- qd8_f16_qc8w_gemm_config.log2_kr = 3; >- #if XNN_ENABLE_AVXVNNI >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avxvnni) { >- // AVX VNNI should be checked before AVX512SKX as it performs better with VNNI microkernels >- qd8_f16_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc8w_gemm_minmax_ukernel_1x8c8__avxvnni_prfm); >- qd8_f16_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc8w_gemm_minmax_ukernel_5x8c8__avxvnni_prfm); >- qd8_f16_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f16_qc8w_igemm_minmax_ukernel_1x8c8__avxvnni_prfm); >- qd8_f16_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f16_qc8w_igemm_minmax_ukernel_5x8c8__avxvnni_prfm); >- qd8_f16_qc8w_gemm_config.init.f16 = xnn_init_f16_minmax_avxvnni_params; >- qd8_f16_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w; >- qd8_f16_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w; >- qd8_f16_qc8w_gemm_config.mr = 5; >- qd8_f16_qc8w_gemm_config.nr = 8; >- qd8_f16_qc8w_gemm_config.log2_kr = 3; >- #endif >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qd8_f16_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc8w_gemm_minmax_ukernel_1x8c8__avx512skx); >- qd8_f16_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc8w_gemm_minmax_ukernel_5x8c8__avx512skx); >- qd8_f16_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f16_qc8w_igemm_minmax_ukernel_1x8c8__avx512skx); >- qd8_f16_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f16_qc8w_igemm_minmax_ukernel_5x8c8__avx512skx); >- qd8_f16_qc8w_gemm_config.init.f16 = xnn_init_f16_minmax_avx_params; >- qd8_f16_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w; >- qd8_f16_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w; >- qd8_f16_qc8w_gemm_config.mr = 5; >- qd8_f16_qc8w_gemm_config.nr = 8; >- qd8_f16_qc8w_gemm_config.log2_kr = 3; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qd8_f16_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc8w_gemm_minmax_ukernel_1x8c8__avx2); > qd8_f16_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f16_qc8w_gemm_minmax_ukernel_3x8c8__avx2); > qd8_f16_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f16_qc8w_igemm_minmax_ukernel_1x8c8__avx2); >@@ -2465,68 +2300,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- #if XNN_ENABLE_AVX512AMX >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512amx) { >- qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x16c4__avx512amx); >- qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_7x16c4__avx512amx); >- qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x16c4__avx512vnni_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_7x16c4__avx512vnni_prfm); >- qd8_f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_avx512vnni_params; >- qd8_f32_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w; >- qd8_f32_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w; >- qd8_f32_qc8w_gemm_config.mr = 7; >- qd8_f32_qc8w_gemm_config.nr = 16; >- qd8_f32_qc8w_gemm_config.log2_kr = 2; >- } else >- #endif >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnni) { >- qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x16c8__avx512vnni_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_7x16c8__avx512vnni_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x16c8__avx512vnni_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_7x16c8__avx512vnni_prfm); >- qd8_f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_avx512vnni_params; >- qd8_f32_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w; >- qd8_f32_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w; >- qd8_f32_qc8w_gemm_config.mr = 7; >- qd8_f32_qc8w_gemm_config.nr = 16; >- qd8_f32_qc8w_gemm_config.log2_kr = 3; >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnni) { >- qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x8c8__avx512vnni_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_7x8c8__avx512vnni_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x8c8__avx512vnni_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_7x8c8__avx512vnni_prfm); >- qd8_f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_avxvnni_params; >- qd8_f32_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w; >- qd8_f32_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w; >- qd8_f32_qc8w_gemm_config.mr = 7; >- qd8_f32_qc8w_gemm_config.nr = 8; >- qd8_f32_qc8w_gemm_config.log2_kr = 3; >- #if XNN_ENABLE_AVXVNNI >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avxvnni) { >- // AVX VNNI should be checked before AVX512SKX as it performs better with VNNI microkernels >- qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x8c8__avxvnni_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_5x8c8__avxvnni_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x8c8__avxvnni_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_5x8c8__avxvnni_prfm); >- qd8_f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_avxvnni_params; >- qd8_f32_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w; >- qd8_f32_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w; >- qd8_f32_qc8w_gemm_config.mr = 5; >- qd8_f32_qc8w_gemm_config.nr = 8; >- qd8_f32_qc8w_gemm_config.log2_kr = 3; >- #endif >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x16c8__avx512skx_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_7x16c8__avx512skx_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x16c8__avx512skx_prfm); >- qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_7x16c8__avx512skx_prfm); >- qd8_f32_qc8w_gemm_config.init.f32 = xnn_init_f32_minmax_scalar_params; >- qd8_f32_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w; >- qd8_f32_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w; >- qd8_f32_qc8w_gemm_config.mr = 7; >- qd8_f32_qc8w_gemm_config.nr = 16; >- qd8_f32_qc8w_gemm_config.log2_kr = 3; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_1x8c8__avx2); > qd8_f32_qc8w_gemm_config.minmax.dqgemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_dqgemm_ukernel((xnn_dqgemm_ukernel_fn) xnn_qd8_f32_qc8w_gemm_minmax_ukernel_3x8c8__avx2); > qd8_f32_qc8w_gemm_config.minmax.dqigemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_dqigemm_ukernel((xnn_dqigemm_ukernel_fn) xnn_qd8_f32_qc8w_igemm_minmax_ukernel_1x8c8__avx2); >@@ -3234,79 +3008,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnni) { >- qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x16c8__avx512vnni_prfm); >- qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_7x16c8__avx512vnni_prfm); >- qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_1x16c8__avx512vnni_prfm); >- qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_7x16c8__avx512vnni_prfm); >- qs8_qc8w_gemm_config.init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_avx512vnni_params; >- qs8_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_to_qu8_gemm_gio_w; >- qs8_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_to_qu8_gemm_goi_w; >- qs8_qc8w_gemm_config.pack_igemm_goki = (xnn_pack_conv_goki_w_fn) xnn_pack_qs8_to_qu8_conv_goki_w; >- qs8_qc8w_gemm_config.pack_igemm_kgo = (xnn_pack_conv_kgo_w_fn) xnn_pack_qs8_to_qu8_conv_kgo_w; >- qs8_qc8w_gemm_config.pack_deconv_goki = (xnn_pack_deconv_goki_w_fn) xnn_pack_qs8_to_qu8_deconv_goki_w; >- qs8_qc8w_gemm_config.mr = 7; >- qs8_qc8w_gemm_config.nr = 16; >- qs8_qc8w_gemm_config.log2_kr = 3; >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512vnni) { >- qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__avx512vnni_prfm); >- qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_7x8c8__avx512vnni_prfm); >- qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_1x8c8__avx512vnni_prfm); >- qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_7x8c8__avx512vnni_prfm); >- qs8_qc8w_gemm_config.init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_avxvnni_params; >- qs8_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_to_qu8_gemm_gio_w; >- qs8_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_to_qu8_gemm_goi_w; >- qs8_qc8w_gemm_config.pack_igemm_goki = (xnn_pack_conv_goki_w_fn) xnn_pack_qs8_to_qu8_conv_goki_w; >- qs8_qc8w_gemm_config.pack_igemm_kgo = (xnn_pack_conv_kgo_w_fn) xnn_pack_qs8_to_qu8_conv_kgo_w; >- qs8_qc8w_gemm_config.pack_deconv_goki = (xnn_pack_deconv_goki_w_fn) xnn_pack_qs8_to_qu8_deconv_goki_w; >- qs8_qc8w_gemm_config.mr = 7; >- qs8_qc8w_gemm_config.nr = 8; >- qs8_qc8w_gemm_config.log2_kr = 3; >- #if XNN_ENABLE_AVXVNNI >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avxvnni) { >- qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__avxvnni_prfm); >- qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__avxvnni_prfm); >- qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_1x8c8__avxvnni_prfm); >- qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(5)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_5x8c8__avxvnni_prfm); >- qs8_qc8w_gemm_config.init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_avx512vnni_params; >- qs8_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_to_qu8_gemm_gio_w; >- qs8_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_to_qu8_gemm_goi_w; >- qs8_qc8w_gemm_config.pack_igemm_goki = (xnn_pack_conv_goki_w_fn) xnn_pack_qs8_to_qu8_conv_goki_w; >- qs8_qc8w_gemm_config.pack_igemm_kgo = (xnn_pack_conv_kgo_w_fn) xnn_pack_qs8_to_qu8_conv_kgo_w; >- qs8_qc8w_gemm_config.pack_deconv_goki = (xnn_pack_deconv_goki_w_fn) xnn_pack_qs8_to_qu8_deconv_goki_w; >- qs8_qc8w_gemm_config.mr = 5; >- qs8_qc8w_gemm_config.nr = 8; >- qs8_qc8w_gemm_config.log2_kr = 3; >- #endif >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x16c8__avx512skx_prfm); >- qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_7x16c8__avx512skx_prfm); >- qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_1x16c8__avx512skx_prfm); >- qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_7x16c8__avx512skx_prfm); >- qs8_qc8w_gemm_config.init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_avx512_params; >- qs8_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w; >- qs8_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w; >- qs8_qc8w_gemm_config.pack_igemm_goki = (xnn_pack_conv_goki_w_fn) xnn_pack_qs8_conv_goki_w; >- qs8_qc8w_gemm_config.pack_igemm_kgo = (xnn_pack_conv_kgo_w_fn) xnn_pack_qs8_conv_kgo_w; >- qs8_qc8w_gemm_config.pack_deconv_goki = (xnn_pack_deconv_goki_w_fn) xnn_pack_qs8_deconv_goki_w; >- qs8_qc8w_gemm_config.mr = 7; >- qs8_qc8w_gemm_config.nr = 16; >- qs8_qc8w_gemm_config.log2_kr = 3; >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__avx512skx); >- qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_4x8c8__avx512skx); >- qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_1x8c8__avx512skx); >- qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_4x8c8__avx512skx); >- qs8_qc8w_gemm_config.init.qs8_qc8w = xnn_init_qs8_qc8w_conv_minmax_fp32_avx512_params; >- qs8_qc8w_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qs8_gemm_gio_w; >- qs8_qc8w_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qs8_gemm_goi_w; >- qs8_qc8w_gemm_config.pack_igemm_goki = (xnn_pack_conv_goki_w_fn) xnn_pack_qs8_conv_goki_w; >- qs8_qc8w_gemm_config.pack_igemm_kgo = (xnn_pack_conv_kgo_w_fn) xnn_pack_qs8_conv_kgo_w; >- qs8_qc8w_gemm_config.pack_deconv_goki = (xnn_pack_deconv_goki_w_fn) xnn_pack_qs8_deconv_goki_w; >- qs8_qc8w_gemm_config.mr = 4; // TODO: upgrade to 5x8 prfm when supported >- qs8_qc8w_gemm_config.nr = 8; >- qs8_qc8w_gemm_config.log2_kr = 3; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__avx2); > qs8_qc8w_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_3x8c8__avx2); > qs8_qc8w_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qs8_qc8w_igemm_minmax_fp32_ukernel_1x8c8__avx2); >@@ -3696,29 +3398,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qu8_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qu8_gemm_minmax_fp32_ukernel_1x16c8__avx512skx_prfm); >- qu8_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qu8_gemm_minmax_fp32_ukernel_7x16c8__avx512skx_prfm); >- qu8_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qu8_igemm_minmax_fp32_ukernel_1x16c8__avx512skx_prfm); >- qu8_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(7)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qu8_igemm_minmax_fp32_ukernel_7x16c8__avx512skx_prfm); >- qu8_gemm_config.init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx512_params; >- qu8_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qu8_gemm_gio_w; >- qu8_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qu8_gemm_goi_w; >- qu8_gemm_config.mr = 7; >- qu8_gemm_config.nr = 16; >- qu8_gemm_config.log2_kr = 3; >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- qu8_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qu8_gemm_minmax_fp32_ukernel_1x8c8__avx512skx); >- qu8_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qu8_gemm_minmax_fp32_ukernel_4x8c8__avx512skx); >- qu8_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx512skx); >- qu8_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(4)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qu8_igemm_minmax_fp32_ukernel_4x8c8__avx512skx); >- qu8_gemm_config.init.qu8 = xnn_init_qu8_conv_minmax_fp32_avx512_params; >- qu8_gemm_config.pack_gemm_gio = (xnn_packw_gemm_gio_ukernel_fn) xnn_pack_qu8_gemm_gio_w; >- qu8_gemm_config.pack_gemm_goi = (xnn_packw_gemm_goi_ukernel_fn) xnn_pack_qu8_gemm_goi_w; >- qu8_gemm_config.mr = 4; // TODO: upgrade to 5x8 prfm when supported >- qu8_gemm_config.nr = 8; >- qu8_gemm_config.log2_kr = 3; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qu8_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qu8_gemm_minmax_fp32_ukernel_1x8c8__avx2); > qu8_gemm_config.minmax.gemm[XNN_MR_TO_INDEX(3)] = xnn_init_hmp_gemm_ukernel((xnn_gemm_ukernel_fn) xnn_qu8_gemm_minmax_fp32_ukernel_3x8c8__avx2); > qu8_gemm_config.minmax.igemm[XNN_MR_TO_INDEX(1)] = xnn_init_hmp_igemm_ukernel((xnn_igemm_ukernel_fn) xnn_qu8_igemm_minmax_fp32_ukernel_1x8c8__avx2); >diff '--color=auto' -Naur a/third_party/xnnpack/src/src/configs/hardware-config.c b/third_party/xnnpack/src/src/configs/hardware-config.c >--- a/third_party/xnnpack/src/src/configs/hardware-config.c 2024-05-23 22:40:15.474159500 +0000 >+++ b/third_party/xnnpack/src/src/configs/hardware-config.c 2024-06-08 15:15:52.000000000 +0000 >@@ -112,16 +112,15 @@ > hardware_config.use_x86_f16c = cpuinfo_has_x86_f16c(); > hardware_config.use_x86_fma3 = cpuinfo_has_x86_fma3(); > hardware_config.use_x86_avx2 = cpuinfo_has_x86_avx2(); >- hardware_config.use_x86_avx512f = cpuinfo_has_x86_avx512f(); >- hardware_config.use_x86_avx512skx = hardware_config.use_x86_avx512f && >- cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512vl(); >- hardware_config.use_x86_avx512vbmi = hardware_config.use_x86_avx512skx && cpuinfo_has_x86_avx512vbmi(); >- hardware_config.use_x86_avx512vnni = hardware_config.use_x86_avx512skx && cpuinfo_has_x86_avx512vnni(); >- hardware_config.use_x86_avx512vnnigfni = hardware_config.use_x86_avx512vnni && cpuinfo_has_x86_gfni(); >+ hardware_config.use_x86_avx512f = 0; >+ hardware_config.use_x86_avx512skx = 0; >+ hardware_config.use_x86_avx512vbmi = 0; >+ hardware_config.use_x86_avx512vnni = 0; >+ hardware_config.use_x86_avx512vnnigfni = 0; > #if XNN_ENABLE_AVX512AMX > // TODO(fbarchard): Use cpuinfo_has_x86_amx_int8 when available. > // Infer AMX support from Sapphire Rapids having fp16 and amx. >- hardware_config.use_x86_avx512amx = hardware_config.use_x86_avx512vnnigfni && cpuinfo_has_x86_avx512fp16(); >+ hardware_config.use_x86_avx512amx = 0; > #if XNN_ARCH_X86_64 && defined(__linux__) && !defined(CHROMIUM) > if (hardware_config.use_x86_avx512amx) { > size_t status = xnn_syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA, 0); >@@ -135,7 +134,7 @@ > hardware_config.use_x86_avx512amx = 0; > #endif > #if XNN_ENABLE_AVXVNNI >- hardware_config.use_x86_avxvnni = hardware_config.use_x86_avx2 && cpuinfo_has_x86_avxvnni(); >+ hardware_config.use_x86_avxvnni = 0; > #else > hardware_config.use_x86_avxvnni = 0; > #endif >diff '--color=auto' -Naur a/third_party/xnnpack/src/src/configs/prelu-config.c b/third_party/xnnpack/src/src/configs/prelu-config.c >--- a/third_party/xnnpack/src/src/configs/prelu-config.c 2024-05-23 22:40:15.474159500 +0000 >+++ b/third_party/xnnpack/src/src/configs/prelu-config.c 2024-06-08 15:09:02.000000000 +0000 >@@ -77,11 +77,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_prelu_config.ukernel = (xnn_prelu_ukernel_fn) xnn_f32_prelu_ukernel__avx512f_2x16; >- f32_prelu_config.row_tile = 2; >- f32_prelu_config.channel_tile = 16; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_prelu_config.ukernel = (xnn_prelu_ukernel_fn) xnn_f32_prelu_ukernel__avx_2x16; > f32_prelu_config.row_tile = 2; > f32_prelu_config.channel_tile = 16; >diff '--color=auto' -Naur a/third_party/xnnpack/src/src/configs/reduce-config.c b/third_party/xnnpack/src/src/configs/reduce-config.c >--- a/third_party/xnnpack/src/src/configs/reduce-config.c 2024-05-23 22:40:15.474159500 +0000 >+++ b/third_party/xnnpack/src/src/configs/reduce-config.c 2024-06-08 15:09:02.000000000 +0000 >@@ -106,12 +106,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_rminmax_config = (struct xnn_reduce_config) { >- .ukernel = (xnn_reduce_ukernel_fn) xnn_f32_rminmax_ukernel__avx512f_u64_acc4, >- .element_tile = 64, >- }; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_rminmax_config = (struct xnn_reduce_config) { > .ukernel = (xnn_reduce_ukernel_fn) xnn_f32_rminmax_ukernel__avx_u32_acc4, > .init.f32_default = xnn_init_f32_default_avx_params, >@@ -173,13 +168,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_rsum_config = (struct xnn_reduce_config) { >- .ukernel = (xnn_reduce_ukernel_fn) xnn_f32_rsum_ukernel__avx512f_u64_acc4, >- .init.f32_scale = xnn_init_f32_scale_scalar_params, >- .element_tile = 64, >- }; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_rsum_config = (struct xnn_reduce_config) { > .ukernel = (xnn_reduce_ukernel_fn) xnn_f32_rsum_ukernel__avx_u32_acc4, > .init.f32_scale = xnn_init_f32_scale_avx_params, >diff '--color=auto' -Naur a/third_party/xnnpack/src/src/configs/rmax-config.c b/third_party/xnnpack/src/src/configs/rmax-config.c >--- a/third_party/xnnpack/src/src/configs/rmax-config.c 2024-05-23 22:40:15.474159500 +0000 >+++ b/third_party/xnnpack/src/src/configs/rmax-config.c 2024-06-08 15:09:02.000000000 +0000 >@@ -68,9 +68,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (hardware_config->use_x86_avx512f) { >- f32_rmax_config.ukernel = (xnn_rmax_ukernel_fn) xnn_f32_rmax_ukernel__avx512f_u64_acc4; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_rmax_config.ukernel = (xnn_rmax_ukernel_fn) xnn_f32_rmax_ukernel__avx_u32_acc4; > f32_rmax_config.init.f32 = xnn_init_f32_default_avx_params; > } else { >diff '--color=auto' -Naur a/third_party/xnnpack/src/src/configs/unary-elementwise-config.c b/third_party/xnnpack/src/src/configs/unary-elementwise-config.c >--- a/third_party/xnnpack/src/src/configs/unary-elementwise-config.c 2024-05-23 22:40:15.474159500 +0000 >+++ b/third_party/xnnpack/src/src/configs/unary-elementwise-config.c 2024-06-08 15:09:02.000000000 +0000 >@@ -554,10 +554,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (hardware_config->use_x86_avx512skx) { >- f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__avx512skx_u16; >- f16_to_f32_cvt_config.element_tile = 16; >- } else if (hardware_config->use_x86_f16c) { >+ if (hardware_config->use_x86_f16c) { > f16_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f16_f32_vcvt_ukernel__f16c_u16; > f16_to_f32_cvt_config.element_tile = 16; > } else if (hardware_config->use_x86_avx) { >@@ -631,11 +628,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__avx512f_u16; >- f32_abs_config.init.f32_abs = xnn_init_f32_abs_avx512_params; >- f32_abs_config.element_tile = 16; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_abs_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vabs_ukernel__avx_u16; > f32_abs_config.init.f32_abs = xnn_init_f32_abs_avx_params; > f32_abs_config.element_tile = 16; >@@ -680,11 +673,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__avx512f_u16; >- f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_scalar_params; >- f32_clamp_config.element_tile = 16; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_clamp_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vclamp_ukernel__avx_u16; > f32_clamp_config.init.f32_minmax = xnn_init_f32_minmax_avx_params; > f32_clamp_config.element_tile = 16; >@@ -746,11 +735,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__avx512f_rr1_p6_u128; >- f32_elu_config.init.f32_elu = xnn_init_f32_elu_avx512_rr1_p6_params; >- f32_elu_config.element_tile = 128; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > f32_elu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_u56; > f32_elu_config.init.f32_elu = xnn_init_f32_elu_avx2_rr1_lut4_p4_params; > f32_elu_config.element_tile = 56; >@@ -824,11 +809,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__avx512f_u16; >- f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_avx512_params; >- f32_hswish_config.element_tile = 16; >- } else if (hardware_config->use_x86_fma3) { >+ if (hardware_config->use_x86_fma3) { > f32_hswish_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vhswish_ukernel__fma3_u16; > f32_hswish_config.init.f32_hswish = xnn_init_f32_hswish_avx_params; > f32_hswish_config.element_tile = 16; >@@ -888,11 +869,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__avx512f_u16; >- f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_scalar_params; >- f32_lrelu_config.element_tile = 16; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_lrelu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vlrelu_ukernel__avx_u16; > f32_lrelu_config.init.f32_lrelu = xnn_init_f32_lrelu_avx_params; > f32_lrelu_config.element_tile = 16; >@@ -961,11 +938,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__avx512f_u16; >- f32_neg_config.init.f32_neg = xnn_init_f32_neg_avx512_params; >- f32_neg_config.element_tile = 16; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_neg_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vneg_ukernel__avx_u16; > f32_neg_config.init.f32_neg = xnn_init_f32_neg_avx_params; > f32_neg_config.element_tile = 16; >@@ -1029,10 +1002,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__avx512f_u16; >- f32_rndd_config.element_tile = 16; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_rndd_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndd_ukernel__avx_u16; > f32_rndd_config.init.f32_rnd = xnn_init_f32_rnd_avx_params; > f32_rndd_config.element_tile = 16; >@@ -1081,10 +1051,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__avx512f_u16; >- f32_rndne_config.element_tile = 16; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_rndne_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndne_ukernel__avx_u16; > f32_rndne_config.init.f32_rnd = xnn_init_f32_rnd_avx_params; > f32_rndne_config.element_tile = 16; >@@ -1133,10 +1100,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__avx512f_u16; >- f32_rndu_config.element_tile = 16; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_rndu_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndu_ukernel__avx_u16; > f32_rndu_config.init.f32_rnd = xnn_init_f32_rnd_avx_params; > f32_rndu_config.element_tile = 16; >@@ -1185,10 +1149,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__avx512f_u16; >- f32_rndz_config.element_tile = 16; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_rndz_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrndz_ukernel__avx_u16; > f32_rndz_config.init.f32_rnd = xnn_init_f32_rnd_avx_params; > f32_rndz_config.element_tile = 16; >@@ -1235,11 +1196,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__avx512f_rr2_lut32_p2_perm2_scalef_div_u64; >- f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_avx512_rr2_lut32_p2_params; >- f32_sigmoid_config.element_tile = 64; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > f32_sigmoid_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsigmoid_ukernel__avx2_rr1_p5_div_u40; > f32_sigmoid_config.init.f32_sigmoid = xnn_init_f32_sigmoid_avx2_rr1_p5_params; > f32_sigmoid_config.element_tile = 40; >@@ -1298,10 +1255,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__avx512f_u16; >- f32_sqr_config.element_tile = 16; >- } else if (hardware_config->use_x86_avx) { >+ if (hardware_config->use_x86_avx) { > f32_sqr_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vsqr_ukernel__avx_u16; > f32_sqr_config.init.f32_default = xnn_init_f32_default_avx_params; > f32_sqr_config.element_tile = 16; >@@ -1368,11 +1322,7 @@ > #if XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512f) { >- f32_rsqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrsqrt_ukernel__avx512f_rsqrt_u32; >- f32_rsqrt_config.init.f32_rsqrt = xnn_init_f32_rsqrt_avx512_params; >- f32_rsqrt_config.element_tile = 32; >- } else if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_fma3) { >+ if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_fma3) { > f32_rsqrt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vrsqrt_ukernel__fma3_rsqrt_u16; > f32_rsqrt_config.init.f32_rsqrt = xnn_init_f32_rsqrt_fma3_params; > f32_rsqrt_config.element_tile = 16; >@@ -1416,11 +1366,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__avx512skx_expm1minus_rr1_lut4_p4h3ts_perm_div_u64; >- f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_avx512_expm1minus_rr1_lut4_p4h3_perm_params; >- f32_tanh_config.element_tile = 64; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > f32_tanh_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_vtanh_ukernel__avx2_expm1minus_rr1_lut4_p4h3ts_perm_div_u32; > f32_tanh_config.init.f32_tanh = xnn_init_f32_tanh_avx_expm1minus_rr1_lut4_p4h3_perm_params; > f32_tanh_config.element_tile = 32; >@@ -1500,10 +1446,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (hardware_config->use_x86_avx512skx) { >- f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__avx512skx_u16; >- f32_to_f16_cvt_config.element_tile = 16; >- } else if (hardware_config->use_x86_f16c) { >+ if (hardware_config->use_x86_f16c) { > f32_to_f16_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_f16_vcvt_ukernel__f16c_u16; > f32_to_f16_cvt_config.init.f32_f16_cvt = xnn_init_f32_f16_cvt_f16c_params; > f32_to_f16_cvt_config.element_tile = 16; >@@ -1571,11 +1514,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (hardware_config->use_x86_avx512skx) { >- f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__avx512skx_u128; >- f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_avx512_params; >- f32_to_qs8_cvt_config.element_tile = 128; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > f32_to_qs8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qs8_vcvt_ukernel__avx2_u64; > f32_to_qs8_cvt_config.init.f32_qs8_cvt = xnn_init_f32_qs8_cvt_avx2_params; > f32_to_qs8_cvt_config.element_tile = 64; >@@ -1645,11 +1584,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (hardware_config->use_x86_avx512skx) { >- f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__avx512skx_u128; >- f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_avx512_params; >- f32_to_qu8_cvt_config.element_tile = 128; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > f32_to_qu8_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_f32_qu8_vcvt_ukernel__avx2_u64; > f32_to_qu8_cvt_config.init.f32_qu8_cvt = xnn_init_f32_qu8_cvt_avx2_params; > f32_to_qu8_cvt_config.element_tile = 64; >@@ -1939,11 +1874,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (hardware_config->use_x86_avx512skx) { >- qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__avx512skx_u32; >- qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_avx512_params; >- qs8_to_f32_cvt_config.element_tile = 32; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qs8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qs8_f32_vcvt_ukernel__avx2_u16; > qs8_to_f32_cvt_config.init.qs8_f32_cvt = xnn_init_qs8_f32_cvt_avx_params; > qs8_to_f32_cvt_config.element_tile = 16; >@@ -2161,11 +2092,7 @@ > #elif XNN_ARCH_X86 || XNN_ARCH_X86_64 > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); >- if (hardware_config->use_x86_avx512skx) { >- qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__avx512skx_u32; >- qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_avx512_params; >- qu8_to_f32_cvt_config.element_tile = 32; >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > qu8_to_f32_cvt_config.ukernel = (xnn_vunary_ukernel_fn) xnn_qu8_f32_vcvt_ukernel__avx2_u16; > qu8_to_f32_cvt_config.init.qu8_f32_cvt = xnn_init_qu8_f32_cvt_avx_params; > qu8_to_f32_cvt_config.element_tile = 16; >diff '--color=auto' -Naur a/third_party/xnnpack/src/src/configs/x8-lut-config.c b/third_party/xnnpack/src/src/configs/x8-lut-config.c >--- a/third_party/xnnpack/src/src/configs/x8-lut-config.c 2024-05-23 22:40:15.478159700 +0000 >+++ b/third_party/xnnpack/src/src/configs/x8-lut-config.c 2024-06-08 15:09:02.000000000 +0000 >@@ -36,15 +36,7 @@ > const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); > assert(hardware_config != NULL); > >- if (!XNN_PLATFORM_MOBILE && hardware_config->use_x86_avx512skx) { >- if (hardware_config->use_x86_avx512vbmi) { >- x8_lut_config.microkernel = xnn_x8_lut_ukernel__avx512vbmi_vpermx2b_u128; >- x8_lut_config.tile_size = 128; >- } else { >- x8_lut_config.microkernel = xnn_x8_lut_ukernel__avx512skx_vpshufb_u64; >- x8_lut_config.tile_size = 64; >- } >- } else if (hardware_config->use_x86_avx2) { >+ if (hardware_config->use_x86_avx2) { > x8_lut_config.microkernel = xnn_x8_lut_ukernel__avx2_u128; > x8_lut_config.tile_size = 128; > } else if (hardware_config->use_x86_avx) {
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 933827
: 895782