19#ifndef HWY_HIGHWAY_INCLUDED
20#define HWY_HIGHWAY_INCLUDED
// HWY_FULL(T) or HWY_FULL(T, LMUL): names a ScalableTag for lane type T.
// With one argument this expands to HWY_FULL1(T); with two, to
// HWY_FULL2(T, LMUL).
#define HWY_FULL1(T) hwy::HWY_NAMESPACE::ScalableTag<T>
#define HWY_FULL2(T, LMUL) \
  hwy::HWY_NAMESPACE::ScalableTag<T, hwy::CeilLog2(HWY_MAX(0, LMUL))>
// Yields its third argument; used to select a macro by argument count.
#define HWY_3TH_ARG(arg1, arg2, arg3, ...) arg3
// Receives an already parenthesized argument list and applies HWY_3TH_ARG to
// it, so that the list is split into separate arguments on rescanning.
#define HWY_FULL_RECOMPOSER(args_with_paren) HWY_3TH_ARG args_with_paren
// Appends the candidate macros after the caller's arguments: the third entry
// is HWY_FULL1 if one argument was given and HWY_FULL2 if two were given.
// The trailing comma keeps the variadic part of HWY_3TH_ARG non-missing.
#define HWY_CHOOSE_FULL(...) \
  HWY_FULL_RECOMPOSER((__VA_ARGS__, HWY_FULL2, HWY_FULL1, ))
#define HWY_FULL(...) HWY_CHOOSE_FULL(__VA_ARGS__())(__VA_ARGS__)
// HWY_CAPPED(T, MAX_N): names the CappedTag for lane type T with MAX_N passed
// through as its second template argument.
#define HWY_CAPPED(T, MAX_N) hwy::HWY_NAMESPACE::CappedTag<T, MAX_N>
// HWY_STATIC_DISPATCH(FUNC_NAME): qualifies FUNC_NAME with the namespace
// (N_<target>) that corresponds to the value of HWY_STATIC_TARGET. No macro is
// defined if HWY_STATIC_TARGET matches none of the targets listed here.
#if HWY_STATIC_TARGET == HWY_SCALAR
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SCALAR::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_EMU128
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_EMU128::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_RVV
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_RVV::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_WASM_EMU256
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM_EMU256::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_WASM
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_WASM::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_NEON
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_NEON::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_SVE
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_SVE2
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_SVE_256
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE_256::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_SVE2_128
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SVE2_128::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_PPC8
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_PPC8::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_SSSE3
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSSE3::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_SSE4
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_SSE4::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_AVX2
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX2::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_AVX3
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3::FUNC_NAME
#elif HWY_STATIC_TARGET == HWY_AVX3_DL
#define HWY_STATIC_DISPATCH(FUNC_NAME) N_AVX3_DL::FUNC_NAME
#endif
// HWY_CHOOSE_FALLBACK(FUNC_NAME): address of the fallback implementation.
// Prefers N_EMU128, then N_SCALAR, according to the bits set in HWY_TARGETS;
// if neither bit is set, uses the statically selected target instead.
#if HWY_TARGETS & HWY_EMU128
#define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_EMU128::FUNC_NAME
#elif HWY_TARGETS & HWY_SCALAR
#define HWY_CHOOSE_FALLBACK(FUNC_NAME) &N_SCALAR::FUNC_NAME
#else
#define HWY_CHOOSE_FALLBACK(FUNC_NAME) &HWY_STATIC_DISPATCH(FUNC_NAME)
#endif
// HWY_CHOOSE_<TARGET>(FUNC_NAME): address of N_<TARGET>::FUNC_NAME when the
// target's bit is set in HWY_TARGETS, otherwise nullptr.
#if HWY_TARGETS & HWY_WASM_EMU256
#define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) &N_WASM_EMU256::FUNC_NAME
#else
#define HWY_CHOOSE_WASM_EMU256(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_WASM
#define HWY_CHOOSE_WASM(FUNC_NAME) &N_WASM::FUNC_NAME
#else
#define HWY_CHOOSE_WASM(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_RVV
#define HWY_CHOOSE_RVV(FUNC_NAME) &N_RVV::FUNC_NAME
#else
#define HWY_CHOOSE_RVV(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_NEON
#define HWY_CHOOSE_NEON(FUNC_NAME) &N_NEON::FUNC_NAME
#else
#define HWY_CHOOSE_NEON(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_SVE
#define HWY_CHOOSE_SVE(FUNC_NAME) &N_SVE::FUNC_NAME
#else
#define HWY_CHOOSE_SVE(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_SVE2
#define HWY_CHOOSE_SVE2(FUNC_NAME) &N_SVE2::FUNC_NAME
#else
#define HWY_CHOOSE_SVE2(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_SVE_256
#define HWY_CHOOSE_SVE_256(FUNC_NAME) &N_SVE_256::FUNC_NAME
#else
#define HWY_CHOOSE_SVE_256(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_SVE2_128
#define HWY_CHOOSE_SVE2_128(FUNC_NAME) &N_SVE2_128::FUNC_NAME
#else
#define HWY_CHOOSE_SVE2_128(FUNC_NAME) nullptr
#endif
// HWY_CHOOSE_PPC8(FUNC_NAME): address of N_PPC8::FUNC_NAME when the PPC8 bit
// is set in HWY_TARGETS, otherwise nullptr. Both branches must define the
// same macro name so that HWY_CHOOSE_PPC8 exists regardless of HWY_TARGETS.
#if HWY_TARGETS & HWY_PPC8
#define HWY_CHOOSE_PPC8(FUNC_NAME) &N_PPC8::FUNC_NAME
#else
#define HWY_CHOOSE_PPC8(FUNC_NAME) nullptr
#endif
// HWY_CHOOSE_<TARGET>(FUNC_NAME): address of N_<TARGET>::FUNC_NAME when the
// target's bit is set in HWY_TARGETS, otherwise nullptr.
#if HWY_TARGETS & HWY_SSSE3
#define HWY_CHOOSE_SSSE3(FUNC_NAME) &N_SSSE3::FUNC_NAME
#else
#define HWY_CHOOSE_SSSE3(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_SSE4
#define HWY_CHOOSE_SSE4(FUNC_NAME) &N_SSE4::FUNC_NAME
#else
#define HWY_CHOOSE_SSE4(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_AVX2
#define HWY_CHOOSE_AVX2(FUNC_NAME) &N_AVX2::FUNC_NAME
#else
#define HWY_CHOOSE_AVX2(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_AVX3
#define HWY_CHOOSE_AVX3(FUNC_NAME) &N_AVX3::FUNC_NAME
#else
#define HWY_CHOOSE_AVX3(FUNC_NAME) nullptr
#endif

#if HWY_TARGETS & HWY_AVX3_DL
#define HWY_CHOOSE_AVX3_DL(FUNC_NAME) &N_AVX3_DL::FUNC_NAME
#else
#define HWY_CHOOSE_AVX3_DL(FUNC_NAME) nullptr
#endif
// HWY_DISPATCH_WORKAROUND is 1 only when HWY_COMPILER_MSVC is nonzero and
// below 1915, and 0 otherwise. It selects between the two multi-target
// HWY_EXPORT definitions.
#if HWY_COMPILER_MSVC && HWY_COMPILER_MSVC < 1915
#define HWY_DISPATCH_WORKAROUND 1
#else
#define HWY_DISPATCH_WORKAROUND 0
#endif
212template <
typename RetType,
typename... Args>
217#if HWY_DISPATCH_WORKAROUND
218 template <FunctionType* const func>
222 return (*func)(args...);
231 template <FunctionType* const table[]>
235 return (table[chosen_target.
GetIndex()])(args...);
241template <
typename RetType,
typename... Args>
// HWY_DISPATCH_TABLE(FUNC_NAME): identifier of the dispatch table for
// FUNC_NAME, formed by passing FUNC_NAME and the suffix HighwayDispatchTable
// to HWY_CONCAT.
#define HWY_DISPATCH_TABLE(FUNC_NAME) \
  HWY_CONCAT(FUNC_NAME, HighwayDispatchTable)
278#if HWY_IDE || ((HWY_TARGETS & (HWY_TARGETS - 1)) == 0)
// Single-target form of HWY_EXPORT(FUNC_NAME): declares a one-element static
// const table whose only entry is the address of the statically dispatched
// FUNC_NAME. The element type is deduced from that address via decltype.
#define HWY_EXPORT(FUNC_NAME)                                             \
  HWY_MAYBE_UNUSED static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const \
      HWY_DISPATCH_TABLE(FUNC_NAME)[1] = {&HWY_STATIC_DISPATCH(FUNC_NAME)}
// Single-target form of HWY_DYNAMIC_DISPATCH(FUNC_NAME): bypasses the table
// and names the statically dispatched function directly.
#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) HWY_STATIC_DISPATCH(FUNC_NAME)
292#if HWY_DISPATCH_WORKAROUND
294#define HWY_EXPORT(FUNC_NAME) \
295 static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
296 FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
299 &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH( \
300 FUNC_NAME)))::ChooseAndCall<&HWY_STATIC_DISPATCH(FUNC_NAME)>, \
301 HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
302 HWY_CHOOSE_FALLBACK(FUNC_NAME), \
309#define HWY_EXPORT(FUNC_NAME) \
310 static decltype(&HWY_STATIC_DISPATCH(FUNC_NAME)) const HWY_DISPATCH_TABLE( \
311 FUNC_NAME)[HWY_MAX_DYNAMIC_TARGETS + 2] = { \
314 &decltype(hwy::DeduceFunctionCache(&HWY_STATIC_DISPATCH( \
315 FUNC_NAME)))::ChooseAndCall<HWY_DISPATCH_TABLE(FUNC_NAME)>, \
316 HWY_CHOOSE_TARGET_LIST(FUNC_NAME), \
317 HWY_CHOOSE_FALLBACK(FUNC_NAME), \
// Table-based form of HWY_DYNAMIC_DISPATCH(FUNC_NAME): indexes the dispatch
// table of FUNC_NAME with hwy::GetChosenTarget().GetIndex() and dereferences
// the selected function pointer, yielding a callable expression.
#define HWY_DYNAMIC_DISPATCH(FUNC_NAME) \
  (*(HWY_DISPATCH_TABLE(FUNC_NAME)[hwy::GetChosenTarget().GetIndex()]))
// HWY_CAP_* are aliases that expand to the corresponding HWY_HAVE_* macros.
#define HWY_CAP_INTEGER64 HWY_HAVE_INTEGER64
#define HWY_CAP_FLOAT16 HWY_HAVE_FLOAT16
#define HWY_CAP_FLOAT64 HWY_HAVE_FLOAT64
341#if defined(HWY_HIGHWAY_PER_TARGET) == defined(HWY_TARGET_TOGGLE)
342#ifdef HWY_HIGHWAY_PER_TARGET
343#undef HWY_HIGHWAY_PER_TARGET
345#define HWY_HIGHWAY_PER_TARGET
349#if HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4
351#elif HWY_TARGET == HWY_AVX2
353#elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL
355#elif HWY_TARGET == HWY_PPC8
356#error "PPC is not yet supported"
357#elif HWY_TARGET == HWY_NEON
359#elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2 || \
360 HWY_TARGET == HWY_SVE_256 || HWY_TARGET == HWY_SVE2_128
362#elif HWY_TARGET == HWY_WASM_EMU256
364#elif HWY_TARGET == HWY_WASM
366#elif HWY_TARGET == HWY_RVV
368#elif HWY_TARGET == HWY_EMU128
370#elif HWY_TARGET == HWY_SCALAR
373#pragma message("HWY_TARGET does not match any known target")
Definition aligned_allocator.h:27
FunctionCache< RetType, Args... > DeduceFunctionCache(RetType(*)(Args...))
Definition highway.h:242
HWY_DLLEXPORT ChosenTarget & GetChosenTarget()
HWY_DLLEXPORT int64_t SupportedTargets()
size_t HWY_INLINE GetIndex() const
Definition targets.h:301
void Update(int64_t targets)
Definition targets.h:282
RetType() FunctionType(Args...)
Definition highway.h:215
static RetType ChooseAndCall(Args... args)
Definition highway.h:232