15 #ifndef HIGHWAY_HWY_BASE_H_
16 #define HIGHWAY_HWY_BASE_H_
// Stringification helpers. HWY_STR_IMPL applies the preprocessor `#` operator
// directly to its argument. HWY_STR forwards through HWY_STR_IMPL so that an
// argument which is itself a macro is expanded first and the *expanded* text
// is what ends up in the string literal.
31 #define HWY_STR_IMPL(macro) #macro
32 #define HWY_STR(macro) HWY_STR_IMPL(macro)
// Attribute/keyword wrappers spelled with __restrict, __forceinline,
// __declspec(...) and __pragma(...).
// NOTE(review): these spellings look like the MSVC variant; the guarding
// preprocessor condition is not visible in this excerpt - confirm.
38 #define HWY_RESTRICT __restrict
39 #define HWY_INLINE __forceinline
40 #define HWY_NOINLINE __declspec(noinline)
42 #define HWY_NORETURN __declspec(noreturn)
// No branch-prediction hint in this variant: both expand to the bare
// expression.
43 #define HWY_LIKELY(expr) (expr)
44 #define HWY_UNLIKELY(expr) (expr)
// HWY_DIAGNOSTICS wraps its tokens in `warning(...)` and passes them to
// __pragma. HWY_DIAGNOSTICS_OFF takes two spellings and uses only the first
// (`msc`) here; `gcc` is ignored.
45 #define HWY_PRAGMA(tokens) __pragma(tokens)
46 #define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(warning(tokens))
47 #define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(msc)
// Expands to nothing in this variant.
48 #define HWY_MAYBE_UNUSED
49 #define HWY_HAS_ASSUME_ALIGNED 0
50 #if (_MSC_VER >= 1700)
51 #define HWY_MUST_USE_RESULT _Check_return_
53 #define HWY_MUST_USE_RESULT
// Attribute/keyword wrappers spelled with __restrict__, __attribute__((...)),
// __builtin_expect and _Pragma.
// NOTE(review): these spellings look like the GCC/Clang variant; the guarding
// preprocessor condition is not visible in this excerpt - confirm.
58 #define HWY_RESTRICT __restrict__
59 #define HWY_INLINE inline __attribute__((always_inline))
60 #define HWY_NOINLINE __attribute__((noinline))
61 #define HWY_FLATTEN __attribute__((flatten))
62 #define HWY_NORETURN __attribute__((noreturn))
// `!!(expr)` normalizes the condition to 0 or 1 before it is compared with
// the expected value passed to __builtin_expect.
63 #define HWY_LIKELY(expr) __builtin_expect(!!(expr), 1)
64 #define HWY_UNLIKELY(expr) __builtin_expect(!!(expr), 0)
// HWY_PRAGMA stringifies its tokens for the _Pragma operator.
// HWY_DIAGNOSTICS prefixes them with `GCC diagnostic`. HWY_DIAGNOSTICS_OFF
// takes two spellings and uses only the second (`gcc`) here; `msc` is ignored.
65 #define HWY_PRAGMA(tokens) _Pragma(#tokens)
66 #define HWY_DIAGNOSTICS(tokens) HWY_PRAGMA(GCC diagnostic tokens)
67 #define HWY_DIAGNOSTICS_OFF(msc, gcc) HWY_DIAGNOSTICS(gcc)
70 #define HWY_MAYBE_UNUSED __attribute__((unused))
71 #define HWY_MUST_USE_RESULT __attribute__((warn_unused_result))
79 #if HWY_HAS_ATTRIBUTE(__format__)
80 #define HWY_FORMAT(idx_fmt, idx_arg) \
81 __attribute__((__format__(__printf__, idx_fmt, idx_arg)))
83 #define HWY_FORMAT(idx_fmt, idx_arg)
91 #if HWY_HAS_BUILTIN(__builtin_assume_aligned)
92 #define HWY_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align))
94 #define HWY_ASSUME_ALIGNED(ptr, align) (ptr)
100 #if HWY_COMPILER_CLANG
101 #define HWY_PUSH_ATTRIBUTES(targets_str) \
102 HWY_PRAGMA(clang attribute push(__attribute__((target(targets_str))), \
103 apply_to = function))
104 #define HWY_POP_ATTRIBUTES HWY_PRAGMA(clang attribute pop)
105 #elif HWY_COMPILER_GCC
106 #define HWY_PUSH_ATTRIBUTES(targets_str) \
107 HWY_PRAGMA(GCC push_options) HWY_PRAGMA(GCC target targets_str)
108 #define HWY_POP_ATTRIBUTES HWY_PRAGMA(GCC pop_options)
110 #define HWY_PUSH_ATTRIBUTES(targets_str)
111 #define HWY_POP_ATTRIBUTES
// Declaration prefix combining `static` with the HWY_INLINE, HWY_FLATTEN and
// HWY_MAYBE_UNUSED macros.
117 #define HWY_API static HWY_INLINE HWY_FLATTEN HWY_MAYBE_UNUSED
// Token pasting with one level of indirection: HWY_CONCAT expands its
// arguments (if they are macros) before HWY_CONCAT_IMPL joins them with ##.
119 #define HWY_CONCAT_IMPL(a, b) a##b
120 #define HWY_CONCAT(a, b) HWY_CONCAT_IMPL(a, b)
// Smaller / larger of two values. Each argument appears twice in the
// expansion, so an argument with side effects (e.g. `i++`) may be evaluated
// twice. When the comparison is false (including equal values), `b` is the
// result.
122 #define HWY_MIN(a, b) ((a) < (b) ? (a) : (b))
123 #define HWY_MAX(a, b) ((a) > (b) ? (a) : (b))
// Expands to a std::atomic_thread_fence call with acquire-release ordering.
// The expansion has no trailing semicolon and uses names from <atomic>.
129 #define HWY_FENCE std::atomic_thread_fence(std::memory_order_acq_rel)
// Repeats `literal` four times, comma-separated.
136 #define HWY_REP4(literal) literal, literal, literal, literal
// Calls ::hwy::Abort, passing the current __FILE__ and __LINE__ followed by
// `format` and any further arguments. `##__VA_ARGS__` is a compiler extension
// that drops the preceding comma when no variadic arguments are supplied, so
// HWY_ABORT("text") is also accepted.
138 #define HWY_ABORT(format, ...) \
139 ::hwy::Abort(__FILE__, __LINE__, format, ##__VA_ARGS__)
142 #define HWY_ASSERT(condition) \
144 if (!(condition)) { \
145 HWY_ABORT("Assert %s", #condition); \
150 #if !defined(HWY_IS_DEBUG_BUILD)
153 #if (!defined(__OPTIMIZE__) && !defined(NDEBUG)) || \
154 defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \
155 defined(THREAD_SANITIZER) || defined(__clang_analyzer__)
156 #define HWY_IS_DEBUG_BUILD 1
158 #define HWY_IS_DEBUG_BUILD 0
162 #if HWY_IS_DEBUG_BUILD
163 #define HWY_DASSERT(condition) HWY_ASSERT(condition)
165 #define HWY_DASSERT(condition) \
170 #if defined(HWY_EMULATE_SVE)
181 #elif HWY_ARCH_RVV && defined(__riscv_vector)
193 #define HWY_ALIGN_MAX alignas(64)
194 #elif HWY_ARCH_RVV && defined(__riscv_vector)
195 #define HWY_ALIGN_MAX alignas(8)
197 #define HWY_ALIGN_MAX alignas(16)
206 #if HWY_ARCH_ARM && (__ARM_FP & 2)
207 #define HWY_NATIVE_FLOAT16 1
209 #define HWY_NATIVE_FLOAT16 0
212 #pragma pack(push, 1)
214 #if defined(HWY_EMULATE_SVE)
215 using float16_t = FarmFloat16;
216 #elif HWY_NATIVE_FLOAT16
217 using float16_t = __fp16;
239 template <
bool Condition,
class T>
246 template <
bool Condition,
class T =
void>
249 template <
typename T,
typename U>
254 template <
typename T>
259 template <
typename T,
typename U>
// SFINAE helpers intended for use as the last (defaulted, unnamed) template
// parameter: each expands to `hwy::EnableIf<condition>* = nullptr`, which
// removes the overload from consideration when `condition` is false.
//
// All helper names are fully qualified with `hwy::` so the macros also work
// when expanded outside namespace hwy (the predicates take no function
// arguments, so argument-dependent lookup cannot find them).

// Constraints on the total vector size in bytes, N * sizeof(T). `N` is
// parenthesized so that expressions such as `A + B` are multiplied as a whole.
#define HWY_IF_LE128(T, N) hwy::EnableIf<(N) * sizeof(T) <= 16>* = nullptr
#define HWY_IF_LE64(T, N) hwy::EnableIf<(N) * sizeof(T) <= 8>* = nullptr
#define HWY_IF_LE32(T, N) hwy::EnableIf<(N) * sizeof(T) <= 4>* = nullptr
#define HWY_IF_GE64(T, N) hwy::EnableIf<(N) * sizeof(T) >= 8>* = nullptr
#define HWY_IF_GE128(T, N) hwy::EnableIf<(N) * sizeof(T) >= 16>* = nullptr
// The extra parentheses keep `>` from being parsed as the end of the template
// argument list.
#define HWY_IF_GT128(T, N) hwy::EnableIf<((N) * sizeof(T) > 16)>* = nullptr

// Constraints on the lane type category. HWY_IF_SIGNED excludes floating-point
// types, which hwy::IsSigned also reports as signed.
#define HWY_IF_UNSIGNED(T) hwy::EnableIf<!hwy::IsSigned<T>()>* = nullptr
#define HWY_IF_SIGNED(T) \
  hwy::EnableIf<hwy::IsSigned<T>() && !hwy::IsFloat<T>()>* = nullptr
#define HWY_IF_FLOAT(T) hwy::EnableIf<hwy::IsFloat<T>()>* = nullptr
#define HWY_IF_NOT_FLOAT(T) hwy::EnableIf<!hwy::IsFloat<T>()>* = nullptr

// Constraints on the size of a single lane in bytes.
#define HWY_IF_LANE_SIZE(T, bytes) \
  hwy::EnableIf<sizeof(T) == (bytes)>* = nullptr
#define HWY_IF_NOT_LANE_SIZE(T, bytes) \
  hwy::EnableIf<sizeof(T) != (bytes)>* = nullptr
307 template <
typename T>
311 return IsSame<T, float>() || IsSame<T, double>();
314 template <
typename T>
328 template <
typename T>
330 static_assert(!IsFloat<T>(),
"Only for integer types");
331 return IsSigned<T>() ? T((1ULL << (
sizeof(T) * 8 - 1)) - 1)
332 :
static_cast<T
>(~0ull);
334 template <
typename T>
336 static_assert(!IsFloat<T>(),
"Only for integer types");
337 return IsSigned<T>() ? T(-1) - LimitsMax<T>() : T(0);
342 template <
typename T>
344 return LimitsMin<T>();
355 template <
typename T>
357 return LimitsMax<T>();
369 template <
typename T>
371 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
380 return 0x7FF0000000000000ULL;
385 template <
typename T>
387 static_assert(
sizeof(T) == 0,
"Only instantiate the specializations");
397 return 4503599627370496.0;
405 template <
typename T>
520 template <
typename T>
522 template <
typename T>
524 template <
typename T>
528 template <
typename T>
530 template <
typename T>
544 template <
typename T1,
typename T2>
546 return (a + b - 1) / b;
550 constexpr
inline size_t RoundUpTo(
size_t what,
size_t align) {
551 return DivCeil(what, align) * align;
556 #if HWY_COMPILER_MSVC
558 _BitScanForward(&index, x);
561 return static_cast<size_t>(__builtin_ctz(x));
566 #if HWY_COMPILER_MSVC
569 _BitScanForward64(&index, x);
573 uint32_t lsb =
static_cast<uint32_t
>(x & 0xFFFFFFFF);
576 uint32_t msb =
static_cast<uint32_t
>(x >> 32u);
577 _BitScanForward(&index, msb);
580 _BitScanForward(&index, lsb);
585 return static_cast<size_t>(__builtin_ctzll(x));
590 #if HWY_COMPILER_CLANG || HWY_COMPILER_GCC
591 return static_cast<size_t>(__builtin_popcountll(x));
592 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
593 return _mm_popcnt_u64(x);
594 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_32
595 return _mm_popcnt_u32(uint32_t(x)) + _mm_popcnt_u32(uint32_t(x >> 32));
597 x -= ((x >> 1) & 0x55555555U);
598 x = (((x >> 2) & 0x33333333U) + (x & 0x33333333U));
599 x = (((x >> 4) + x) & 0x0F0F0F0FU);
604 return (
unsigned int)x;
608 #if HWY_COMPILER_MSVC && HWY_ARCH_X86_64
609 #pragma intrinsic(_umul128)
614 #if defined(__SIZEOF_INT128__)
615 __uint128_t product = (__uint128_t)a * (__uint128_t)b;
616 *upper = (uint64_t)(product >> 64);
617 return (uint64_t)(product & 0xFFFFFFFFFFFFFFFFULL);
618 #elif HWY_COMPILER_MSVC && HWY_ARCH_X86_64
619 return _umul128(a, b, upper);
621 constexpr uint64_t kLo32 = 0xFFFFFFFFU;
622 const uint64_t lo_lo = (a & kLo32) * (b & kLo32);
623 const uint64_t hi_lo = (a >> 32) * (b & kLo32);
624 const uint64_t lo_hi = (a & kLo32) * (b >> 32);
625 const uint64_t hi_hi = (a >> 32) * (b >> 32);
626 const uint64_t t = (lo_lo >> 32) + (hi_lo & kLo32) + lo_hi;
627 *upper = (hi_lo >> 32) + (t >> 32) + hi_hi;
628 return (t << 32) | (lo_lo & kLo32);
633 template <
size_t kBytes,
typename From,
typename To>
635 #if HWY_COMPILER_MSVC
637 reinterpret_cast<const uint8_t*
>(from);
638 uint8_t*
HWY_RESTRICT to_bytes =
reinterpret_cast<uint8_t*
>(to);
639 for (
size_t i = 0; i < kBytes; ++i) {
640 to_bytes[i] = from_bytes[i];
644 __builtin_memcpy(to, from, kBytes);
649 uint32_t bits = bf.
bits;
652 CopyBytes<4>(&bits, &f);
658 CopyBytes<4>(&f, &bits);
660 bf.
bits =
static_cast<uint16_t
>(bits >> 16);
#define HWY_RESTRICT
Definition: base.h:58
#define HWY_NORETURN
Definition: base.h:62
#define HWY_API
Definition: base.h:117
#define HWY_MAYBE_UNUSED
Definition: base.h:70
Definition: aligned_allocator.h:23
HWY_API void CopyBytes(const From *from, To *to)
Definition: base.h:634
constexpr T MantissaEnd()
Definition: base.h:386
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero64(const uint64_t x)
Definition: base.h:565
HWY_API float F32FromBF16(bfloat16_t bf)
Definition: base.h:648
HWY_API uint64_t Mul128(uint64_t a, uint64_t b, uint64_t *HWY_RESTRICT upper)
Definition: base.h:613
HWY_API bfloat16_t BF16FromF32(float f)
Definition: base.h:656
typename detail::TypeFromSize< N >::Float FloatFromSize
Definition: base.h:539
typename RemoveConstT< T >::type RemoveConst
Definition: base.h:302
typename detail::TypeFromSize< N >::Unsigned UnsignedFromSize
Definition: base.h:535
constexpr float HighestValue< float >()
Definition: base.h:360
constexpr T ExponentMask()
Definition: base.h:370
typename detail::TypeFromSize< N >::Signed SignedFromSize
Definition: base.h:537
constexpr T1 DivCeil(T1 a, T2 b)
Definition: base.h:545
constexpr float MantissaEnd< float >()
Definition: base.h:391
double float64_t
Definition: base.h:234
constexpr bool IsSigned< bfloat16_t >()
Definition: base.h:323
constexpr bool IsSigned()
Definition: base.h:315
constexpr bool IsSigned< float16_t >()
Definition: base.h:319
constexpr double HighestValue< double >()
Definition: base.h:364
HWY_NORETURN void int const char * format
Definition: base.h:665
float float32_t
Definition: base.h:233
HWY_API size_t PopCount(uint64_t x)
Definition: base.h:589
constexpr double MantissaEnd< double >()
Definition: base.h:395
constexpr uint64_t ExponentMask< uint64_t >()
Definition: base.h:379
HWY_API size_t Num0BitsBelowLS1Bit_Nonzero32(const uint32_t x)
Definition: base.h:555
constexpr T LimitsMin()
Definition: base.h:335
constexpr float LowestValue< float >()
Definition: base.h:347
constexpr HWY_API bool IsSame()
Definition: base.h:260
constexpr double LowestValue< double >()
Definition: base.h:351
constexpr uint32_t ExponentMask< uint32_t >()
Definition: base.h:375
typename EnableIfT< Condition, T >::type EnableIf
Definition: base.h:247
static constexpr HWY_MAYBE_UNUSED size_t kMaxVectorSize
Definition: base.h:185
constexpr T LowestValue()
Definition: base.h:343
HWY_NORETURN void int line
Definition: base.h:665
typename detail::Relations< T >::Unsigned MakeUnsigned
Definition: base.h:521
HWY_NORETURN void HWY_FORMAT(3, 4) Abort(const char *file
typename detail::Relations< T >::Wide MakeWide
Definition: base.h:529
typename detail::Relations< T >::Float MakeFloat
Definition: base.h:525
typename detail::Relations< T >::Signed MakeSigned
Definition: base.h:523
constexpr bool IsFloat()
Definition: base.h:308
constexpr T LimitsMax()
Definition: base.h:329
constexpr T HighestValue()
Definition: base.h:356
constexpr size_t RoundUpTo(size_t what, size_t align)
Definition: base.h:550
typename detail::Relations< T >::Narrow MakeNarrow
Definition: base.h:531
T type
Definition: base.h:243
@ value
Definition: base.h:251
T type
Definition: base.h:298
T type
Definition: base.h:294
uint16_t bits
Definition: base.h:228
int16_t Signed
Definition: base.h:473
float Wide
Definition: base.h:474
uint16_t Unsigned
Definition: base.h:472
double Float
Definition: base.h:488
uint64_t Unsigned
Definition: base.h:486
int64_t Signed
Definition: base.h:487
float Narrow
Definition: base.h:489
int16_t Signed
Definition: base.h:466
float Wide
Definition: base.h:468
uint16_t Unsigned
Definition: base.h:465
uint32_t Unsigned
Definition: base.h:478
double Wide
Definition: base.h:481
float Float
Definition: base.h:480
int32_t Signed
Definition: base.h:479
uint16_t Unsigned
Definition: base.h:428
int16_t Signed
Definition: base.h:429
int32_t Wide
Definition: base.h:430
int8_t Narrow
Definition: base.h:431
uint32_t Unsigned
Definition: base.h:443
int64_t Wide
Definition: base.h:446
float Float
Definition: base.h:445
int16_t Narrow
Definition: base.h:447
int32_t Signed
Definition: base.h:444
int32_t Narrow
Definition: base.h:461
double Float
Definition: base.h:460
uint64_t Unsigned
Definition: base.h:458
int64_t Signed
Definition: base.h:459
int16_t Wide
Definition: base.h:417
int8_t Signed
Definition: base.h:416
uint8_t Unsigned
Definition: base.h:415
uint8_t Narrow
Definition: base.h:424
int16_t Signed
Definition: base.h:422
uint32_t Wide
Definition: base.h:423
uint16_t Unsigned
Definition: base.h:421
uint32_t Unsigned
Definition: base.h:435
uint64_t Wide
Definition: base.h:438
uint16_t Narrow
Definition: base.h:439
float Float
Definition: base.h:437
int32_t Signed
Definition: base.h:436
uint32_t Narrow
Definition: base.h:454
int64_t Signed
Definition: base.h:452
uint64_t Unsigned
Definition: base.h:451
double Float
Definition: base.h:453
int8_t Signed
Definition: base.h:410
uint8_t Unsigned
Definition: base.h:409
uint16_t Wide
Definition: base.h:411
int8_t Signed
Definition: base.h:497
uint8_t Unsigned
Definition: base.h:496
int16_t Signed
Definition: base.h:502
uint16_t Unsigned
Definition: base.h:501
int32_t Signed
Definition: base.h:507
uint32_t Unsigned
Definition: base.h:506
float Float
Definition: base.h:508
double Float
Definition: base.h:514
int64_t Signed
Definition: base.h:513
uint64_t Unsigned
Definition: base.h:512
uint16_t bits
Definition: base.h:223