Grok  9.5.0
test_util-inl.h
Go to the documentation of this file.
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Target-specific helper functions for use by *_test.cc.
16 
17 #include "hwy/tests/hwy_gtest.h"
18 #include "hwy/tests/test_util.h"
19 
20 // Per-target include guard
21 #if defined(HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_) == \
22  defined(HWY_TARGET_TOGGLE)
23 #ifdef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
24 #undef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
25 #else
26 #define HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
27 #endif
28 
30 namespace hwy {
31 namespace HWY_NAMESPACE {
32 
33 template <typename T, HWY_IF_LANE_SIZE(T, 1)>
34 HWY_NOINLINE void PrintValue(T value) {
35  uint8_t byte;
36  CopyBytes<1>(&value, &byte); // endian-safe: we ensured sizeof(T)=1.
37  fprintf(stderr, "0x%02X,", byte);
38 }
39 
#if HWY_CAP_FLOAT16
// Prints the raw 16-bit pattern of a float16_t lane to stderr in hex,
// followed by a comma. Only compiled when HWY_CAP_FLOAT16 is set.
HWY_NOINLINE void PrintValue(float16_t value) {
  uint16_t bits;
  CopyBytes<2>(&value, &bits);
  // Four digits: a 16-bit value padded to only two digits would be
  // indistinguishable from a byte in the comma-separated lane dump.
  fprintf(stderr, "0x%04X,", bits);
}
#endif
47 
48 
49 
50 template <typename T, HWY_IF_NOT_LANE_SIZE(T, 1)>
51 HWY_NOINLINE void PrintValue(T value) {
52  fprintf(stderr, "%g,", double(value));
53 }
54 
55 // Prints lanes around `lane`, in memory order.
56 template <class D, class V = Vec<D>>
57 void Print(const D d, const char* caption, VecArg<V> v, size_t lane_u = 0,
58  size_t max_lanes = 7) {
59  using T = TFromD<D>;
60  const size_t N = Lanes(d);
61  auto lanes = AllocateAligned<T>(N);
62  Store(v, d, lanes.get());
63 
64  const auto info = hwy::detail::MakeTypeInfo<T>();
65  hwy::detail::PrintArray(info, caption, lanes.get(), N, lane_u, max_lanes);
66 }
67 
68 // Compare expected vector to vector.
69 template <class D, typename T = TFromD<D>, class V = Vec<D>>
70 void AssertVecEqual(D d, const T* expected, VecArg<V> actual,
71  const char* filename, const int line) {
72  const size_t N = Lanes(d);
73  auto actual_lanes = AllocateAligned<T>(N);
74  Store(actual, d, actual_lanes.get());
75 
76  const auto info = hwy::detail::MakeTypeInfo<T>();
77  const char* target_name = hwy::TargetName(HWY_TARGET);
78  hwy::detail::AssertArrayEqual(info, expected, actual_lanes.get(), N,
79  target_name, filename, line);
80 }
81 
82 // Compare expected lanes to vector.
83 template <class D, typename T = TFromD<D>, class V = Vec<D>>
84 HWY_NOINLINE void AssertVecEqual(D d, VecArg<V> expected, VecArg<V> actual,
85  const char* filename, int line) {
86  auto expected_lanes = AllocateAligned<T>(Lanes(d));
87  Store(expected, d, expected_lanes.get());
88  AssertVecEqual(d, expected_lanes.get(), actual, filename, line);
89 }
90 
91 // Only checks the valid mask elements (those whose index < Lanes(d)).
92 template <class D>
94  const char* filename, int line) {
95  AssertVecEqual(d, VecFromMask(d, a), VecFromMask(d, b), filename, line);
96 
97  const char* target_name = hwy::TargetName(HWY_TARGET);
98  AssertEqual(CountTrue(d, a), CountTrue(d, b), target_name, filename, line);
99  AssertEqual(AllTrue(d, a), AllTrue(d, b), target_name, filename, line);
100  AssertEqual(AllFalse(d, a), AllFalse(d, b), target_name, filename, line);
101 
102  // TODO(janwas): remove RVV once implemented (cast or vse1)
103 #if HWY_TARGET != HWY_RVV && HWY_TARGET != HWY_SCALAR
104  const size_t N = Lanes(d);
105  const Repartition<uint8_t, D> d8;
106  const size_t N8 = Lanes(d8);
107  auto bits_a = AllocateAligned<uint8_t>(HWY_MAX(8, N8));
108  auto bits_b = AllocateAligned<uint8_t>(HWY_MAX(8, N8));
109  memset(bits_a.get(), 0, N8);
110  memset(bits_b.get(), 0, N8);
111  const size_t num_bytes_a = StoreMaskBits(d, a, bits_a.get());
112  const size_t num_bytes_b = StoreMaskBits(d, b, bits_b.get());
113  AssertEqual(num_bytes_a, num_bytes_b, target_name, filename, line);
114  size_t i = 0;
115  // First check whole bytes (if that many elements are still valid)
116  for (; i < N / 8; ++i) {
117  if (bits_a[i] != bits_b[i]) {
118  fprintf(stderr, "Mismatch in byte %zu: %d != %d\n", i, bits_a[i],
119  bits_b[i]);
120  Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
121  Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
122  hwy::Abort(filename, line, "Masks not equal");
123  }
124  }
125  // Then the valid bit(s) in the last byte.
126  const size_t remainder = N % 8;
127  if (remainder != 0) {
128  const int mask = (1 << remainder) - 1;
129  const int valid_a = bits_a[i] & mask;
130  const int valid_b = bits_b[i] & mask;
131  if (valid_a != valid_b) {
132  fprintf(stderr, "Mismatch in last byte %zu: %d != %d\n", i, valid_a,
133  valid_b);
134  Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
135  Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
136  hwy::Abort(filename, line, "Masks not equal");
137  }
138  }
139 #endif
140 }
141 
142 // Only sets valid elements (those whose index < Lanes(d)). This helps catch
143 // tests that are not masking off the (undefined) upper mask elements.
144 //
145 // TODO(janwas): with HWY_NOINLINE GCC zeros the upper half of AVX2 masks.
146 template <class D>
148  return FirstN(d, Lanes(d));
149 }
150 
151 template <class D>
153  const auto zero = Zero(RebindToSigned<D>());
154  return RebindMask(d, Lt(zero, zero));
155 }
156 
#ifndef HWY_ASSERT_EQ

// Each macro forwards its arguments, plus the call site (__FILE__/__LINE__),
// to the corresponding Assert* helper. They are defined only once, even though
// this header is re-included for every target.

// Scalar comparison; also passes the name of the current target.
#define HWY_ASSERT_EQ(expected, actual)                                     \
  hwy::AssertEqual(expected, actual, hwy::TargetName(HWY_TARGET), __FILE__, \
                   __LINE__)

// String comparison; also passes the name of the current target.
#define HWY_ASSERT_STRING_EQ(expected, actual)                          \
  hwy::AssertStringEqual(expected, actual, hwy::TargetName(HWY_TARGET), \
                         __FILE__, __LINE__)

// Vector comparison; `expected` may be a vector or a pointer to lanes.
#define HWY_ASSERT_VEC_EQ(d, expected, actual) \
  AssertVecEqual(d, expected, actual, __FILE__, __LINE__)

// Mask comparison.
#define HWY_ASSERT_MASK_EQ(d, expected, actual) \
  AssertMaskEqual(d, expected, actual, __FILE__, __LINE__)

#endif  // HWY_ASSERT_EQ
174 
175 // Helpers for instantiating tests with combinations of lane types / counts.
176 
177 // For ensuring we do not call tests with D such that widening D results in 0
178 // lanes. Example: assume T=u32, VLEN=256, and fraction=1/8: there is no 1/8th
179 // of a u64 vector in this case.
180 template <class D, HWY_IF_NOT_LANE_SIZE_D(D, 8)>
181 HWY_INLINE size_t PromotedLanes(const D d) {
182  return Lanes(RepartitionToWide<decltype(d)>());
183 }
184 // Already the widest possible T, cannot widen.
185 template <class D, HWY_IF_LANE_SIZE_D(D, 8)>
186 HWY_INLINE size_t PromotedLanes(const D d) {
187  return Lanes(d);
188 }
189 
190 // For all power of two N in [kMinLanes, kMul * kMinLanes] (so that recursion
191 // stops at kMul == 0). Note that N may be capped or a fraction.
192 template <typename T, size_t kMul, size_t kMinLanes, class Test,
193  bool kPromote = false>
194 struct ForeachSizeR {
195  static void Do() {
197 
198  // Skip invalid fractions (e.g. 1/8th of u32x4).
199  const size_t lanes = kPromote ? PromotedLanes(d) : Lanes(d);
200  if (lanes < kMinLanes) return;
201 
202  Test()(T(), d);
203 
204  static_assert(kMul != 0, "Recursion should have ended already");
205  ForeachSizeR<T, kMul / 2, kMinLanes, Test, kPromote>::Do();
206  }
207 };
208 
209 // Base case to stop the recursion.
210 template <typename T, size_t kMinLanes, class Test, bool kPromote>
211 struct ForeachSizeR<T, 0, kMinLanes, Test, kPromote> {
212  static void Do() {}
213 };
214 
215 // These adapters may be called directly, or via For*Types:
216 
217 // Calls Test for all power of two N in [1, Lanes(d) / kFactor]. This is for
218 // ops that widen their input, e.g. Combine (not supported by HWY_SCALAR).
219 template <class Test, size_t kFactor = 2>
221  template <typename T>
222  void operator()(T /*unused*/) const {
223 #if HWY_TARGET == HWY_SCALAR
224  // not supported
225 #else
226  constexpr bool kPromote = true;
227 #if HWY_TARGET == HWY_RVV
228  ForeachSizeR<T, 8 / kFactor, HWY_LANES(T), Test, kPromote>::Do();
229  // TODO(janwas): also capped
230  // ForeachSizeR<T, (16 / sizeof(T)) / kFactor, 1, Test, kPromote>::Do();
231 #elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2
232  // Capped
233  ForeachSizeR<T, (16 / sizeof(T)) / kFactor, 1, Test, kPromote>::Do();
234  // Fractions
235  ForeachSizeR<T, 8 / kFactor, HWY_LANES(T) / 8, Test, kPromote>::Do();
236 #else
237  ForeachSizeR<T, HWY_LANES(T) / kFactor, 1, Test, kPromote>::Do();
238 #endif
239 #endif // HWY_SCALAR
240  }
241 };
242 
243 // Calls Test for all power of two N in [kFactor, Lanes(d)]. This is for ops
244 // that narrow their input, e.g. UpperHalf.
245 template <class Test, size_t kFactor = 2>
247  template <typename T>
248  void operator()(T /*unused*/) const {
249 #if HWY_TARGET == HWY_SCALAR
250  // not supported
251 #elif HWY_TARGET == HWY_RVV
252  ForeachSizeR<T, 8 / kFactor, kFactor * HWY_LANES(T), Test>::Do();
253  // TODO(janwas): also capped
254 #elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2
255  // Capped
256  ForeachSizeR<T, (16 / sizeof(T)) / kFactor, kFactor, Test>::Do();
257  // Fractions
258  ForeachSizeR<T, 8 / kFactor, kFactor * HWY_LANES(T) / 8, Test>::Do();
259 #elif HWY_TARGET == HWY_SCALAR
260  // not supported
261 #else
262  ForeachSizeR<T, HWY_LANES(T) / kFactor, kFactor, Test>::Do();
263 #endif
264  }
265 };
266 
267 // Calls Test for all power of two N in [16 / sizeof(T), Lanes(d)]. This is for
268 // ops that require at least 128 bits, e.g. AES or 64x64 = 128 mul.
269 template <class Test>
271  template <typename T>
272  void operator()(T /*unused*/) const {
273 #if HWY_TARGET == HWY_SCALAR
274  // not supported
275 #elif HWY_TARGET == HWY_RVV
276  ForeachSizeR<T, 8, HWY_LANES(T), Test>::Do();
277  // TODO(janwas): also capped
278  // ForeachSizeR<T, 1, (16 / sizeof(T)), Test>::Do();
279 #elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2
280  // Capped
281  ForeachSizeR<T, 1, 16 / sizeof(T), Test>::Do();
282  // Fractions
283  ForeachSizeR<T, 8, HWY_LANES(T) / 8, Test>::Do();
284 #else
285  ForeachSizeR<T, HWY_LANES(T) / (16 / sizeof(T)), (16 / sizeof(T)),
286  Test>::Do();
287 #endif
288  }
289 };
290 
291 // Calls Test for all power of two N in [8 / sizeof(T), Lanes(d)]. This is for
292 // ops that require at least 64 bits, e.g. casts.
293 template <class Test>
295  template <typename T>
296  void operator()(T /*unused*/) const {
297 #if HWY_TARGET == HWY_SCALAR
298  // not supported
299 #elif HWY_TARGET == HWY_RVV
300  ForeachSizeR<T, 8, HWY_LANES(T), Test>::Do();
301  // TODO(janwas): also capped
302  // ForeachSizeR<T, 1, (8 / sizeof(T)), Test>::Do();
303 #elif HWY_TARGET == HWY_SVE || HWY_TARGET == HWY_SVE2
304  // Capped
305  ForeachSizeR<T, 1, 8 / sizeof(T), Test>::Do();
306  // Fractions
307  ForeachSizeR<T, 8, HWY_LANES(T) / 8, Test>::Do();
308 #else
309  ForeachSizeR<T, HWY_LANES(T) / (8 / sizeof(T)), (8 / sizeof(T)),
310  Test>::Do();
311 #endif
312  }
313 };
314 
315 // Calls Test for all N that can be promoted (not the same as Extendable because
316 // HWY_SCALAR has one lane). Also used for ZipLower, but not ZipUpper.
317 template <class Test, size_t kFactor = 2>
319  template <typename T>
320  void operator()(T /*unused*/) const {
321 #if HWY_TARGET == HWY_SCALAR
322  ForeachSizeR<T, 1, 1, Test, /*kPromote=*/true>::Do();
323 #else
325 #endif
326  }
327 };
328 
329 // Calls Test for all N that can be demoted (not the same as Shrinkable because
330 // HWY_SCALAR has one lane). Also used for LowerHalf, but not UpperHalf.
331 template <class Test, size_t kFactor = 2>
333  template <typename T>
334  void operator()(T /*unused*/) const {
335 #if HWY_TARGET == HWY_SCALAR
337 #else
339 #endif
340  }
341 };
342 
343 // Calls Test for all power of two N in [1, Lanes(d)]. This is the default
344 // for ops that neither narrow nor widen their input, nor require 128 bits.
345 template <class Test>
347  template <typename T>
348  void operator()(T t) const {
350  }
351 };
352 
353 // Type lists to shorten call sites:
354 
// Invokes `func` once per signed integer lane type, narrowest first, passing a
// zero value of that type. int64_t is included only if HWY_CAP_INTEGER64.
template <class Func>
void ForSignedTypes(const Func& func) {
  func(int8_t{});
  func(int16_t{});
  func(int32_t{});
#if HWY_CAP_INTEGER64
  func(int64_t{});
#endif
}
364 
// Invokes `func` once per unsigned integer lane type, narrowest first, passing
// a zero value of that type. uint64_t is included only if HWY_CAP_INTEGER64.
template <class Func>
void ForUnsignedTypes(const Func& func) {
  func(uint8_t{});
  func(uint16_t{});
  func(uint32_t{});
#if HWY_CAP_INTEGER64
  func(uint64_t{});
#endif
}
374 
// Invokes `func` for every integer lane type: all signed types first, then all
// unsigned types.
template <class Func>
void ForIntegerTypes(const Func& func) {
  ForSignedTypes(func);    // int8_t .. int32_t/int64_t
  ForUnsignedTypes(func);  // uint8_t .. uint32_t/uint64_t
}
380 
// Invokes `func` with a zero float, and with a zero double if HWY_CAP_FLOAT64.
template <class Func>
void ForFloatTypes(const Func& func) {
  func(float{});
#if HWY_CAP_FLOAT64
  func(double{});
#endif
}
388 
// Invokes `func` for every lane type: all integer types first, then the
// floating-point types.
template <class Func>
void ForAllTypes(const Func& func) {
  ForIntegerTypes(func);  // signed, then unsigned
  ForFloatTypes(func);    // float, and double if available
}
394 
// Invokes `func` for the 32-bit (and, if HWY_CAP_INTEGER64, 64-bit) unsigned
// and signed integer types, followed by the floating-point types.
template <class Func>
void ForUIF3264(const Func& func) {
  // 32-bit integers: unsigned before signed.
  func(uint32_t{});
  func(int32_t{});
#if HWY_CAP_INTEGER64
  // 64-bit integers, in the same order.
  func(uint64_t{});
  func(int64_t{});
#endif

  ForFloatTypes(func);
}
406 
// Scales down the trip count of test loops so that emulated or slow builds
// finish quickly: divided by 16 on RVV, by 4 on ARM, by 8 in debug builds,
// unchanged otherwise. The result is never less than 2, to ensure some
// diversity. Each branch is a single return so the function stays a valid
// C++11 constexpr.
constexpr size_t AdjustedReps(size_t max_reps) {
#if HWY_ARCH_RVV
  return (max_reps / 16 > 2) ? max_reps / 16 : 2;
#elif HWY_ARCH_ARM
  return (max_reps / 4 > 2) ? max_reps / 4 : 2;
#elif HWY_IS_DEBUG_BUILD
  return (max_reps / 8 > 2) ? max_reps / 8 : 2;
#else
  return (max_reps > 2) ? max_reps : 2;
#endif
}
420 
421 // NOLINTNEXTLINE(google-readability-namespace-comments)
422 } // namespace HWY_NAMESPACE
423 } // namespace hwy
425 
426 #endif // per-target include guard
#define HWY_MAX(a, b)
Definition: base.h:123
#define HWY_NOINLINE
Definition: base.h:60
#define HWY_INLINE
Definition: base.h:59
#define HWY_TARGET
Definition: detect_targets.h:330
V VecArg
Definition: shared-inl.h:226
constexpr size_t AdjustedReps(size_t max_reps)
Definition: test_util-inl.h:409
HWY_API auto Lt(V a, V b) -> decltype(a==b)
Definition: arm_neon-inl.h:5035
HWY_API size_t StoreMaskBits(Simd< T, N >, const Mask128< T, N > mask, uint8_t *bits)
Definition: arm_neon-inl.h:4528
HWY_API Mask128< T, N > FirstN(const Simd< T, N > d, size_t num)
Definition: arm_neon-inl.h:1806
HWY_API Vec128< T, N > Load(Simd< T, N > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2152
Repartition< MakeWide< TFromD< D > >, D > RepartitionToWide
Definition: shared-inl.h:158
HWY_API Vec128< T, N > VecFromMask(const Mask128< T, N > v)
Definition: arm_neon-inl.h:1607
void ForUIF3264(const Func &func)
Definition: test_util-inl.h:396
constexpr HWY_API size_t Lanes(Simd< T, N >)
Definition: arm_sve-inl.h:226
HWY_NOINLINE void PrintValue(T value)
Definition: test_util-inl.h:34
void ForAllTypes(const Func &func)
Definition: test_util-inl.h:390
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition: shared-inl.h:147
HWY_API Mask128< TTo, N > RebindMask(Simd< TTo, N > dto, Mask128< TFrom, N > m)
Definition: arm_neon-inl.h:1619
void ForFloatTypes(const Func &func)
Definition: test_util-inl.h:382
void Print(const D d, const char *caption, VecArg< V > v, size_t lane_u=0, size_t max_lanes=7)
Definition: test_util-inl.h:57
HWY_API size_t CountTrue(Full128< T >, const Mask128< T > mask)
Definition: arm_neon-inl.h:4509
void ForIntegerTypes(const Func &func)
Definition: test_util-inl.h:376
HWY_API bool AllFalse(const Full128< T > d, const Mask128< T > m)
Definition: arm_neon-inl.h:4538
HWY_API bool AllTrue(const Simd< T, N > d, const Mask128< T, N > m)
Definition: arm_neon-inl.h:4557
void AssertVecEqual(D d, const T *expected, VecArg< V > actual, const char *filename, const int line)
Definition: test_util-inl.h:70
void ForSignedTypes(const Func &func)
Definition: test_util-inl.h:356
HWY_NOINLINE void AssertMaskEqual(D d, VecArg< Mask< D >> a, VecArg< Mask< D >> b, const char *filename, int line)
Definition: test_util-inl.h:93
HWY_INLINE size_t PromotedLanes(const D d)
Definition: test_util-inl.h:181
typename D::template Repartition< T > Repartition
Definition: shared-inl.h:155
decltype(MaskFromVec(Zero(D()))) Mask
Definition: generic_ops-inl.h:37
HWY_API Vec128< T, N > Zero(Simd< T, N > d)
Definition: arm_neon-inl.h:710
HWY_INLINE Mask< D > MaskFalse(const D d)
Definition: test_util-inl.h:152
HWY_INLINE Mask< D > MaskTrue(const D d)
Definition: test_util-inl.h:147
void ForUnsignedTypes(const Func &func)
Definition: test_util-inl.h:366
HWY_API void Store(Vec128< T, N > v, Simd< T, N > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2343
typename D::T TFromD
Definition: shared-inl.h:140
void AssertArrayEqual(const TypeInfo &info, const void *expected_void, const void *actual_void, size_t N, const char *target_name, const char *filename, int line)
void PrintArray(const TypeInfo &info, const char *caption, const void *array_void, size_t N, size_t lane_u=0, size_t max_lanes=7)
Definition: aligned_allocator.h:23
FuncOutput(*)(const void *, FuncInput) Func
Definition: nanobenchmark.h:102
static HWY_MAYBE_UNUSED const char * TargetName(uint32_t target)
Definition: targets.h:74
HWY_INLINE void AssertEqual(const T expected, const T actual, const char *target_name, const char *filename, int line, size_t lane=0)
Definition: test_util.h:173
HWY_NORETURN void int line
Definition: base.h:665
message_info info
Definition: ojph_message.cpp:50
#define HWY_LANES(T)
Definition: set_macros-inl.h:80
#define HWY_NAMESPACE
Definition: set_macros-inl.h:77
Definition: test_util-inl.h:332
void operator()(T) const
Definition: test_util-inl.h:334
Definition: test_util-inl.h:220
void operator()(T) const
Definition: test_util-inl.h:222
Definition: test_util-inl.h:270
void operator()(T) const
Definition: test_util-inl.h:272
Definition: test_util-inl.h:294
void operator()(T) const
Definition: test_util-inl.h:296
Definition: test_util-inl.h:346
void operator()(T t) const
Definition: test_util-inl.h:348
Definition: test_util-inl.h:318
void operator()(T) const
Definition: test_util-inl.h:320
Definition: test_util-inl.h:246
void operator()(T) const
Definition: test_util-inl.h:248
Definition: test_util-inl.h:194
static void Do()
Definition: test_util-inl.h:195
Definition: shared-inl.h:35
Definition: base.h:222
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()