Grok  9.5.0
skeleton-inl.h
Go to the documentation of this file.
1 // Copyright 2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Demo of functions that might be called from multiple SIMD modules (either
16 // other -inl.h files, or a .cc file between begin/end_target-inl). This is
17 // optional - all SIMD code can reside in .cc files. However, this allows
18 // splitting code into different files while still inlining instead of requiring
19 // calling through function pointers.
20 
21 // Include guard (still compiled once per target)
22 #if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE)
23 #ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
24 #undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
25 #else
26 #define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_
27 #endif
28 
29 // It is fine to #include normal or *-inl headers.
30 #include <stddef.h>
31 
32 #include "hwy/highway.h"
33 
35 namespace skeleton {
36 namespace HWY_NAMESPACE {
37 
38 using namespace hwy::HWY_NAMESPACE;
39 
40 // Example of a type-agnostic (caller-specified lane type) and width-agnostic
41 // (uses best available instruction set) function in a header.
42 //
43 // Computes x[i] = mul_array[i] * x_array[i] + add_array[i] for i < size.
44 template <class D, typename T>
45 HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array,
46  const T* HWY_RESTRICT add_array,
47  const size_t size, T* HWY_RESTRICT x_array) {
48  for (size_t i = 0; i < size; i += Lanes(d)) {
49  const auto mul = Load(d, mul_array + i);
50  const auto add = Load(d, add_array + i);
51  auto x = Load(d, x_array + i);
52  x = MulAdd(mul, x, add);
53  Store(x, d, x_array + i);
54  }
55 }
56 
57 // NOLINTNEXTLINE(google-readability-namespace-comments)
58 } // namespace HWY_NAMESPACE
59 } // namespace skeleton
61 
62 #endif // include guard
#define HWY_RESTRICT
Definition: base.h:58
#define HWY_MAYBE_UNUSED
Definition: base.h:70
Definition: dot-inl.h:30
HWY_API Vec128< T, N > Load(Simd< T, N > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2152
HWY_API Vec128< float, N > MulAdd(const Vec128< float, N > mul, const Vec128< float, N > x, const Vec128< float, N > add)
Definition: arm_neon-inl.h:1232
constexpr HWY_API size_t Lanes(Simd< T, N >)
Definition: arm_sve-inl.h:226
HWY_API void Store(Vec128< T, N > v, Simd< T, N > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2343
HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T *HWY_RESTRICT mul_array, const T *HWY_RESTRICT add_array, const size_t size, T *HWY_RESTRICT x_array)
Definition: skeleton-inl.h:45
Definition: skeleton-inl.h:35
#define HWY_NAMESPACE
Definition: set_macros-inl.h:77
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()