Grok  9.5.0
set_macros-inl.h
Go to the documentation of this file.
1 // Copyright 2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Sets macros based on HWY_TARGET.
16 
// Include guard that is toggled by foreach_target. It deliberately lacks the
// usual _H_ suffix so that copybara does not rename it.
// The guard is flipped whenever it is in the same state as HWY_TARGET_TOGGLE
// (both defined, or both undefined); otherwise it is left alone.
#if defined(HWY_SET_MACROS_PER_TARGET) && defined(HWY_TARGET_TOGGLE)
#undef HWY_SET_MACROS_PER_TARGET
#elif !defined(HWY_SET_MACROS_PER_TARGET) && !defined(HWY_TARGET_TOGGLE)
#define HWY_SET_MACROS_PER_TARGET
#endif  // HWY_SET_MACROS_PER_TARGET
27 
28 #include "hwy/detect_targets.h"
29 
// Clear every macro that the per-target sections below (re)define, so that a
// repeated inclusion for a different HWY_TARGET starts from a clean slate.
#undef HWY_NAMESPACE
#undef HWY_ALIGN
#undef HWY_MAX_BYTES
#undef HWY_LANES

#undef HWY_CAP_INTEGER64
#undef HWY_CAP_FLOAT16
#undef HWY_CAP_FLOAT64
// HWY_CAP_AES is only defined by the SSSE3 section; reset it too so its value
// cannot carry over into a later inclusion for another target.
#undef HWY_CAP_AES
#undef HWY_CAP_GE256
#undef HWY_CAP_GE512

#undef HWY_TARGET_STR
42 
// Optional pieces of the x86 target strings. Defining the corresponding
// HWY_DISABLE_* macro replaces the feature list with an empty string.
#ifndef HWY_DISABLE_PCLMUL_AES
#define HWY_TARGET_STR_PCLMUL_AES ",pclmul,aes"
#else
#define HWY_TARGET_STR_PCLMUL_AES ""
#endif

#ifndef HWY_DISABLE_BMI2_FMA
#define HWY_TARGET_STR_BMI2_FMA ",bmi,bmi2,fma"
#else
#define HWY_TARGET_STR_BMI2_FMA ""
#endif

#ifndef HWY_DISABLE_F16C
#define HWY_TARGET_STR_F16C ",f16c"
#else
#define HWY_TARGET_STR_F16C ""
#endif

// Baseline string; every later x86 target string is built on top of it.
#define HWY_TARGET_STR_SSSE3 "sse2,ssse3"

#define HWY_TARGET_STR_SSE4 \
  HWY_TARGET_STR_SSSE3 ",sse4.1,sse4.2" HWY_TARGET_STR_PCLMUL_AES

// Each string includes the previous targets, which are the half-vectors of
// the next target.
#define HWY_TARGET_STR_AVX2 \
  HWY_TARGET_STR_SSE4 ",avx,avx2" HWY_TARGET_STR_BMI2_FMA HWY_TARGET_STR_F16C

#define HWY_TARGET_STR_AVX3 \
  HWY_TARGET_STR_AVX2 ",avx512f,avx512vl,avx512dq,avx512bw"
70 
// The definitions below are not protected by the include guard above, so
// HWY_TARGET_STR is redefined on each include, governed by the current
// HWY_TARGET.
73 //-----------------------------------------------------------------------------
74 // SSSE3
75 #if HWY_TARGET == HWY_SSSE3
76 
77 #define HWY_NAMESPACE N_SSSE3
78 #define HWY_ALIGN alignas(16)
79 #define HWY_MAX_BYTES 16
80 #define HWY_LANES(T) (16 / sizeof(T))
81 
82 #define HWY_CAP_INTEGER64 1
83 #define HWY_CAP_FLOAT16 1
84 #define HWY_CAP_FLOAT64 1
85 #define HWY_CAP_AES 0
86 #define HWY_CAP_GE256 0
87 #define HWY_CAP_GE512 0
88 
89 #define HWY_TARGET_STR HWY_TARGET_STR_SSSE3
90 //-----------------------------------------------------------------------------
91 // SSE4
92 #elif HWY_TARGET == HWY_SSE4
93 
94 #define HWY_NAMESPACE N_SSE4
95 #define HWY_ALIGN alignas(16)
96 #define HWY_MAX_BYTES 16
97 #define HWY_LANES(T) (16 / sizeof(T))
98 
99 #define HWY_CAP_INTEGER64 1
100 #define HWY_CAP_FLOAT16 1
101 #define HWY_CAP_FLOAT64 1
102 #define HWY_CAP_GE256 0
103 #define HWY_CAP_GE512 0
104 
105 #define HWY_TARGET_STR HWY_TARGET_STR_SSE4
106 
107 //-----------------------------------------------------------------------------
108 // AVX2
109 #elif HWY_TARGET == HWY_AVX2
110 
111 #define HWY_NAMESPACE N_AVX2
112 #define HWY_ALIGN alignas(32)
113 #define HWY_MAX_BYTES 32
114 #define HWY_LANES(T) (32 / sizeof(T))
115 
116 #define HWY_CAP_INTEGER64 1
117 #define HWY_CAP_FLOAT16 1
118 #define HWY_CAP_FLOAT64 1
119 #define HWY_CAP_GE256 1
120 #define HWY_CAP_GE512 0
121 
122 #define HWY_TARGET_STR HWY_TARGET_STR_AVX2
123 
124 //-----------------------------------------------------------------------------
125 // AVX3[_DL]
126 #elif HWY_TARGET == HWY_AVX3 || HWY_TARGET == HWY_AVX3_DL
127 
128 #define HWY_ALIGN alignas(64)
129 #define HWY_MAX_BYTES 64
130 #define HWY_LANES(T) (64 / sizeof(T))
131 
132 #define HWY_CAP_INTEGER64 1
133 #define HWY_CAP_FLOAT16 1
134 #define HWY_CAP_FLOAT64 1
135 #define HWY_CAP_GE256 1
136 #define HWY_CAP_GE512 1
137 
138 #if HWY_TARGET == HWY_AVX3
139 
140 #define HWY_NAMESPACE N_AVX3
141 #define HWY_TARGET_STR HWY_TARGET_STR_AVX3
142 
143 #elif HWY_TARGET == HWY_AVX3_DL
144 
145 #define HWY_NAMESPACE N_AVX3_DL
146 #define HWY_TARGET_STR \
147  HWY_TARGET_STR_AVX3 \
148  ",vpclmulqdq,avx512vbmi2,vaes,avxvnni,avx512bitalg,avx512vpopcntdq"
149 
150 #else
151 #error "Logic error"
152 #endif // HWY_TARGET == HWY_AVX3_DL
153 
154 //-----------------------------------------------------------------------------
155 // PPC8
156 #elif HWY_TARGET == HWY_PPC8
157 
158 #define HWY_ALIGN alignas(16)
159 #define HWY_MAX_BYTES 16
160 #define HWY_LANES(T) (16 / sizeof(T))
161 
162 #define HWY_CAP_INTEGER64 1
163 #define HWY_CAP_FLOAT16 0
164 #define HWY_CAP_FLOAT64 1
165 #define HWY_CAP_GE256 0
166 #define HWY_CAP_GE512 0
167 
168 #define HWY_NAMESPACE N_PPC8
169 
170 #define HWY_TARGET_STR "altivec,vsx"
171 
172 //-----------------------------------------------------------------------------
173 // NEON
174 #elif HWY_TARGET == HWY_NEON
175 
176 #define HWY_ALIGN alignas(16)
177 #define HWY_MAX_BYTES 16
178 #define HWY_LANES(T) (16 / sizeof(T))
179 
180 #define HWY_CAP_INTEGER64 1
181 #define HWY_CAP_FLOAT16 1
182 #define HWY_CAP_GE256 0
183 #define HWY_CAP_GE512 0
184 
185 #if HWY_ARCH_ARM_A64
186 #define HWY_CAP_FLOAT64 1
187 #else
188 #define HWY_CAP_FLOAT64 0
189 #endif
190 
191 #define HWY_NAMESPACE N_NEON
192 
193 // HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
194 
195 //-----------------------------------------------------------------------------
196 // SVE[2]
197 #elif HWY_TARGET == HWY_SVE2 || HWY_TARGET == HWY_SVE
198 
199 #if defined(HWY_EMULATE_SVE) && !defined(__F16C__)
200 #error "Disable HWY_CAP_FLOAT16 or ensure farm_sve actually converts to f16"
201 #endif
202 
203 // SVE only requires lane alignment, not natural alignment of the entire vector.
204 #define HWY_ALIGN alignas(8)
205 
206 #define HWY_MAX_BYTES 256
207 
208 // <= HWY_MAX_BYTES / sizeof(T): exact size. Otherwise a fraction 1/div (div =
209 // 1,2,4,8) is encoded as HWY_LANES(T) / div. This value leaves enough room for
210 // div=8 and demoting to 1/8 the lane width while still exceeding HWY_MAX_BYTES.
211 #define HWY_LANES(T) (32768 / sizeof(T))
212 
213 #define HWY_CAP_INTEGER64 1
214 #define HWY_CAP_FLOAT16 1
215 #define HWY_CAP_FLOAT64 1
216 #define HWY_CAP_GE256 0
217 #define HWY_CAP_GE512 0
218 
219 #if HWY_TARGET == HWY_SVE2
220 #define HWY_NAMESPACE N_SVE2
221 #else
222 #define HWY_NAMESPACE N_SVE
223 #endif
224 
225 // HWY_TARGET_STR remains undefined
226 
227 //-----------------------------------------------------------------------------
228 // WASM
229 #elif HWY_TARGET == HWY_WASM
230 
231 #define HWY_ALIGN alignas(16)
232 #define HWY_MAX_BYTES 16
233 #define HWY_LANES(T) (16 / sizeof(T))
234 
235 #define HWY_CAP_INTEGER64 0
236 #define HWY_CAP_FLOAT16 1
237 #define HWY_CAP_FLOAT64 0
238 #define HWY_CAP_GE256 0
239 #define HWY_CAP_GE512 0
240 
241 #define HWY_NAMESPACE N_WASM
242 
243 #define HWY_TARGET_STR "simd128"
244 
245 //-----------------------------------------------------------------------------
246 // RVV
247 #elif HWY_TARGET == HWY_RVV
248 
249 // RVV only requires lane alignment, not natural alignment of the entire vector,
250 // and the compiler already aligns builtin types, so nothing to do here.
251 #define HWY_ALIGN
252 
253 // The spec requires VLEN <= 2^16 bits, so the limit is 2^16 bytes (LMUL=8).
254 #define HWY_MAX_BYTES 65536
255 
256 // <= HWY_MAX_BYTES / sizeof(T): exact size. Otherwise a fraction 1/div (div =
257 // 1,2,4,8) is encoded as HWY_LANES(T) / div. This value leaves enough room for
258 // div=8 and demoting to 1/8 the lane width while still exceeding HWY_MAX_BYTES.
259 #define HWY_LANES(T) (8388608 / sizeof(T))
260 
261 #define HWY_CAP_INTEGER64 1
262 #define HWY_CAP_FLOAT64 1
263 #define HWY_CAP_GE256 0
264 #define HWY_CAP_GE512 0
265 
266 #if defined(__riscv_zfh)
267 #define HWY_CAP_FLOAT16 1
268 #else
269 #define HWY_CAP_FLOAT16 0
270 #endif
271 
272 #define HWY_NAMESPACE N_RVV
273 
274 // HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
275 // (rv64gcv is not a valid target)
276 
277 //-----------------------------------------------------------------------------
278 // SCALAR
279 #elif HWY_TARGET == HWY_SCALAR
280 
281 #define HWY_ALIGN
282 #define HWY_MAX_BYTES 8
283 #define HWY_LANES(T) 1
284 
285 #define HWY_CAP_INTEGER64 1
286 #define HWY_CAP_FLOAT16 1
287 #define HWY_CAP_FLOAT64 1
288 #define HWY_CAP_GE256 0
289 #define HWY_CAP_GE512 0
290 
291 #define HWY_NAMESPACE N_SCALAR
292 
293 // HWY_TARGET_STR remains undefined so HWY_ATTR is a no-op.
294 
295 #else
296 #pragma message("HWY_TARGET does not match any known target")
297 #endif // HWY_TARGET
298 
// Must be invoked at file scope, before opening any namespace (a requirement
// of Clang <9). The trailing static_assert forces callers to add a semicolon.
#undef HWY_BEFORE_NAMESPACE
#ifndef HWY_TARGET_STR
// No HWY_TARGET_STR: skip the attribute push, which avoids a compiler warning.
#define HWY_BEFORE_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#else
#define HWY_BEFORE_NAMESPACE()        \
  HWY_PUSH_ATTRIBUTES(HWY_TARGET_STR) \
  static_assert(true, "For requiring trailing semicolon")
#endif
310 
// Must be invoked only after all namespaces have been closed (a requirement
// of Clang <9). The trailing static_assert forces callers to add a semicolon.
#undef HWY_AFTER_NAMESPACE
#ifndef HWY_TARGET_STR
// No HWY_TARGET_STR: skip the attribute pop, which avoids a compiler warning.
#define HWY_AFTER_NAMESPACE() \
  static_assert(true, "For requiring trailing semicolon")
#else
#define HWY_AFTER_NAMESPACE() \
  HWY_POP_ATTRIBUTES          \
  static_assert(true, "For requiring trailing semicolon")
#endif
322 
323 #undef HWY_ATTR
324 #if defined(HWY_TARGET_STR) && HWY_HAS_ATTRIBUTE(target)
325 #define HWY_ATTR __attribute__((target(HWY_TARGET_STR)))
326 #else
327 #define HWY_ATTR
328 #endif
329 
// DEPRECATED: simply forwards to HWY_LANES.
#undef HWY_GATHER_LANES
#define HWY_GATHER_LANES(LaneType) HWY_LANES(LaneType)