1 /*
2  * kmp_lock.cpp -- lock-related functions
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <stddef.h>
14 #include <atomic>
15 
16 #include "kmp.h"
17 #include "kmp_i18n.h"
18 #include "kmp_io.h"
19 #include "kmp_itt.h"
20 #include "kmp_lock.h"
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
23 
24 #if KMP_USE_FUTEX
25 #include <sys/syscall.h>
26 #include <unistd.h>
27 // We should really include <futex.h>, but that causes compatibility problems on
28 // different Linux* OS distributions that either require that you include (or
29 // break when you try to include) <pci/types.h>. Since all we need is the two
30 // macros below (which are part of the kernel ABI, so can't change) we just
31 // define the constants here and don't include <futex.h>
32 #ifndef FUTEX_WAIT
33 #define FUTEX_WAIT 0
34 #endif
35 #ifndef FUTEX_WAKE
36 #define FUTEX_WAKE 1
37 #endif
38 #endif
39 
40 /* Implement spin locks for internal library use. */
41 /* The algorithm implemented is Lamport's bakery lock [1974]. */
42 
43 void __kmp_validate_locks(void) {
44  int i;
45  kmp_uint32 x, y;
46 
  /* Check to make sure unsigned arithmetic wraps properly */
48  x = ~((kmp_uint32)0) - 2;
49  y = x - 2;
50 
51  for (i = 0; i < 8; ++i, ++x, ++y) {
52  kmp_uint32 z = (x - y);
53  KMP_ASSERT(z == 2);
54  }
55 
56  KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57 }
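
/* For illustration of the check above: with x = 0xFFFFFFFD and y = 0xFFFFFFFB,
   the loop crosses the 32-bit boundary part way through (x becomes 0 while y
   is still 0xFFFFFFFE), yet modulo-2^32 subtraction still yields x - y == 2 at
   every step. The ticket locks below rely on exactly this property when
   next_ticket wraps around past now_serving. */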
58 
59 /* ------------------------------------------------------------------------ */
60 /* test and set locks */
61 
62 // For the non-nested locks, we can only assume that the first 4 bytes were
63 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
65 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66 //
67 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
68 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
69 
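// Encoding used by the test and set locks below: lk.poll holds
// KMP_LOCK_FREE(tas) when the lock is free and KMP_LOCK_BUSY(gtid + 1, tas)
// when it is held by global thread id gtid (hence the "- 1" in
// __kmp_get_tas_lock_owner). Note that the checked ("_with_checks") variants
// of the simple-lock operations guard their reads of lk.depth_locked with
// sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE, i.e. they only examine that field
// when the whole structure is known to fit in the user's lock variable.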
70 static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
71  return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72 }
73 
74 static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
75  return lck->lk.depth_locked != -1;
76 }
77 
78 __forceinline static int
79 __kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
80  KMP_MB();
81 
82 #ifdef USE_LOCK_PROFILE
83  kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84  if ((curr != 0) && (curr != gtid + 1))
85  __kmp_printf("LOCK CONTENTION: %p\n", lck);
86 /* else __kmp_printf( "." );*/
87 #endif /* USE_LOCK_PROFILE */
88 
89  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91 
92  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93  __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
94  KMP_FSYNC_ACQUIRED(lck);
95  return KMP_LOCK_ACQUIRED_FIRST;
96  }
97 
98  kmp_uint32 spins;
99  KMP_FSYNC_PREPARE(lck);
100  KMP_INIT_YIELD(spins);
101  kmp_backoff_t backoff = __kmp_spin_backoff_params;
102  do {
103  __kmp_spin_backoff(&backoff);
104  KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
105  } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
106  !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
107  KMP_FSYNC_ACQUIRED(lck);
108  return KMP_LOCK_ACQUIRED_FIRST;
109 }
110 
111 int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
112  int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
113  return retval;
114 }
115 
116 static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
117  kmp_int32 gtid) {
118  char const *const func = "omp_set_lock";
119  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
120  __kmp_is_tas_lock_nestable(lck)) {
121  KMP_FATAL(LockNestableUsedAsSimple, func);
122  }
123  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
124  KMP_FATAL(LockIsAlreadyOwned, func);
125  }
126  return __kmp_acquire_tas_lock(lck, gtid);
127 }
128 
129 int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
130  kmp_int32 tas_free = KMP_LOCK_FREE(tas);
131  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
132  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
133  __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
134  KMP_FSYNC_ACQUIRED(lck);
135  return TRUE;
136  }
137  return FALSE;
138 }
139 
140 static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
141  kmp_int32 gtid) {
142  char const *const func = "omp_test_lock";
143  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
144  __kmp_is_tas_lock_nestable(lck)) {
145  KMP_FATAL(LockNestableUsedAsSimple, func);
146  }
147  return __kmp_test_tas_lock(lck, gtid);
148 }
149 
150 int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
151  KMP_MB(); /* Flush all pending memory write invalidates. */
152 
153  KMP_FSYNC_RELEASING(lck);
154  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
155  KMP_MB(); /* Flush all pending memory write invalidates. */
156 
157  KMP_YIELD_OVERSUB();
158  return KMP_LOCK_RELEASED;
159 }
160 
161 static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
162  kmp_int32 gtid) {
163  char const *const func = "omp_unset_lock";
164  KMP_MB(); /* in case another processor initialized lock */
165  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
166  __kmp_is_tas_lock_nestable(lck)) {
167  KMP_FATAL(LockNestableUsedAsSimple, func);
168  }
169  if (__kmp_get_tas_lock_owner(lck) == -1) {
170  KMP_FATAL(LockUnsettingFree, func);
171  }
172  if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
173  (__kmp_get_tas_lock_owner(lck) != gtid)) {
174  KMP_FATAL(LockUnsettingSetByAnother, func);
175  }
176  return __kmp_release_tas_lock(lck, gtid);
177 }
178 
179 void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
180  lck->lk.poll = KMP_LOCK_FREE(tas);
181 }
182 
183 void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
184 
185 static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
186  char const *const func = "omp_destroy_lock";
187  if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
188  __kmp_is_tas_lock_nestable(lck)) {
189  KMP_FATAL(LockNestableUsedAsSimple, func);
190  }
191  if (__kmp_get_tas_lock_owner(lck) != -1) {
192  KMP_FATAL(LockStillOwned, func);
193  }
194  __kmp_destroy_tas_lock(lck);
195 }
196 
197 // nested test and set locks
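// Illustrative sequence (not taken from any particular caller) showing how
// lk.depth_locked evolves for a nested lock held by thread gtid:
//   __kmp_acquire_nested_tas_lock(lck, gtid); // first acquire -> depth 1
//   __kmp_acquire_nested_tas_lock(lck, gtid); // re-acquire    -> depth 2
//   __kmp_release_nested_tas_lock(lck, gtid); // -> depth 1, KMP_LOCK_STILL_HELD
//   __kmp_release_nested_tas_lock(lck, gtid); // -> depth 0, lock released
// A simple (non-nested) lock keeps depth_locked == -1, which is what
// __kmp_is_tas_lock_nestable tests for.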
198 
199 int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
200  KMP_DEBUG_ASSERT(gtid >= 0);
201 
202  if (__kmp_get_tas_lock_owner(lck) == gtid) {
203  lck->lk.depth_locked += 1;
204  return KMP_LOCK_ACQUIRED_NEXT;
205  } else {
206  __kmp_acquire_tas_lock_timed_template(lck, gtid);
207  lck->lk.depth_locked = 1;
208  return KMP_LOCK_ACQUIRED_FIRST;
209  }
210 }
211 
212 static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
213  kmp_int32 gtid) {
214  char const *const func = "omp_set_nest_lock";
215  if (!__kmp_is_tas_lock_nestable(lck)) {
216  KMP_FATAL(LockSimpleUsedAsNestable, func);
217  }
218  return __kmp_acquire_nested_tas_lock(lck, gtid);
219 }
220 
221 int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
222  int retval;
223 
224  KMP_DEBUG_ASSERT(gtid >= 0);
225 
226  if (__kmp_get_tas_lock_owner(lck) == gtid) {
227  retval = ++lck->lk.depth_locked;
228  } else if (!__kmp_test_tas_lock(lck, gtid)) {
229  retval = 0;
230  } else {
231  KMP_MB();
232  retval = lck->lk.depth_locked = 1;
233  }
234  return retval;
235 }
236 
237 static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
238  kmp_int32 gtid) {
239  char const *const func = "omp_test_nest_lock";
240  if (!__kmp_is_tas_lock_nestable(lck)) {
241  KMP_FATAL(LockSimpleUsedAsNestable, func);
242  }
243  return __kmp_test_nested_tas_lock(lck, gtid);
244 }
245 
246 int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
247  KMP_DEBUG_ASSERT(gtid >= 0);
248 
249  KMP_MB();
250  if (--(lck->lk.depth_locked) == 0) {
251  __kmp_release_tas_lock(lck, gtid);
252  return KMP_LOCK_RELEASED;
253  }
254  return KMP_LOCK_STILL_HELD;
255 }
256 
257 static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
258  kmp_int32 gtid) {
259  char const *const func = "omp_unset_nest_lock";
260  KMP_MB(); /* in case another processor initialized lock */
261  if (!__kmp_is_tas_lock_nestable(lck)) {
262  KMP_FATAL(LockSimpleUsedAsNestable, func);
263  }
264  if (__kmp_get_tas_lock_owner(lck) == -1) {
265  KMP_FATAL(LockUnsettingFree, func);
266  }
267  if (__kmp_get_tas_lock_owner(lck) != gtid) {
268  KMP_FATAL(LockUnsettingSetByAnother, func);
269  }
270  return __kmp_release_nested_tas_lock(lck, gtid);
271 }
272 
273 void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
274  __kmp_init_tas_lock(lck);
275  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
276 }
277 
278 void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
279  __kmp_destroy_tas_lock(lck);
280  lck->lk.depth_locked = 0;
281 }
282 
283 static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
284  char const *const func = "omp_destroy_nest_lock";
285  if (!__kmp_is_tas_lock_nestable(lck)) {
286  KMP_FATAL(LockSimpleUsedAsNestable, func);
287  }
288  if (__kmp_get_tas_lock_owner(lck) != -1) {
289  KMP_FATAL(LockStillOwned, func);
290  }
291  __kmp_destroy_nested_tas_lock(lck);
292 }
293 
294 #if KMP_USE_FUTEX
295 
296 /* ------------------------------------------------------------------------ */
297 /* futex locks */
298 
299 // futex locks are really just test and set locks, with a different method
300 // of handling contention. They take the same amount of space as test and
301 // set locks, and are allocated the same way (i.e. use the area allocated by
302 // the compiler for non-nested locks / allocate nested locks on the heap).
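//
// Layout of lk.poll for a futex lock: KMP_LOCK_FREE(futex) when the lock is
// free, and (gtid + 1) << 1 when it is held by thread gtid with no sleeping
// waiters. Bit 0 is set by a contending thread just before it sleeps in
// futex_wait, telling the owner that at least one waiter may be asleep; that
// is why the release path below checks (KMP_LOCK_STRIP(poll_val) & 1) and
// issues a FUTEX_WAKE only in that case.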
303 
304 static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
305  return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
306 }
307 
308 static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
309  return lck->lk.depth_locked != -1;
310 }
311 
312 __forceinline static int
313 __kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
314  kmp_int32 gtid_code = (gtid + 1) << 1;
315 
316  KMP_MB();
317 
318 #ifdef USE_LOCK_PROFILE
319  kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
320  if ((curr != 0) && (curr != gtid_code))
321  __kmp_printf("LOCK CONTENTION: %p\n", lck);
322 /* else __kmp_printf( "." );*/
323 #endif /* USE_LOCK_PROFILE */
324 
325  KMP_FSYNC_PREPARE(lck);
326  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
327  lck, lck->lk.poll, gtid));
328 
329  kmp_int32 poll_val;
330 
331  while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
332  &(lck->lk.poll), KMP_LOCK_FREE(futex),
333  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
334 
335  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
336  KA_TRACE(
337  1000,
338  ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
339  lck, gtid, poll_val, cond));
340 
    // NOTE: do not rewrite the following branch condition as
    //
    //   if ( poll_val & 1 == 0 )
    //
    // Because == binds more tightly than & in C/C++, that expression parses
    // as poll_val & (1 == 0), which is always zero, so the block below would
    // always be skipped regardless of the LSB of poll_val.
347  if (!cond) {
348  // Try to set the lsb in the poll to indicate to the owner
349  // thread that they need to wake this thread up.
350  if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
351  poll_val | KMP_LOCK_BUSY(1, futex))) {
352  KA_TRACE(
353  1000,
354  ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
355  lck, lck->lk.poll, gtid));
356  continue;
357  }
358  poll_val |= KMP_LOCK_BUSY(1, futex);
359 
360  KA_TRACE(1000,
361  ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
362  lck->lk.poll, gtid));
363  }
364 
365  KA_TRACE(
366  1000,
367  ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
368  lck, gtid, poll_val));
369 
370  long rc;
371  if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
372  NULL, 0)) != 0) {
373  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
374  "failed (rc=%ld errno=%d)\n",
375  lck, gtid, poll_val, rc, errno));
376  continue;
377  }
378 
379  KA_TRACE(1000,
380  ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
381  lck, gtid, poll_val));
    // This thread has now done a successful futex wait call and has been
    // placed on the OS futex queue. We must now perform a futex wake call
    // when releasing the lock, as we have no idea how many other threads are
    // in the queue.
385  gtid_code |= 1;
386  }
387 
388  KMP_FSYNC_ACQUIRED(lck);
389  KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
390  lck->lk.poll, gtid));
391  return KMP_LOCK_ACQUIRED_FIRST;
392 }
393 
394 int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
395  int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
396  return retval;
397 }
398 
399 static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
400  kmp_int32 gtid) {
401  char const *const func = "omp_set_lock";
402  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
403  __kmp_is_futex_lock_nestable(lck)) {
404  KMP_FATAL(LockNestableUsedAsSimple, func);
405  }
406  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
407  KMP_FATAL(LockIsAlreadyOwned, func);
408  }
409  return __kmp_acquire_futex_lock(lck, gtid);
410 }
411 
412 int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
413  if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
414  KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
415  KMP_FSYNC_ACQUIRED(lck);
416  return TRUE;
417  }
418  return FALSE;
419 }
420 
421 static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
422  kmp_int32 gtid) {
423  char const *const func = "omp_test_lock";
424  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
425  __kmp_is_futex_lock_nestable(lck)) {
426  KMP_FATAL(LockNestableUsedAsSimple, func);
427  }
428  return __kmp_test_futex_lock(lck, gtid);
429 }
430 
431 int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
432  KMP_MB(); /* Flush all pending memory write invalidates. */
433 
434  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
435  lck, lck->lk.poll, gtid));
436 
437  KMP_FSYNC_RELEASING(lck);
438 
439  kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
440 
441  KA_TRACE(1000,
442  ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
443  lck, gtid, poll_val));
444 
445  if (KMP_LOCK_STRIP(poll_val) & 1) {
446  KA_TRACE(1000,
447  ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
448  lck, gtid));
449  syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
450  NULL, NULL, 0);
451  }
452 
453  KMP_MB(); /* Flush all pending memory write invalidates. */
454 
455  KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
456  lck->lk.poll, gtid));
457 
458  KMP_YIELD_OVERSUB();
459  return KMP_LOCK_RELEASED;
460 }
461 
462 static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
463  kmp_int32 gtid) {
464  char const *const func = "omp_unset_lock";
465  KMP_MB(); /* in case another processor initialized lock */
466  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
467  __kmp_is_futex_lock_nestable(lck)) {
468  KMP_FATAL(LockNestableUsedAsSimple, func);
469  }
470  if (__kmp_get_futex_lock_owner(lck) == -1) {
471  KMP_FATAL(LockUnsettingFree, func);
472  }
473  if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
474  (__kmp_get_futex_lock_owner(lck) != gtid)) {
475  KMP_FATAL(LockUnsettingSetByAnother, func);
476  }
477  return __kmp_release_futex_lock(lck, gtid);
478 }
479 
480 void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
481  TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
482 }
483 
484 void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
485 
486 static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
487  char const *const func = "omp_destroy_lock";
488  if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
489  __kmp_is_futex_lock_nestable(lck)) {
490  KMP_FATAL(LockNestableUsedAsSimple, func);
491  }
492  if (__kmp_get_futex_lock_owner(lck) != -1) {
493  KMP_FATAL(LockStillOwned, func);
494  }
495  __kmp_destroy_futex_lock(lck);
496 }
497 
498 // nested futex locks
499 
500 int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
501  KMP_DEBUG_ASSERT(gtid >= 0);
502 
503  if (__kmp_get_futex_lock_owner(lck) == gtid) {
504  lck->lk.depth_locked += 1;
505  return KMP_LOCK_ACQUIRED_NEXT;
506  } else {
507  __kmp_acquire_futex_lock_timed_template(lck, gtid);
508  lck->lk.depth_locked = 1;
509  return KMP_LOCK_ACQUIRED_FIRST;
510  }
511 }
512 
513 static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
514  kmp_int32 gtid) {
515  char const *const func = "omp_set_nest_lock";
516  if (!__kmp_is_futex_lock_nestable(lck)) {
517  KMP_FATAL(LockSimpleUsedAsNestable, func);
518  }
519  return __kmp_acquire_nested_futex_lock(lck, gtid);
520 }
521 
522 int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
523  int retval;
524 
525  KMP_DEBUG_ASSERT(gtid >= 0);
526 
527  if (__kmp_get_futex_lock_owner(lck) == gtid) {
528  retval = ++lck->lk.depth_locked;
529  } else if (!__kmp_test_futex_lock(lck, gtid)) {
530  retval = 0;
531  } else {
532  KMP_MB();
533  retval = lck->lk.depth_locked = 1;
534  }
535  return retval;
536 }
537 
538 static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
539  kmp_int32 gtid) {
540  char const *const func = "omp_test_nest_lock";
541  if (!__kmp_is_futex_lock_nestable(lck)) {
542  KMP_FATAL(LockSimpleUsedAsNestable, func);
543  }
544  return __kmp_test_nested_futex_lock(lck, gtid);
545 }
546 
547 int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
548  KMP_DEBUG_ASSERT(gtid >= 0);
549 
550  KMP_MB();
551  if (--(lck->lk.depth_locked) == 0) {
552  __kmp_release_futex_lock(lck, gtid);
553  return KMP_LOCK_RELEASED;
554  }
555  return KMP_LOCK_STILL_HELD;
556 }
557 
558 static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
559  kmp_int32 gtid) {
560  char const *const func = "omp_unset_nest_lock";
561  KMP_MB(); /* in case another processor initialized lock */
562  if (!__kmp_is_futex_lock_nestable(lck)) {
563  KMP_FATAL(LockSimpleUsedAsNestable, func);
564  }
565  if (__kmp_get_futex_lock_owner(lck) == -1) {
566  KMP_FATAL(LockUnsettingFree, func);
567  }
568  if (__kmp_get_futex_lock_owner(lck) != gtid) {
569  KMP_FATAL(LockUnsettingSetByAnother, func);
570  }
571  return __kmp_release_nested_futex_lock(lck, gtid);
572 }
573 
574 void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
575  __kmp_init_futex_lock(lck);
576  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
577 }
578 
579 void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
580  __kmp_destroy_futex_lock(lck);
581  lck->lk.depth_locked = 0;
582 }
583 
584 static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
585  char const *const func = "omp_destroy_nest_lock";
586  if (!__kmp_is_futex_lock_nestable(lck)) {
587  KMP_FATAL(LockSimpleUsedAsNestable, func);
588  }
589  if (__kmp_get_futex_lock_owner(lck) != -1) {
590  KMP_FATAL(LockStillOwned, func);
591  }
592  __kmp_destroy_nested_futex_lock(lck);
593 }
594 
595 #endif // KMP_USE_FUTEX
596 
597 /* ------------------------------------------------------------------------ */
598 /* ticket (bakery) locks */
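/* Sketch of the protocol implemented below: each acquirer atomically draws a
   ticket from lk.next_ticket and spins until lk.now_serving equals its
   ticket; each release increments lk.now_serving, admitting the next ticket
   holder, so the lock is granted in FIFO (arrival) order. For example, three
   threads arriving at a free lock draw tickets 0, 1 and 2; the holder of
   ticket 0 proceeds at once, and the releases advance now_serving 0 -> 1 -> 2
   to hand the lock over in order. Unsigned wraparound of the counters is
   harmless (see __kmp_validate_locks above). */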
599 
600 static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
601  return std::atomic_load_explicit(&lck->lk.owner_id,
602  std::memory_order_relaxed) -
603  1;
604 }
605 
606 static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
607  return std::atomic_load_explicit(&lck->lk.depth_locked,
608  std::memory_order_relaxed) != -1;
609 }
610 
611 static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
612  return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
613  std::memory_order_acquire) == my_ticket;
614 }
615 
616 __forceinline static int
617 __kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
618  kmp_int32 gtid) {
619  kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
620  &lck->lk.next_ticket, 1U, std::memory_order_relaxed);
621 
622 #ifdef USE_LOCK_PROFILE
623  if (std::atomic_load_explicit(&lck->lk.now_serving,
624  std::memory_order_relaxed) != my_ticket)
625  __kmp_printf("LOCK CONTENTION: %p\n", lck);
626 /* else __kmp_printf( "." );*/
627 #endif /* USE_LOCK_PROFILE */
628 
629  if (std::atomic_load_explicit(&lck->lk.now_serving,
630  std::memory_order_acquire) == my_ticket) {
631  return KMP_LOCK_ACQUIRED_FIRST;
632  }
633  KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
634  return KMP_LOCK_ACQUIRED_FIRST;
635 }
636 
637 int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
638  int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
639  return retval;
640 }
641 
642 static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
643  kmp_int32 gtid) {
644  char const *const func = "omp_set_lock";
645 
646  if (!std::atomic_load_explicit(&lck->lk.initialized,
647  std::memory_order_relaxed)) {
648  KMP_FATAL(LockIsUninitialized, func);
649  }
650  if (lck->lk.self != lck) {
651  KMP_FATAL(LockIsUninitialized, func);
652  }
653  if (__kmp_is_ticket_lock_nestable(lck)) {
654  KMP_FATAL(LockNestableUsedAsSimple, func);
655  }
656  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
657  KMP_FATAL(LockIsAlreadyOwned, func);
658  }
659 
660  __kmp_acquire_ticket_lock(lck, gtid);
661 
662  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
663  std::memory_order_relaxed);
664  return KMP_LOCK_ACQUIRED_FIRST;
665 }
666 
667 int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
668  kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
669  std::memory_order_relaxed);
670 
671  if (std::atomic_load_explicit(&lck->lk.now_serving,
672  std::memory_order_relaxed) == my_ticket) {
673  kmp_uint32 next_ticket = my_ticket + 1;
674  if (std::atomic_compare_exchange_strong_explicit(
675  &lck->lk.next_ticket, &my_ticket, next_ticket,
676  std::memory_order_acquire, std::memory_order_acquire)) {
677  return TRUE;
678  }
679  }
680  return FALSE;
681 }
682 
683 static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
684  kmp_int32 gtid) {
685  char const *const func = "omp_test_lock";
686 
687  if (!std::atomic_load_explicit(&lck->lk.initialized,
688  std::memory_order_relaxed)) {
689  KMP_FATAL(LockIsUninitialized, func);
690  }
691  if (lck->lk.self != lck) {
692  KMP_FATAL(LockIsUninitialized, func);
693  }
694  if (__kmp_is_ticket_lock_nestable(lck)) {
695  KMP_FATAL(LockNestableUsedAsSimple, func);
696  }
697 
698  int retval = __kmp_test_ticket_lock(lck, gtid);
699 
700  if (retval) {
701  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
702  std::memory_order_relaxed);
703  }
704  return retval;
705 }
706 
707 int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
708  kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
709  std::memory_order_relaxed) -
710  std::atomic_load_explicit(&lck->lk.now_serving,
711  std::memory_order_relaxed);
712 
713  std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
714  std::memory_order_release);
715 
716  KMP_YIELD(distance >
717  (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
718  return KMP_LOCK_RELEASED;
719 }
720 
721 static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
722  kmp_int32 gtid) {
723  char const *const func = "omp_unset_lock";
724 
725  if (!std::atomic_load_explicit(&lck->lk.initialized,
726  std::memory_order_relaxed)) {
727  KMP_FATAL(LockIsUninitialized, func);
728  }
729  if (lck->lk.self != lck) {
730  KMP_FATAL(LockIsUninitialized, func);
731  }
732  if (__kmp_is_ticket_lock_nestable(lck)) {
733  KMP_FATAL(LockNestableUsedAsSimple, func);
734  }
735  if (__kmp_get_ticket_lock_owner(lck) == -1) {
736  KMP_FATAL(LockUnsettingFree, func);
737  }
738  if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
739  (__kmp_get_ticket_lock_owner(lck) != gtid)) {
740  KMP_FATAL(LockUnsettingSetByAnother, func);
741  }
742  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
743  return __kmp_release_ticket_lock(lck, gtid);
744 }
745 
746 void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
747  lck->lk.location = NULL;
748  lck->lk.self = lck;
749  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
750  std::memory_order_relaxed);
751  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
752  std::memory_order_relaxed);
753  std::atomic_store_explicit(
754  &lck->lk.owner_id, 0,
755  std::memory_order_relaxed); // no thread owns the lock.
756  std::atomic_store_explicit(
757  &lck->lk.depth_locked, -1,
758  std::memory_order_relaxed); // -1 => not a nested lock.
759  std::atomic_store_explicit(&lck->lk.initialized, true,
760  std::memory_order_release);
761 }
762 
763 void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
764  std::atomic_store_explicit(&lck->lk.initialized, false,
765  std::memory_order_release);
766  lck->lk.self = NULL;
767  lck->lk.location = NULL;
768  std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
769  std::memory_order_relaxed);
770  std::atomic_store_explicit(&lck->lk.now_serving, 0U,
771  std::memory_order_relaxed);
772  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
773  std::atomic_store_explicit(&lck->lk.depth_locked, -1,
774  std::memory_order_relaxed);
775 }
776 
777 static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
778  char const *const func = "omp_destroy_lock";
779 
780  if (!std::atomic_load_explicit(&lck->lk.initialized,
781  std::memory_order_relaxed)) {
782  KMP_FATAL(LockIsUninitialized, func);
783  }
784  if (lck->lk.self != lck) {
785  KMP_FATAL(LockIsUninitialized, func);
786  }
787  if (__kmp_is_ticket_lock_nestable(lck)) {
788  KMP_FATAL(LockNestableUsedAsSimple, func);
789  }
790  if (__kmp_get_ticket_lock_owner(lck) != -1) {
791  KMP_FATAL(LockStillOwned, func);
792  }
793  __kmp_destroy_ticket_lock(lck);
794 }
795 
796 // nested ticket locks
797 
798 int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
799  KMP_DEBUG_ASSERT(gtid >= 0);
800 
801  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
802  std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
803  std::memory_order_relaxed);
804  return KMP_LOCK_ACQUIRED_NEXT;
805  } else {
806  __kmp_acquire_ticket_lock_timed_template(lck, gtid);
807  std::atomic_store_explicit(&lck->lk.depth_locked, 1,
808  std::memory_order_relaxed);
809  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
810  std::memory_order_relaxed);
811  return KMP_LOCK_ACQUIRED_FIRST;
812  }
813 }
814 
815 static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
816  kmp_int32 gtid) {
817  char const *const func = "omp_set_nest_lock";
818 
819  if (!std::atomic_load_explicit(&lck->lk.initialized,
820  std::memory_order_relaxed)) {
821  KMP_FATAL(LockIsUninitialized, func);
822  }
823  if (lck->lk.self != lck) {
824  KMP_FATAL(LockIsUninitialized, func);
825  }
826  if (!__kmp_is_ticket_lock_nestable(lck)) {
827  KMP_FATAL(LockSimpleUsedAsNestable, func);
828  }
829  return __kmp_acquire_nested_ticket_lock(lck, gtid);
830 }
831 
832 int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
833  int retval;
834 
835  KMP_DEBUG_ASSERT(gtid >= 0);
836 
837  if (__kmp_get_ticket_lock_owner(lck) == gtid) {
838  retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
839  std::memory_order_relaxed) +
840  1;
841  } else if (!__kmp_test_ticket_lock(lck, gtid)) {
842  retval = 0;
843  } else {
844  std::atomic_store_explicit(&lck->lk.depth_locked, 1,
845  std::memory_order_relaxed);
846  std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
847  std::memory_order_relaxed);
848  retval = 1;
849  }
850  return retval;
851 }
852 
853 static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
854  kmp_int32 gtid) {
855  char const *const func = "omp_test_nest_lock";
856 
857  if (!std::atomic_load_explicit(&lck->lk.initialized,
858  std::memory_order_relaxed)) {
859  KMP_FATAL(LockIsUninitialized, func);
860  }
861  if (lck->lk.self != lck) {
862  KMP_FATAL(LockIsUninitialized, func);
863  }
864  if (!__kmp_is_ticket_lock_nestable(lck)) {
865  KMP_FATAL(LockSimpleUsedAsNestable, func);
866  }
867  return __kmp_test_nested_ticket_lock(lck, gtid);
868 }
869 
870 int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
871  KMP_DEBUG_ASSERT(gtid >= 0);
872 
873  if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
874  std::memory_order_relaxed) -
875  1) == 0) {
876  std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
877  __kmp_release_ticket_lock(lck, gtid);
878  return KMP_LOCK_RELEASED;
879  }
880  return KMP_LOCK_STILL_HELD;
881 }
882 
883 static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
884  kmp_int32 gtid) {
885  char const *const func = "omp_unset_nest_lock";
886 
887  if (!std::atomic_load_explicit(&lck->lk.initialized,
888  std::memory_order_relaxed)) {
889  KMP_FATAL(LockIsUninitialized, func);
890  }
891  if (lck->lk.self != lck) {
892  KMP_FATAL(LockIsUninitialized, func);
893  }
894  if (!__kmp_is_ticket_lock_nestable(lck)) {
895  KMP_FATAL(LockSimpleUsedAsNestable, func);
896  }
897  if (__kmp_get_ticket_lock_owner(lck) == -1) {
898  KMP_FATAL(LockUnsettingFree, func);
899  }
900  if (__kmp_get_ticket_lock_owner(lck) != gtid) {
901  KMP_FATAL(LockUnsettingSetByAnother, func);
902  }
903  return __kmp_release_nested_ticket_lock(lck, gtid);
904 }
905 
906 void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
907  __kmp_init_ticket_lock(lck);
908  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
909  std::memory_order_relaxed);
910  // >= 0 for nestable locks, -1 for simple locks
911 }
912 
913 void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
914  __kmp_destroy_ticket_lock(lck);
915  std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916  std::memory_order_relaxed);
917 }
918 
919 static void
920 __kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
921  char const *const func = "omp_destroy_nest_lock";
922 
923  if (!std::atomic_load_explicit(&lck->lk.initialized,
924  std::memory_order_relaxed)) {
925  KMP_FATAL(LockIsUninitialized, func);
926  }
927  if (lck->lk.self != lck) {
928  KMP_FATAL(LockIsUninitialized, func);
929  }
930  if (!__kmp_is_ticket_lock_nestable(lck)) {
931  KMP_FATAL(LockSimpleUsedAsNestable, func);
932  }
933  if (__kmp_get_ticket_lock_owner(lck) != -1) {
934  KMP_FATAL(LockStillOwned, func);
935  }
936  __kmp_destroy_nested_ticket_lock(lck);
937 }
938 
939 // access functions to fields which don't exist for all lock kinds.
940 
941 static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
942  return lck->lk.location;
943 }
944 
945 static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
946  const ident_t *loc) {
947  lck->lk.location = loc;
948 }
949 
950 static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
951  return lck->lk.flags;
952 }
953 
954 static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
955  kmp_lock_flags_t flags) {
956  lck->lk.flags = flags;
957 }
958 
959 /* ------------------------------------------------------------------------ */
960 /* queuing locks */
961 
/* First the states
   (head,tail) =              0, 0  means lock is unheld, nobody on queue
                 UINT_MAX or -1, 0  means lock is held, nobody on queue
                              h, h  means lock held or about to transition,
                                    1 element on queue
                              h, t  h <> t, means lock is held or about to
                                    transition, >1 elements on queue

   Now the transitions
      Acquire(0,0)  = -1 ,0
      Release(0,0)  = Error
      Acquire(-1,0) =  h ,h    h > 0
      Release(-1,0) =  0 ,0
      Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
      Release(h,h)  = -1 ,0    h > 0
      Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
      Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t

   And pictorially

           +-----+
           | 0, 0|------- release -------> Error
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           |-1, 0|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, h|
           +-----+
             |  ^
      acquire|  |release
             |  |
             |  |
             v  |
           +-----+
           | h, t|----- acquire, release loopback ---+
           +-----+                                   |
              ^                                      |
              |                                      |
              +--------------------------------------+
 */
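
/* Example trace of the transitions above (head/tail store gtid + 1;
   illustrative only, the thread numbers are arbitrary):
     T#2 acquires the free lock:  (0,0)  -> (-1,0)  owner running, queue empty
     T#4 arrives and must wait:   (-1,0) -> (5,5)   one waiter queued
     T#6 arrives and must wait:   (5,5)  -> (5,7)   T#4's th_next_waiting := 7
     T#2 releases:                (5,7)  -> (7,7)   T#4 is dequeued, its
                                                    th_spin_here is cleared,
                                                    and it now owns the lock
     T#4 releases:                (7,7)  -> (-1,0)  T#6 now owns the lock
     T#6 releases:                (-1,0) -> (0,0)   lock is free again */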
1013 
1014 #ifdef DEBUG_QUEUING_LOCKS
1015 
1016 /* Stuff for circular trace buffer */
1017 #define TRACE_BUF_ELE 1024
1018 static char traces[TRACE_BUF_ELE][128] = {0};
1019 static int tc = 0;
1020 #define TRACE_LOCK(X, Y) \
1021  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1022 #define TRACE_LOCK_T(X, Y, Z) \
1023  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1024 #define TRACE_LOCK_HT(X, Y, Z, Q) \
1025  KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
1026  Z, Q);
1027 
1028 static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1029  kmp_queuing_lock_t *lck, kmp_int32 head_id,
1030  kmp_int32 tail_id) {
1031  kmp_int32 t, i;
1032 
1033  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1034 
1035  i = tc % TRACE_BUF_ELE;
1036  __kmp_printf_no_lock("%s\n", traces[i]);
1037  i = (i + 1) % TRACE_BUF_ELE;
1038  while (i != (tc % TRACE_BUF_ELE)) {
1039  __kmp_printf_no_lock("%s", traces[i]);
1040  i = (i + 1) % TRACE_BUF_ELE;
1041  }
1042  __kmp_printf_no_lock("\n");
1043 
1044  __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1045  "next_wait:%d, head_id:%d, tail_id:%d\n",
1046  gtid + 1, this_thr->th.th_spin_here,
1047  this_thr->th.th_next_waiting, head_id, tail_id);
1048 
1049  __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1050 
1051  if (lck->lk.head_id >= 1) {
1052  t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1053  while (t > 0) {
1054  __kmp_printf_no_lock("-> %d ", t);
1055  t = __kmp_threads[t - 1]->th.th_next_waiting;
1056  }
1057  }
1058  __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
1059  __kmp_printf_no_lock("\n\n");
1060 }
1061 
1062 #endif /* DEBUG_QUEUING_LOCKS */
1063 
1064 static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1065  return TCR_4(lck->lk.owner_id) - 1;
1066 }
1067 
1068 static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1069  return lck->lk.depth_locked != -1;
1070 }
1071 
/* Acquire a lock using the queuing lock implementation */
1073 template <bool takeTime>
1074 /* [TLW] The unused template above is left behind because of what BEB believes
1075  is a potential compiler problem with __forceinline. */
1076 __forceinline static int
1077 __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1078  kmp_int32 gtid) {
1079  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1080  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1081  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1082  volatile kmp_uint32 *spin_here_p;
1083 
1084 #if OMPT_SUPPORT
1085  ompt_state_t prev_state = ompt_state_undefined;
1086 #endif
1087 
1088  KA_TRACE(1000,
1089  ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1090 
1091  KMP_FSYNC_PREPARE(lck);
1092  KMP_DEBUG_ASSERT(this_thr != NULL);
1093  spin_here_p = &this_thr->th.th_spin_here;
1094 
1095 #ifdef DEBUG_QUEUING_LOCKS
1096  TRACE_LOCK(gtid + 1, "acq ent");
1097  if (*spin_here_p)
1098  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1099  if (this_thr->th.th_next_waiting != 0)
1100  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1101 #endif
1102  KMP_DEBUG_ASSERT(!*spin_here_p);
1103  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1104 
1105  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1106  head_id_p that may follow, not just in execution order, but also in
1107  visibility order. This way, when a releasing thread observes the changes to
1108  the queue by this thread, it can rightly assume that spin_here_p has
1109  already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1110  not premature. If the releasing thread sets spin_here_p to FALSE before
1111  this thread sets it to TRUE, this thread will hang. */
1112  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1113 
1114  while (1) {
1115  kmp_int32 enqueued;
1116  kmp_int32 head;
1117  kmp_int32 tail;
1118 
1119  head = *head_id_p;
1120 
1121  switch (head) {
1122 
1123  case -1: {
1124 #ifdef DEBUG_QUEUING_LOCKS
1125  tail = *tail_id_p;
1126  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1127 #endif
      tail = 0; /* to make sure next link asynchronously read is not set
                   accidentally; this assignment prevents us from entering the
                   if ( tail > 0 ) condition in the enqueued case below, which
                   is not necessary for this state transition */
1132 
1133  /* try (-1,0)->(tid,tid) */
1134  enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1135  KMP_PACK_64(-1, 0),
1136  KMP_PACK_64(gtid + 1, gtid + 1));
1137 #ifdef DEBUG_QUEUING_LOCKS
1138  if (enqueued)
1139  TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1140 #endif
1141  } break;
1142 
1143  default: {
1144  tail = *tail_id_p;
1145  KMP_DEBUG_ASSERT(tail != gtid + 1);
1146 
1147 #ifdef DEBUG_QUEUING_LOCKS
1148  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1149 #endif
1150 
1151  if (tail == 0) {
1152  enqueued = FALSE;
1153  } else {
1154  /* try (h,t) or (h,h)->(h,tid) */
1155  enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1156 
1157 #ifdef DEBUG_QUEUING_LOCKS
1158  if (enqueued)
1159  TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1160 #endif
1161  }
1162  } break;
1163 
1164  case 0: /* empty queue */
1165  {
1166  kmp_int32 grabbed_lock;
1167 
1168 #ifdef DEBUG_QUEUING_LOCKS
1169  tail = *tail_id_p;
1170  TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1171 #endif
1172  /* try (0,0)->(-1,0) */
1173 
1174  /* only legal transition out of head = 0 is head = -1 with no change to
1175  * tail */
1176  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1177 
1178  if (grabbed_lock) {
1179 
1180  *spin_here_p = FALSE;
1181 
1182  KA_TRACE(
1183  1000,
1184  ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1185  lck, gtid));
1186 #ifdef DEBUG_QUEUING_LOCKS
1187  TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1188 #endif
1189 
1190 #if OMPT_SUPPORT
1191  if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1192  /* change the state before clearing wait_id */
1193  this_thr->th.ompt_thread_info.state = prev_state;
1194  this_thr->th.ompt_thread_info.wait_id = 0;
1195  }
1196 #endif
1197 
1198  KMP_FSYNC_ACQUIRED(lck);
1199  return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1200  }
1201  enqueued = FALSE;
1202  } break;
1203  }
1204 
1205 #if OMPT_SUPPORT
1206  if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1207  /* this thread will spin; set wait_id before entering wait state */
1208  prev_state = this_thr->th.ompt_thread_info.state;
1209  this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1210  this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1211  }
1212 #endif
1213 
1214  if (enqueued) {
1215  if (tail > 0) {
1216  kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1217  KMP_ASSERT(tail_thr != NULL);
1218  tail_thr->th.th_next_waiting = gtid + 1;
1219  /* corresponding wait for this write in release code */
1220  }
1221  KA_TRACE(1000,
1222  ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1223  lck, gtid));
1224 
1225  KMP_MB();
1226  // ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1227  KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1228  // Synchronize writes to both runtime thread structures
1229  // and writes in user code.
1230  KMP_MB();
1231 
1232 #ifdef DEBUG_QUEUING_LOCKS
1233  TRACE_LOCK(gtid + 1, "acq spin");
1234 
1235  if (this_thr->th.th_next_waiting != 0)
1236  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1237 #endif
1238  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1239  KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1240  "waiting on queue\n",
1241  lck, gtid));
1242 
1243 #ifdef DEBUG_QUEUING_LOCKS
1244  TRACE_LOCK(gtid + 1, "acq exit 2");
1245 #endif
1246 
1247 #if OMPT_SUPPORT
1248  /* change the state before clearing wait_id */
1249  this_thr->th.ompt_thread_info.state = prev_state;
1250  this_thr->th.ompt_thread_info.wait_id = 0;
1251 #endif
1252 
1253  /* got lock, we were dequeued by the thread that released lock */
1254  return KMP_LOCK_ACQUIRED_FIRST;
1255  }
1256 
1257  /* Yield if number of threads > number of logical processors */
1258  /* ToDo: Not sure why this should only be in oversubscription case,
1259  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1260  KMP_YIELD_OVERSUB();
1261 
1262 #ifdef DEBUG_QUEUING_LOCKS
1263  TRACE_LOCK(gtid + 1, "acq retry");
1264 #endif
1265  }
1266  KMP_ASSERT2(0, "should not get here");
1267  return KMP_LOCK_ACQUIRED_FIRST;
1268 }
1269 
1270 int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1271  KMP_DEBUG_ASSERT(gtid >= 0);
1272 
1273  int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1274  return retval;
1275 }
1276 
1277 static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1278  kmp_int32 gtid) {
1279  char const *const func = "omp_set_lock";
1280  if (lck->lk.initialized != lck) {
1281  KMP_FATAL(LockIsUninitialized, func);
1282  }
1283  if (__kmp_is_queuing_lock_nestable(lck)) {
1284  KMP_FATAL(LockNestableUsedAsSimple, func);
1285  }
1286  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1287  KMP_FATAL(LockIsAlreadyOwned, func);
1288  }
1289 
1290  __kmp_acquire_queuing_lock(lck, gtid);
1291 
1292  lck->lk.owner_id = gtid + 1;
1293  return KMP_LOCK_ACQUIRED_FIRST;
1294 }
1295 
1296 int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1297  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1298  kmp_int32 head;
1299 #ifdef KMP_DEBUG
1300  kmp_info_t *this_thr;
1301 #endif
1302 
1303  KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1304  KMP_DEBUG_ASSERT(gtid >= 0);
1305 #ifdef KMP_DEBUG
1306  this_thr = __kmp_thread_from_gtid(gtid);
1307  KMP_DEBUG_ASSERT(this_thr != NULL);
1308  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1309 #endif
1310 
1311  head = *head_id_p;
1312 
1313  if (head == 0) { /* nobody on queue, nobody holding */
1314  /* try (0,0)->(-1,0) */
1315  if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1316  KA_TRACE(1000,
1317  ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1318  KMP_FSYNC_ACQUIRED(lck);
1319  return TRUE;
1320  }
1321  }
1322 
1323  KA_TRACE(1000,
1324  ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1325  return FALSE;
1326 }
1327 
1328 static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1329  kmp_int32 gtid) {
1330  char const *const func = "omp_test_lock";
1331  if (lck->lk.initialized != lck) {
1332  KMP_FATAL(LockIsUninitialized, func);
1333  }
1334  if (__kmp_is_queuing_lock_nestable(lck)) {
1335  KMP_FATAL(LockNestableUsedAsSimple, func);
1336  }
1337 
1338  int retval = __kmp_test_queuing_lock(lck, gtid);
1339 
1340  if (retval) {
1341  lck->lk.owner_id = gtid + 1;
1342  }
1343  return retval;
1344 }
1345 
1346 int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1347  volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1348  volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1349 
1350  KA_TRACE(1000,
1351  ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1352  KMP_DEBUG_ASSERT(gtid >= 0);
1353 #if KMP_DEBUG || DEBUG_QUEUING_LOCKS
1354  kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1355 #endif
1356  KMP_DEBUG_ASSERT(this_thr != NULL);
1357 #ifdef DEBUG_QUEUING_LOCKS
1358  TRACE_LOCK(gtid + 1, "rel ent");
1359 
1360  if (this_thr->th.th_spin_here)
1361  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1362  if (this_thr->th.th_next_waiting != 0)
1363  __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1364 #endif
1365  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1366  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1367 
1368  KMP_FSYNC_RELEASING(lck);
1369 
1370  while (1) {
1371  kmp_int32 dequeued;
1372  kmp_int32 head;
1373  kmp_int32 tail;
1374 
1375  head = *head_id_p;
1376 
1377 #ifdef DEBUG_QUEUING_LOCKS
1378  tail = *tail_id_p;
1379  TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1380  if (head == 0)
1381  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1382 #endif
1383  KMP_DEBUG_ASSERT(head !=
1384  0); /* holding the lock, head must be -1 or queue head */
1385 
1386  if (head == -1) { /* nobody on queue */
1387  /* try (-1,0)->(0,0) */
1388  if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1389  KA_TRACE(
1390  1000,
1391  ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1392  lck, gtid));
1393 #ifdef DEBUG_QUEUING_LOCKS
1394  TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1395 #endif
1396 
1397 #if OMPT_SUPPORT
1398 /* nothing to do - no other thread is trying to shift blame */
1399 #endif
1400  return KMP_LOCK_RELEASED;
1401  }
1402  dequeued = FALSE;
1403  } else {
1404  KMP_MB();
1405  tail = *tail_id_p;
1406  if (head == tail) { /* only one thread on the queue */
1407 #ifdef DEBUG_QUEUING_LOCKS
1408  if (head <= 0)
1409  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1410 #endif
1411  KMP_DEBUG_ASSERT(head > 0);
1412 
1413  /* try (h,h)->(-1,0) */
1414  dequeued = KMP_COMPARE_AND_STORE_REL64(
1415  RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1416  KMP_PACK_64(-1, 0));
1417 #ifdef DEBUG_QUEUING_LOCKS
1418  TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1419 #endif
1420 
1421  } else {
1422  volatile kmp_int32 *waiting_id_p;
1423  kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1424  KMP_DEBUG_ASSERT(head_thr != NULL);
1425  waiting_id_p = &head_thr->th.th_next_waiting;
1426 
1427 /* Does this require synchronous reads? */
1428 #ifdef DEBUG_QUEUING_LOCKS
1429  if (head <= 0 || tail <= 0)
1430  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1431 #endif
1432  KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1433 
1434  /* try (h,t)->(h',t) or (t,t) */
1435  KMP_MB();
1436  /* make sure enqueuing thread has time to update next waiting thread
1437  * field */
1438  *head_id_p =
1439  KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1440 #ifdef DEBUG_QUEUING_LOCKS
1441  TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1442 #endif
1443  dequeued = TRUE;
1444  }
1445  }
1446 
1447  if (dequeued) {
1448  kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1449  KMP_DEBUG_ASSERT(head_thr != NULL);
1450 
1451 /* Does this require synchronous reads? */
1452 #ifdef DEBUG_QUEUING_LOCKS
1453  if (head <= 0 || tail <= 0)
1454  __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1455 #endif
1456  KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1457 
      /* For clean code only. The waiting thread is not released until
         th_spin_here is cleared below; clearing th_next_waiting first
         prevents a race with the acquire code. */
1460  head_thr->th.th_next_waiting = 0;
1461 #ifdef DEBUG_QUEUING_LOCKS
1462  TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1463 #endif
1464 
1465  KMP_MB();
1466  /* reset spin value */
1467  head_thr->th.th_spin_here = FALSE;
1468 
1469  KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1470  "dequeuing\n",
1471  lck, gtid));
1472 #ifdef DEBUG_QUEUING_LOCKS
1473  TRACE_LOCK(gtid + 1, "rel exit 2");
1474 #endif
1475  return KMP_LOCK_RELEASED;
1476  }
1477  /* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1478  threads */
1479 
1480 #ifdef DEBUG_QUEUING_LOCKS
1481  TRACE_LOCK(gtid + 1, "rel retry");
1482 #endif
1483 
1484  } /* while */
1485  KMP_ASSERT2(0, "should not get here");
1486  return KMP_LOCK_RELEASED;
1487 }
1488 
1489 static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1490  kmp_int32 gtid) {
1491  char const *const func = "omp_unset_lock";
1492  KMP_MB(); /* in case another processor initialized lock */
1493  if (lck->lk.initialized != lck) {
1494  KMP_FATAL(LockIsUninitialized, func);
1495  }
1496  if (__kmp_is_queuing_lock_nestable(lck)) {
1497  KMP_FATAL(LockNestableUsedAsSimple, func);
1498  }
1499  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1500  KMP_FATAL(LockUnsettingFree, func);
1501  }
1502  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1503  KMP_FATAL(LockUnsettingSetByAnother, func);
1504  }
1505  lck->lk.owner_id = 0;
1506  return __kmp_release_queuing_lock(lck, gtid);
1507 }
1508 
1509 void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1510  lck->lk.location = NULL;
1511  lck->lk.head_id = 0;
1512  lck->lk.tail_id = 0;
1513  lck->lk.next_ticket = 0;
1514  lck->lk.now_serving = 0;
1515  lck->lk.owner_id = 0; // no thread owns the lock.
1516  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1517  lck->lk.initialized = lck;
1518 
1519  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1520 }
1521 
1522 void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1523  lck->lk.initialized = NULL;
1524  lck->lk.location = NULL;
1525  lck->lk.head_id = 0;
1526  lck->lk.tail_id = 0;
1527  lck->lk.next_ticket = 0;
1528  lck->lk.now_serving = 0;
1529  lck->lk.owner_id = 0;
1530  lck->lk.depth_locked = -1;
1531 }
1532 
1533 static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1534  char const *const func = "omp_destroy_lock";
1535  if (lck->lk.initialized != lck) {
1536  KMP_FATAL(LockIsUninitialized, func);
1537  }
1538  if (__kmp_is_queuing_lock_nestable(lck)) {
1539  KMP_FATAL(LockNestableUsedAsSimple, func);
1540  }
1541  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1542  KMP_FATAL(LockStillOwned, func);
1543  }
1544  __kmp_destroy_queuing_lock(lck);
1545 }
1546 
1547 // nested queuing locks
1548 
1549 int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1550  KMP_DEBUG_ASSERT(gtid >= 0);
1551 
1552  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1553  lck->lk.depth_locked += 1;
1554  return KMP_LOCK_ACQUIRED_NEXT;
1555  } else {
1556  __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1557  KMP_MB();
1558  lck->lk.depth_locked = 1;
1559  KMP_MB();
1560  lck->lk.owner_id = gtid + 1;
1561  return KMP_LOCK_ACQUIRED_FIRST;
1562  }
1563 }
1564 
1565 static int
1566 __kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1567  kmp_int32 gtid) {
1568  char const *const func = "omp_set_nest_lock";
1569  if (lck->lk.initialized != lck) {
1570  KMP_FATAL(LockIsUninitialized, func);
1571  }
1572  if (!__kmp_is_queuing_lock_nestable(lck)) {
1573  KMP_FATAL(LockSimpleUsedAsNestable, func);
1574  }
1575  return __kmp_acquire_nested_queuing_lock(lck, gtid);
1576 }
1577 
1578 int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1579  int retval;
1580 
1581  KMP_DEBUG_ASSERT(gtid >= 0);
1582 
1583  if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1584  retval = ++lck->lk.depth_locked;
1585  } else if (!__kmp_test_queuing_lock(lck, gtid)) {
1586  retval = 0;
1587  } else {
1588  KMP_MB();
1589  retval = lck->lk.depth_locked = 1;
1590  KMP_MB();
1591  lck->lk.owner_id = gtid + 1;
1592  }
1593  return retval;
1594 }
1595 
1596 static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1597  kmp_int32 gtid) {
1598  char const *const func = "omp_test_nest_lock";
1599  if (lck->lk.initialized != lck) {
1600  KMP_FATAL(LockIsUninitialized, func);
1601  }
1602  if (!__kmp_is_queuing_lock_nestable(lck)) {
1603  KMP_FATAL(LockSimpleUsedAsNestable, func);
1604  }
1605  return __kmp_test_nested_queuing_lock(lck, gtid);
1606 }
1607 
1608 int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1609  KMP_DEBUG_ASSERT(gtid >= 0);
1610 
1611  KMP_MB();
1612  if (--(lck->lk.depth_locked) == 0) {
1613  KMP_MB();
1614  lck->lk.owner_id = 0;
1615  __kmp_release_queuing_lock(lck, gtid);
1616  return KMP_LOCK_RELEASED;
1617  }
1618  return KMP_LOCK_STILL_HELD;
1619 }
1620 
1621 static int
1622 __kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1623  kmp_int32 gtid) {
1624  char const *const func = "omp_unset_nest_lock";
1625  KMP_MB(); /* in case another processor initialized lock */
1626  if (lck->lk.initialized != lck) {
1627  KMP_FATAL(LockIsUninitialized, func);
1628  }
1629  if (!__kmp_is_queuing_lock_nestable(lck)) {
1630  KMP_FATAL(LockSimpleUsedAsNestable, func);
1631  }
1632  if (__kmp_get_queuing_lock_owner(lck) == -1) {
1633  KMP_FATAL(LockUnsettingFree, func);
1634  }
1635  if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1636  KMP_FATAL(LockUnsettingSetByAnother, func);
1637  }
1638  return __kmp_release_nested_queuing_lock(lck, gtid);
1639 }
1640 
1641 void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1642  __kmp_init_queuing_lock(lck);
1643  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1644 }
1645 
1646 void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1647  __kmp_destroy_queuing_lock(lck);
1648  lck->lk.depth_locked = 0;
1649 }
1650 
1651 static void
1652 __kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1653  char const *const func = "omp_destroy_nest_lock";
1654  if (lck->lk.initialized != lck) {
1655  KMP_FATAL(LockIsUninitialized, func);
1656  }
1657  if (!__kmp_is_queuing_lock_nestable(lck)) {
1658  KMP_FATAL(LockSimpleUsedAsNestable, func);
1659  }
1660  if (__kmp_get_queuing_lock_owner(lck) != -1) {
1661  KMP_FATAL(LockStillOwned, func);
1662  }
1663  __kmp_destroy_nested_queuing_lock(lck);
1664 }
1665 
1666 // access functions to fields which don't exist for all lock kinds.
1667 
1668 static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1669  return lck->lk.location;
1670 }
1671 
1672 static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1673  const ident_t *loc) {
1674  lck->lk.location = loc;
1675 }
1676 
1677 static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1678  return lck->lk.flags;
1679 }
1680 
1681 static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1682  kmp_lock_flags_t flags) {
1683  lck->lk.flags = flags;
1684 }
1685 
1686 #if KMP_USE_ADAPTIVE_LOCKS
1687 
1688 /* RTM Adaptive locks */
1689 
1690 #if KMP_HAVE_RTM_INTRINSICS
1691 #include <immintrin.h>
1692 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1693 
1694 #else
1695 
1696 // Values from the status register after failed speculation.
1697 #define _XBEGIN_STARTED (~0u)
1698 #define _XABORT_EXPLICIT (1 << 0)
1699 #define _XABORT_RETRY (1 << 1)
1700 #define _XABORT_CONFLICT (1 << 2)
1701 #define _XABORT_CAPACITY (1 << 3)
1702 #define _XABORT_DEBUG (1 << 4)
1703 #define _XABORT_NESTED (1 << 5)
1704 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1705 
1706 // Aborts for which it's worth trying again immediately
1707 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1708 
1709 #define STRINGIZE_INTERNAL(arg) #arg
1710 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1711 
1712 // Access to RTM instructions
1713 /*A version of XBegin which returns -1 on speculation, and the value of EAX on
1714  an abort. This is the same definition as the compiler intrinsic that will be
1715  supported at some point. */
1716 static __inline int _xbegin() {
1717  int res = -1;
1718 
1719 #if KMP_OS_WINDOWS
1720 #if KMP_ARCH_X86_64
1721  _asm {
1722  _emit 0xC7
1723  _emit 0xF8
1724  _emit 2
1725  _emit 0
1726  _emit 0
1727  _emit 0
1728  jmp L2
1729  mov res, eax
1730  L2:
1731  }
1732 #else /* IA32 */
1733  _asm {
1734  _emit 0xC7
1735  _emit 0xF8
1736  _emit 2
1737  _emit 0
1738  _emit 0
1739  _emit 0
1740  jmp L2
1741  mov res, eax
1742  L2:
1743  }
1744 #endif // KMP_ARCH_X86_64
1745 #else
1746  /* Note that %eax must be marked as killed (clobbered), because the XSR is
1747  returned in %eax (%rax) on abort. Other register values are restored, so they
1748  don't need to be killed.
1749 
1750  We must also mark 'res' as an input and an output, since otherwise
1751  'res = -1' may be dropped as being dead, whereas we do need the assignment on
1752  the successful (i.e., non-abort) path. */
1753  __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"
1754  " .long 1f-1b-6\n"
1755  " jmp 2f\n"
1756  "1: movl %%eax,%0\n"
1757  "2:"
1758  : "+r"(res)::"memory", "%eax");
1759 #endif // KMP_OS_WINDOWS
1760  return res;
1761 }
1762 
1763 /* Transaction end */
1764 static __inline void _xend() {
1765 #if KMP_OS_WINDOWS
1766  __asm {
1767  _emit 0x0f
1768  _emit 0x01
1769  _emit 0xd5
1770  }
1771 #else
1772  __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1773 #endif
1774 }
1775 
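// Canonical usage of _xbegin()/_xend() above (an illustrative sketch, not a
// call site in this file -- the real uses are in the adaptive and RTM lock
// code further down):
//   int status = _xbegin();
//   if (status == _XBEGIN_STARTED) {
//     // ... speculative critical section ...
//     _xend(); // commit the transaction
//   } else {
//     // 'status' holds the abort reason; retry or fall back to a real lock.
//   }
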
1776 /* This is a macro; the argument must be a single-byte constant that can be
1777    evaluated by the inline assembler, since it is emitted as a byte into the
1778    assembly code. */
1779 // clang-format off
1780 #if KMP_OS_WINDOWS
1781 #define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1782 #else
1783 #define _xabort(ARG) \
1784  __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1785 #endif
1786 // clang-format on
1787 #endif // KMP_HAVE_RTM_INTRINSICS
1788 
1789 // Statistics are collected for testing purposes.
1790 #if KMP_DEBUG_ADAPTIVE_LOCKS
1791 
1792 // We accumulate speculative lock statistics when the lock is destroyed. We
1793 // keep locks that haven't been destroyed in the liveLocks list so that we can
1794 // grab their statistics too.
1795 static kmp_adaptive_lock_statistics_t destroyedStats;
1796 
1797 // To hold the list of live locks.
1798 static kmp_adaptive_lock_info_t liveLocks;
1799 
1800 // A lock so we can safely update the list of locks.
1801 static kmp_bootstrap_lock_t chain_lock =
1802  KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1803 
1804 // Initialize the list of stats.
1805 void __kmp_init_speculative_stats() {
1806  kmp_adaptive_lock_info_t *lck = &liveLocks;
1807 
1808  memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1809  sizeof(lck->stats));
1810  lck->stats.next = lck;
1811  lck->stats.prev = lck;
1812 
1813  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1814  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1815 
1816  __kmp_init_bootstrap_lock(&chain_lock);
1817 }
1818 
1819 // Insert the lock into the circular list
1820 static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1821  __kmp_acquire_bootstrap_lock(&chain_lock);
1822 
1823  lck->stats.next = liveLocks.stats.next;
1824  lck->stats.prev = &liveLocks;
1825 
1826  liveLocks.stats.next = lck;
1827  lck->stats.next->stats.prev = lck;
1828 
1829  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1830  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1831 
1832  __kmp_release_bootstrap_lock(&chain_lock);
1833 }
1834 
1835 static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1836  KMP_ASSERT(lck->stats.next->stats.prev == lck);
1837  KMP_ASSERT(lck->stats.prev->stats.next == lck);
1838 
1839  kmp_adaptive_lock_info_t *n = lck->stats.next;
1840  kmp_adaptive_lock_info_t *p = lck->stats.prev;
1841 
1842  n->stats.prev = p;
1843  p->stats.next = n;
1844 }
1845 
1846 static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1847  memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1848  sizeof(lck->stats));
1849  __kmp_remember_lock(lck);
1850 }
1851 
1852 static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1853  kmp_adaptive_lock_info_t *lck) {
1854  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1855 
1856  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1857  t->successfulSpeculations += s->successfulSpeculations;
1858  t->hardFailedSpeculations += s->hardFailedSpeculations;
1859  t->softFailedSpeculations += s->softFailedSpeculations;
1860  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1861  t->lemmingYields += s->lemmingYields;
1862 }
1863 
1864 static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1865  __kmp_acquire_bootstrap_lock(&chain_lock);
1866 
1867  __kmp_add_stats(&destroyedStats, lck);
1868  __kmp_forget_lock(lck);
1869 
1870  __kmp_release_bootstrap_lock(&chain_lock);
1871 }
1872 
1873 static float percent(kmp_uint32 count, kmp_uint32 total) {
1874  return (total == 0) ? 0.0 : (100.0 * count) / total;
1875 }
1876 
1877 void __kmp_print_speculative_stats() {
1878  kmp_adaptive_lock_statistics_t total = destroyedStats;
1879  kmp_adaptive_lock_info_t *lck;
1880 
1881  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1882  __kmp_add_stats(&total, lck);
1883  }
1884  kmp_adaptive_lock_statistics_t *t = &total;
1885  kmp_uint32 totalSections =
1886  t->nonSpeculativeAcquires + t->successfulSpeculations;
1887  kmp_uint32 totalSpeculations = t->successfulSpeculations +
1888  t->hardFailedSpeculations +
1889  t->softFailedSpeculations;
1890  if (totalSections <= 0)
1891  return;
1892 
1893  kmp_safe_raii_file_t statsFile;
1894  if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1895  statsFile.set_stdout();
1896  } else {
1897  size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1898  char buffer[buffLen];
1899  KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1900  (kmp_int32)getpid());
1901  statsFile.open(buffer, "w");
1902  }
1903 
1904  fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1905  fprintf(statsFile,
1906  " Lock parameters: \n"
1907  " max_soft_retries : %10d\n"
1908  " max_badness : %10d\n",
1909  __kmp_adaptive_backoff_params.max_soft_retries,
1910  __kmp_adaptive_backoff_params.max_badness);
1911  fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1912  t->nonSpeculativeAcquireAttempts);
1913  fprintf(statsFile, " Total critical sections : %10d\n",
1914  totalSections);
1915  fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",
1916  t->successfulSpeculations,
1917  percent(t->successfulSpeculations, totalSections));
1918  fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
1919  t->nonSpeculativeAcquires,
1920  percent(t->nonSpeculativeAcquires, totalSections));
1921  fprintf(statsFile, " Lemming yields : %10d\n\n",
1922  t->lemmingYields);
1923 
1924  fprintf(statsFile, " Speculative acquire attempts : %10d\n",
1925  totalSpeculations);
1926  fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",
1927  t->successfulSpeculations,
1928  percent(t->successfulSpeculations, totalSpeculations));
1929  fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",
1930  t->softFailedSpeculations,
1931  percent(t->softFailedSpeculations, totalSpeculations));
1932  fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",
1933  t->hardFailedSpeculations,
1934  percent(t->hardFailedSpeculations, totalSpeculations));
1935 }
1936 
1937 #define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1938 #else
1939 #define KMP_INC_STAT(lck, stat)
1940 
1941 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
1942 
1943 static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1944  // It is enough to check that the head_id is zero.
1945  // We don't also need to check the tail.
1946  bool res = lck->lk.head_id == 0;
1947 
1948 // We need a fence here, since we must ensure that no memory operations
1949 // from later in this thread float above that read.
1950 #if KMP_COMPILER_ICC
1951  _mm_mfence();
1952 #else
1953  __sync_synchronize();
1954 #endif
1955 
1956  return res;
1957 }
1958 
1959 // Functions for manipulating the badness
1960 static __inline void
1961 __kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1962  // Reset the badness to zero so we eagerly try to speculate again
1963  lck->lk.adaptive.badness = 0;
1964  KMP_INC_STAT(lck, successfulSpeculations);
1965 }
1966 
1967 // Create a bit mask with one more set bit.
1968 static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1969  kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1970  if (newBadness > lck->lk.adaptive.max_badness) {
1971  return;
1972  } else {
1973  lck->lk.adaptive.badness = newBadness;
1974  }
1975 }
1976 
1977 // Check whether speculation should be attempted.
1978 KMP_ATTRIBUTE_TARGET_RTM
1979 static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1980  kmp_int32 gtid) {
1981  kmp_uint32 badness = lck->lk.adaptive.badness;
1982  kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1983  int res = (attempts & badness) == 0;
1984  return res;
1985 }
1986 
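// Worked example (editorial note, not part of the runtime): the badness is a
// mask of low set bits, so (attempts & badness) == 0 holds exactly when the
// acquire-attempt count is a multiple of 2^k (k = number of set bits):
//   badness == 0 (0b000) -> try to speculate on every attempt
//   badness == 1 (0b001) -> on every 2nd attempt
//   badness == 3 (0b011) -> on every 4th attempt
//   badness == 7 (0b111) -> on every 8th attempt
// Each failed round of speculation shifts one more bit into the mask (see
// __kmp_step_badness above), and a success resets it to zero, so the lock
// adapts how often it pays the cost of a doomed transaction.
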
1987 // Attempt to acquire only the speculative lock.
1988 // Does not back off to the non-speculative lock.
1989 KMP_ATTRIBUTE_TARGET_RTM
1990 static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
1991  kmp_int32 gtid) {
1992  int retries = lck->lk.adaptive.max_soft_retries;
1993 
1994  // We don't explicitly count the start of speculation; rather, we record the
1995  // results (success, hard fail, soft fail). The sum of all of those is the
1996  // total number of times we started speculation, since all speculations must
1997  // end in one of those ways.
1998  do {
1999  kmp_uint32 status = _xbegin();
2000  // Switch this in to disable actual speculation but exercise at least some
2001  // of the rest of the code. Useful for debugging...
2002  // kmp_uint32 status = _XABORT_NESTED;
2003 
2004  if (status == _XBEGIN_STARTED) {
2005  /* We have successfully started speculation. Check that no-one acquired
2006  the lock for real between when we last looked and now. This also gets
2007  the lock cache line into our read-set, which we need so that we'll
2008  abort if anyone later claims it for real. */
2009  if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2010  // Lock is now visibly acquired, so someone beat us to it. Abort the
2011  // transaction so we'll restart from _xbegin with the failure status.
2012  _xabort(0x01);
2013  KMP_ASSERT2(0, "should not get here");
2014  }
2015  return 1; // Lock has been acquired (speculatively)
2016  } else {
2017  // We have aborted, update the statistics
2018  if (status & SOFT_ABORT_MASK) {
2019  KMP_INC_STAT(lck, softFailedSpeculations);
2020  // and loop round to retry.
2021  } else {
2022  KMP_INC_STAT(lck, hardFailedSpeculations);
2023  // Give up if we had a hard failure.
2024  break;
2025  }
2026  }
2027  } while (retries--); // Loop while we have retries, and didn't fail hard.
2028 
2029  // Either we had a hard failure, or we didn't succeed softly after the full
2030  // set of attempts; in both cases, increase the badness to back off speculation.
2031  __kmp_step_badness(lck);
2032  return 0;
2033 }
2034 
2035 // Attempt to acquire the speculative lock, or back off to the non-speculative
2036 // one if the speculative lock cannot be acquired.
2037 // We can succeed speculatively, non-speculatively, or fail.
2038 static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2039  // First try to acquire the lock speculatively
2040  if (__kmp_should_speculate(lck, gtid) &&
2041  __kmp_test_adaptive_lock_only(lck, gtid))
2042  return 1;
2043 
2044  // Speculative acquisition failed, so try to acquire it non-speculatively.
2045  // Count the non-speculative acquire attempt
2046  lck->lk.adaptive.acquire_attempts++;
2047 
2048  // Use base, non-speculative lock.
2049  if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2050  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2051  return 1; // Lock is acquired (non-speculatively)
2052  } else {
2053  return 0; // Failed to acquire the lock, it's already visibly locked.
2054  }
2055 }
2056 
2057 static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2058  kmp_int32 gtid) {
2059  char const *const func = "omp_test_lock";
2060  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2061  KMP_FATAL(LockIsUninitialized, func);
2062  }
2063 
2064  int retval = __kmp_test_adaptive_lock(lck, gtid);
2065 
2066  if (retval) {
2067  lck->lk.qlk.owner_id = gtid + 1;
2068  }
2069  return retval;
2070 }
2071 
2072 // Block until we can acquire a speculative, adaptive lock. We check whether we
2073 // should be trying to speculate. If we should be, we check the real lock to see
2074 // if it is free, and, if not, pause without attempting to acquire it until it
2075 // is. Then we try the speculative acquire. This means that although we suffer
2076 // from lemmings a little (because we can't acquire the lock speculatively
2077 // until the queue of waiting threads has cleared), we don't get into a state
2078 // where we can never acquire the lock speculatively (because we force the queue
2079 // to clear by preventing new arrivals from entering the queue). This does mean
2080 // that when we're trying to break lemmings, the lock is no longer fair. However,
2081 // OpenMP makes no guarantee that its locks are fair, so this isn't a real
2082 // problem.
2083 static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2084  kmp_int32 gtid) {
2085  if (__kmp_should_speculate(lck, gtid)) {
2086  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2087  if (__kmp_test_adaptive_lock_only(lck, gtid))
2088  return;
2089  // We tried speculation and failed, so give up.
2090  } else {
2091  // We can't try speculation until the lock is free, so we pause here
2092  // (without suspending on the queuing lock) to allow it to drain, then
2093  // try again. All other threads will also see the same result for
2094  // __kmp_should_speculate, so they will be doing the same if they try to
2095  // claim the lock from now on.
2096  while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2097  KMP_INC_STAT(lck, lemmingYields);
2098  KMP_YIELD(TRUE);
2099  }
2100 
2101  if (__kmp_test_adaptive_lock_only(lck, gtid))
2102  return;
2103  }
2104  }
2105 
2106  // Speculative acquisition failed, so acquire it non-speculatively.
2107  // Count the non-speculative acquire attempt
2108  lck->lk.adaptive.acquire_attempts++;
2109 
2110  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2111  // We have acquired the base lock, so count that.
2112  KMP_INC_STAT(lck, nonSpeculativeAcquires);
2113 }
2114 
2115 static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2116  kmp_int32 gtid) {
2117  char const *const func = "omp_set_lock";
2118  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2119  KMP_FATAL(LockIsUninitialized, func);
2120  }
2121  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2122  KMP_FATAL(LockIsAlreadyOwned, func);
2123  }
2124 
2125  __kmp_acquire_adaptive_lock(lck, gtid);
2126 
2127  lck->lk.qlk.owner_id = gtid + 1;
2128 }
2129 
2130 KMP_ATTRIBUTE_TARGET_RTM
2131 static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2132  kmp_int32 gtid) {
2133  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2134  lck))) { // If the lock doesn't look claimed we must be speculating.
2135  // (Or the user's code is buggy and they're releasing without locking;
2136  // if we had XTEST we'd be able to check that case...)
2137  _xend(); // Exit speculation
2138  __kmp_update_badness_after_success(lck);
2139  } else { // Since the lock *is* visibly locked we're not speculating,
2140  // so should use the underlying lock's release scheme.
2141  __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2142  }
2143  return KMP_LOCK_RELEASED;
2144 }
2145 
2146 static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2147  kmp_int32 gtid) {
2148  char const *const func = "omp_unset_lock";
2149  KMP_MB(); /* in case another processor initialized lock */
2150  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2151  KMP_FATAL(LockIsUninitialized, func);
2152  }
2153  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2154  KMP_FATAL(LockUnsettingFree, func);
2155  }
2156  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2157  KMP_FATAL(LockUnsettingSetByAnother, func);
2158  }
2159  lck->lk.qlk.owner_id = 0;
2160  __kmp_release_adaptive_lock(lck, gtid);
2161  return KMP_LOCK_RELEASED;
2162 }
2163 
2164 static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2165  __kmp_init_queuing_lock(GET_QLK_PTR(lck));
2166  lck->lk.adaptive.badness = 0;
2167  lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2168  lck->lk.adaptive.max_soft_retries =
2169  __kmp_adaptive_backoff_params.max_soft_retries;
2170  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2171 #if KMP_DEBUG_ADAPTIVE_LOCKS
2172  __kmp_zero_speculative_stats(&lck->lk.adaptive);
2173 #endif
2174  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2175 }
2176 
2177 static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2178 #if KMP_DEBUG_ADAPTIVE_LOCKS
2179  __kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2180 #endif
2181  __kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2182  // Nothing needed for the speculative part.
2183 }
2184 
2185 static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2186  char const *const func = "omp_destroy_lock";
2187  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2188  KMP_FATAL(LockIsUninitialized, func);
2189  }
2190  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2191  KMP_FATAL(LockStillOwned, func);
2192  }
2193  __kmp_destroy_adaptive_lock(lck);
2194 }
2195 
2196 #endif // KMP_USE_ADAPTIVE_LOCKS
2197 
2198 /* ------------------------------------------------------------------------ */
2199 /* DRDPA ticket locks */
2200 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2201 
2202 static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2203  return lck->lk.owner_id - 1;
2204 }
2205 
2206 static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2207  return lck->lk.depth_locked != -1;
2208 }
2209 
2210 __forceinline static int
2211 __kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2212  kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2213  kmp_uint64 mask = lck->lk.mask; // atomic load
2214  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2215 
2216 #ifdef USE_LOCK_PROFILE
2217  if (polls[ticket & mask] != ticket)
2218  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2219 /* else __kmp_printf( "." );*/
2220 #endif /* USE_LOCK_PROFILE */
2221 
2222  // Now spin-wait, but reload the polls pointer and mask, in case the
2223  // polling area has been reconfigured. Unless it is reconfigured, the
2224  // reloads stay in L1 cache and are cheap.
2225  //
2226  // Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2227  // The current implementation of KMP_WAIT doesn't allow for mask
2228  // and poll to be re-read every spin iteration.
2229  kmp_uint32 spins;
2230  KMP_FSYNC_PREPARE(lck);
2231  KMP_INIT_YIELD(spins);
2232  while (polls[ticket & mask] < ticket) { // atomic load
2233  KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
2234  // Re-read the mask and the poll pointer from the lock structure.
2235  //
2236  // Make certain that "mask" is read before "polls" !!!
2237  //
2238  // If another thread reconfigures the polling area and updates these
2239  // values, and we get the new value of mask and the old polls pointer, we
2240  // could access memory beyond the end of the old polling area.
2241  mask = lck->lk.mask; // atomic load
2242  polls = lck->lk.polls; // atomic load
2243  }
2244 
2245  // Critical section starts here
2246  KMP_FSYNC_ACQUIRED(lck);
2247  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2248  ticket, lck));
2249  lck->lk.now_serving = ticket; // non-volatile store
2250 
2251  // Deallocate a garbage polling area if we know that we are the last
2252  // thread that could possibly access it.
2253  //
2254  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2255  // ticket.
2256  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2257  __kmp_free(lck->lk.old_polls);
2258  lck->lk.old_polls = NULL;
2259  lck->lk.cleanup_ticket = 0;
2260  }
2261 
2262  // Check to see if we should reconfigure the polling area.
2263  // If there is still a garbage polling area to be deallocated from a
2264  // previous reconfiguration, let a later thread reconfigure it.
2265  if (lck->lk.old_polls == NULL) {
2266  bool reconfigure = false;
2267  std::atomic<kmp_uint64> *old_polls = polls;
2268  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2269 
2270  if (TCR_4(__kmp_nth) >
2271  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2272  // We are in oversubscription mode. Contract the polling area
2273  // down to a single location, if that hasn't been done already.
2274  if (num_polls > 1) {
2275  reconfigure = true;
2276  num_polls = TCR_4(lck->lk.num_polls);
2277  mask = 0;
2278  num_polls = 1;
2279  polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2280  sizeof(*polls));
2281  polls[0] = ticket;
2282  }
2283  } else {
2284  // We are in under/fully subscribed mode. Check the number of
2285  // threads waiting on the lock. The size of the polling area
2286  // should be at least the number of threads waiting.
2287  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2288  if (num_waiting > num_polls) {
2289  kmp_uint32 old_num_polls = num_polls;
2290  reconfigure = true;
2291  do {
2292  mask = (mask << 1) | 1;
2293  num_polls *= 2;
2294  } while (num_polls <= num_waiting);
2295 
2296  // Allocate the new polling area, and copy the relevant portion
2297  // of the old polling area to the new area. __kmp_allocate()
2298  // zeroes the memory it allocates, and most of the old area is
2299  // just zero padding, so we only copy the release counters.
2300  polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2301  sizeof(*polls));
2302  kmp_uint32 i;
2303  for (i = 0; i < old_num_polls; i++) {
2304  polls[i].store(old_polls[i]);
2305  }
2306  }
2307  }
2308 
2309  if (reconfigure) {
2310  // Now write the updated fields back to the lock structure.
2311  //
2312  // Make certain that "polls" is written before "mask" !!!
2313  //
2314  // If another thread picks up the new value of mask and the old polls
2315  // pointer, it could access memory beyond the end of the old polling
2316  // area.
2317  //
2318  // On x86, we need memory fences.
2319  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2320  "lock %p to %d polls\n",
2321  ticket, lck, num_polls));
2322 
2323  lck->lk.old_polls = old_polls;
2324  lck->lk.polls = polls; // atomic store
2325 
2326  KMP_MB();
2327 
2328  lck->lk.num_polls = num_polls;
2329  lck->lk.mask = mask; // atomic store
2330 
2331  KMP_MB();
2332 
2333  // Only after the new polling area and mask have been flushed
2334  // to main memory can we update the cleanup ticket field.
2335  //
2336  // volatile load / non-volatile store
2337  lck->lk.cleanup_ticket = lck->lk.next_ticket;
2338  }
2339  }
2340  return KMP_LOCK_ACQUIRED_FIRST;
2341 }
2342 
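// Illustrative walk-through of the ticket/polling scheme above (editorial
// sketch; the numbers are made up): with num_polls == 4 and mask == 3,
//   - a thread that draws ticket 5 spins on polls[5 & 3] == polls[1] until
//     the value stored there reaches 5;
//   - the owner holding ticket 4 releases by storing 5 into polls[5 & 3]
//     (see __kmp_release_drdpa_lock below), so each waiter polls its own
//     slot instead of one shared location;
//   - if more threads are waiting than there are slots, the acquiring thread
//     grows the area (mask 3 -> 7, num_polls 4 -> 8, and so on) until
//     num_polls exceeds the number of waiters, keeping the old area around
//     as old_polls until the cleanup ticket shows no one can still read it.
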
2343 int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2344  int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2345  return retval;
2346 }
2347 
2348 static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2349  kmp_int32 gtid) {
2350  char const *const func = "omp_set_lock";
2351  if (lck->lk.initialized != lck) {
2352  KMP_FATAL(LockIsUninitialized, func);
2353  }
2354  if (__kmp_is_drdpa_lock_nestable(lck)) {
2355  KMP_FATAL(LockNestableUsedAsSimple, func);
2356  }
2357  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2358  KMP_FATAL(LockIsAlreadyOwned, func);
2359  }
2360 
2361  __kmp_acquire_drdpa_lock(lck, gtid);
2362 
2363  lck->lk.owner_id = gtid + 1;
2364  return KMP_LOCK_ACQUIRED_FIRST;
2365 }
2366 
2367 int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2368  // First get a ticket, then read the polls pointer and the mask.
2369  // The polls pointer must be read before the mask!!! (See above)
2370  kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2371  std::atomic<kmp_uint64> *polls = lck->lk.polls;
2372  kmp_uint64 mask = lck->lk.mask; // atomic load
2373  if (polls[ticket & mask] == ticket) {
2374  kmp_uint64 next_ticket = ticket + 1;
2375  if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2376  next_ticket)) {
2377  KMP_FSYNC_ACQUIRED(lck);
2378  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2379  ticket, lck));
2380  lck->lk.now_serving = ticket; // non-volatile store
2381 
2382  // Since no threads are waiting, there is no possibility that we would
2383  // want to reconfigure the polling area. We might have the cleanup ticket
2384  // value (which says that it is now safe to deallocate old_polls), but
2385  // we'll let a later thread which calls __kmp_acquire_lock do that - this
2386  // routine isn't supposed to block, and we would risk blocks if we called
2387  // __kmp_free() to do the deallocation.
2388  return TRUE;
2389  }
2390  }
2391  return FALSE;
2392 }
2393 
2394 static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2395  kmp_int32 gtid) {
2396  char const *const func = "omp_test_lock";
2397  if (lck->lk.initialized != lck) {
2398  KMP_FATAL(LockIsUninitialized, func);
2399  }
2400  if (__kmp_is_drdpa_lock_nestable(lck)) {
2401  KMP_FATAL(LockNestableUsedAsSimple, func);
2402  }
2403 
2404  int retval = __kmp_test_drdpa_lock(lck, gtid);
2405 
2406  if (retval) {
2407  lck->lk.owner_id = gtid + 1;
2408  }
2409  return retval;
2410 }
2411 
2412 int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2413  // Read the ticket value from the lock data struct, then the polls pointer and
2414  // the mask. The polls pointer must be read before the mask!!! (See above)
2415  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2416  std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2417  kmp_uint64 mask = lck->lk.mask; // atomic load
2418  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2419  ticket - 1, lck));
2420  KMP_FSYNC_RELEASING(lck);
2421  polls[ticket & mask] = ticket; // atomic store
2422  return KMP_LOCK_RELEASED;
2423 }
2424 
2425 static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2426  kmp_int32 gtid) {
2427  char const *const func = "omp_unset_lock";
2428  KMP_MB(); /* in case another processor initialized lock */
2429  if (lck->lk.initialized != lck) {
2430  KMP_FATAL(LockIsUninitialized, func);
2431  }
2432  if (__kmp_is_drdpa_lock_nestable(lck)) {
2433  KMP_FATAL(LockNestableUsedAsSimple, func);
2434  }
2435  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2436  KMP_FATAL(LockUnsettingFree, func);
2437  }
2438  if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2439  (__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2440  KMP_FATAL(LockUnsettingSetByAnother, func);
2441  }
2442  lck->lk.owner_id = 0;
2443  return __kmp_release_drdpa_lock(lck, gtid);
2444 }
2445 
2446 void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2447  lck->lk.location = NULL;
2448  lck->lk.mask = 0;
2449  lck->lk.num_polls = 1;
2450  lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2451  lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2452  lck->lk.cleanup_ticket = 0;
2453  lck->lk.old_polls = NULL;
2454  lck->lk.next_ticket = 0;
2455  lck->lk.now_serving = 0;
2456  lck->lk.owner_id = 0; // no thread owns the lock.
2457  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2458  lck->lk.initialized = lck;
2459 
2460  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2461 }
2462 
2463 void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2464  lck->lk.initialized = NULL;
2465  lck->lk.location = NULL;
2466  if (lck->lk.polls.load() != NULL) {
2467  __kmp_free(lck->lk.polls.load());
2468  lck->lk.polls = NULL;
2469  }
2470  if (lck->lk.old_polls != NULL) {
2471  __kmp_free(lck->lk.old_polls);
2472  lck->lk.old_polls = NULL;
2473  }
2474  lck->lk.mask = 0;
2475  lck->lk.num_polls = 0;
2476  lck->lk.cleanup_ticket = 0;
2477  lck->lk.next_ticket = 0;
2478  lck->lk.now_serving = 0;
2479  lck->lk.owner_id = 0;
2480  lck->lk.depth_locked = -1;
2481 }
2482 
2483 static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2484  char const *const func = "omp_destroy_lock";
2485  if (lck->lk.initialized != lck) {
2486  KMP_FATAL(LockIsUninitialized, func);
2487  }
2488  if (__kmp_is_drdpa_lock_nestable(lck)) {
2489  KMP_FATAL(LockNestableUsedAsSimple, func);
2490  }
2491  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2492  KMP_FATAL(LockStillOwned, func);
2493  }
2494  __kmp_destroy_drdpa_lock(lck);
2495 }
2496 
2497 // nested drdpa ticket locks
2498 
2499 int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2500  KMP_DEBUG_ASSERT(gtid >= 0);
2501 
2502  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2503  lck->lk.depth_locked += 1;
2504  return KMP_LOCK_ACQUIRED_NEXT;
2505  } else {
2506  __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2507  KMP_MB();
2508  lck->lk.depth_locked = 1;
2509  KMP_MB();
2510  lck->lk.owner_id = gtid + 1;
2511  return KMP_LOCK_ACQUIRED_FIRST;
2512  }
2513 }
2514 
2515 static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2516  kmp_int32 gtid) {
2517  char const *const func = "omp_set_nest_lock";
2518  if (lck->lk.initialized != lck) {
2519  KMP_FATAL(LockIsUninitialized, func);
2520  }
2521  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2522  KMP_FATAL(LockSimpleUsedAsNestable, func);
2523  }
2524  __kmp_acquire_nested_drdpa_lock(lck, gtid);
2525 }
2526 
2527 int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2528  int retval;
2529 
2530  KMP_DEBUG_ASSERT(gtid >= 0);
2531 
2532  if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2533  retval = ++lck->lk.depth_locked;
2534  } else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2535  retval = 0;
2536  } else {
2537  KMP_MB();
2538  retval = lck->lk.depth_locked = 1;
2539  KMP_MB();
2540  lck->lk.owner_id = gtid + 1;
2541  }
2542  return retval;
2543 }
2544 
2545 static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2546  kmp_int32 gtid) {
2547  char const *const func = "omp_test_nest_lock";
2548  if (lck->lk.initialized != lck) {
2549  KMP_FATAL(LockIsUninitialized, func);
2550  }
2551  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2552  KMP_FATAL(LockSimpleUsedAsNestable, func);
2553  }
2554  return __kmp_test_nested_drdpa_lock(lck, gtid);
2555 }
2556 
2557 int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2558  KMP_DEBUG_ASSERT(gtid >= 0);
2559 
2560  KMP_MB();
2561  if (--(lck->lk.depth_locked) == 0) {
2562  KMP_MB();
2563  lck->lk.owner_id = 0;
2564  __kmp_release_drdpa_lock(lck, gtid);
2565  return KMP_LOCK_RELEASED;
2566  }
2567  return KMP_LOCK_STILL_HELD;
2568 }
2569 
2570 static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2571  kmp_int32 gtid) {
2572  char const *const func = "omp_unset_nest_lock";
2573  KMP_MB(); /* in case another processor initialized lock */
2574  if (lck->lk.initialized != lck) {
2575  KMP_FATAL(LockIsUninitialized, func);
2576  }
2577  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2578  KMP_FATAL(LockSimpleUsedAsNestable, func);
2579  }
2580  if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2581  KMP_FATAL(LockUnsettingFree, func);
2582  }
2583  if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2584  KMP_FATAL(LockUnsettingSetByAnother, func);
2585  }
2586  return __kmp_release_nested_drdpa_lock(lck, gtid);
2587 }
2588 
2589 void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2590  __kmp_init_drdpa_lock(lck);
2591  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2592 }
2593 
2594 void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2595  __kmp_destroy_drdpa_lock(lck);
2596  lck->lk.depth_locked = 0;
2597 }
2598 
2599 static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2600  char const *const func = "omp_destroy_nest_lock";
2601  if (lck->lk.initialized != lck) {
2602  KMP_FATAL(LockIsUninitialized, func);
2603  }
2604  if (!__kmp_is_drdpa_lock_nestable(lck)) {
2605  KMP_FATAL(LockSimpleUsedAsNestable, func);
2606  }
2607  if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2608  KMP_FATAL(LockStillOwned, func);
2609  }
2610  __kmp_destroy_nested_drdpa_lock(lck);
2611 }
2612 
2613 // access functions to fields which don't exist for all lock kinds.
2614 
2615 static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2616  return lck->lk.location;
2617 }
2618 
2619 static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2620  const ident_t *loc) {
2621  lck->lk.location = loc;
2622 }
2623 
2624 static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2625  return lck->lk.flags;
2626 }
2627 
2628 static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2629  kmp_lock_flags_t flags) {
2630  lck->lk.flags = flags;
2631 }
2632 
2633 // Time stamp counter
2634 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2635 #define __kmp_tsc() __kmp_hardware_timestamp()
2636 // Runtime's default backoff parameters
2637 kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2638 #else
2639 // Use nanoseconds for other platforms
2640 extern kmp_uint64 __kmp_now_nsec();
2641 kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2642 #define __kmp_tsc() __kmp_now_nsec()
2643 #endif
2644 
2645 // A useful predicate for dealing with timestamps that may wrap.
2646 // Is a before b? Since the timestamps may wrap, this is asking whether it's
2647 // shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2648 // Times where going clockwise is less distance than going anti-clockwise
2649 // are in the future, others are in the past. E.g. with a = MAX-1 and
2650 // b = MAX+1 (= 0), a > b (true) does not mean a reached b; whereas
2651 // signed(a) = -2, signed(b) = 0 captures the actual difference.
2652 static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2653  return ((kmp_int64)b - (kmp_int64)a) > 0;
2654 }
2655 
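// A concrete check of the wrap-around behaviour (illustrative only):
//   kmp_uint64 a = ~(kmp_uint64)0 - 1; // "MAX - 1"
//   kmp_uint64 b = a + 2;              // wraps around to 0
//   before(a, b) == true   // b is a short clockwise step ahead of a
//   before(b, a) == false  // going the other way is "the past"
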
2656 // Truncated binary exponential backoff function
2657 void __kmp_spin_backoff(kmp_backoff_t *boff) {
2658  // We could flatten this loop, but making it a nested loop gives better results.
2659  kmp_uint32 i;
2660  for (i = boff->step; i > 0; i--) {
2661  kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2662  do {
2663  KMP_CPU_PAUSE();
2664  } while (before(__kmp_tsc(), goal));
2665  }
2666  boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2667 }
2668 
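// Illustrative trace (editorial note, assuming the initializer above maps to
// {step, max_backoff, min_tick}): with the x86 defaults {1, 4096, 100} each
// call spins for roughly step * min_tick ticks and then updates
//   step = (step << 1 | 1) & (max_backoff - 1),
// so successive calls on a contended lock wait about
//   1, 3, 7, 15, ..., 2047, 4095, 4095, ... units of min_tick --
// a truncated binary exponential backoff capped at max_backoff - 1.
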
2669 #if KMP_USE_DYNAMIC_LOCK
2670 
2671 // Direct lock initializer. It simply writes a tag to the low 8 bits of the
2672 // lock word.
2673 static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2674  kmp_dyna_lockseq_t seq) {
2675  TCW_4(*lck, KMP_GET_D_TAG(seq));
2676  KA_TRACE(
2677  20,
2678  ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2679 }
2680 
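// Editorial note on the encoding (the authoritative definitions live in
// kmp_lock.h, so treat the specifics as an assumption): KMP_GET_D_TAG is
// expected to produce an odd value, e.g. something like ((seq) << 1 | 1), so
// a direct lock word always has its low bit set. Indirect locks instead keep
// an even value in the word (an index shifted left by one, see
// __kmp_allocate_indirect_lock below), which lets the dispatch code use the
// low bit to tell the two kinds apart and route every indirect lock through
// slot 0 of the direct jump tables.
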
2681 #if KMP_USE_TSX
2682 
2683 // HLE lock functions - imported from the testbed runtime.
2684 #define HLE_ACQUIRE ".byte 0xf2;"
2685 #define HLE_RELEASE ".byte 0xf3;"
2686 
2687 static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2688  __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2689  return v;
2690 }
2691 
2692 static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2693 
2694 static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2695  TCW_4(*lck, 0);
2696 }
2697 
2698 static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2699  // Use gtid for KMP_LOCK_BUSY if necessary
2700  if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2701  int delay = 1;
2702  do {
2703  while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2704  for (int i = delay; i != 0; --i)
2705  KMP_CPU_PAUSE();
2706  delay = ((delay << 1) | 1) & 7;
2707  }
2708  } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2709  }
2710 }
2711 
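// Editorial note: the bounded backoff in __kmp_acquire_hle_lock grows the
// pause between re-checks as 1 -> 3 -> 7 and then stays at 7 PAUSE
// instructions (delay = ((delay << 1) | 1) & 7), presumably kept short
// because an HLE retry is expected to succeed soon after the eliding owner
// releases.
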
2712 static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2713  kmp_int32 gtid) {
2714  __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2715 }
2716 
2717 static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2718  __asm__ volatile(HLE_RELEASE "movl %1,%0"
2719  : "=m"(*lck)
2720  : "r"(KMP_LOCK_FREE(hle))
2721  : "memory");
2722  return KMP_LOCK_RELEASED;
2723 }
2724 
2725 static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2726  kmp_int32 gtid) {
2727  return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2728 }
2729 
2730 static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2731  return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2732 }
2733 
2734 static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2735  kmp_int32 gtid) {
2736  return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2737 }
2738 
2739 static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2740  __kmp_init_queuing_lock(lck);
2741 }
2742 
2743 static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2744  __kmp_destroy_queuing_lock(lck);
2745 }
2746 
2747 static void
2748 __kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2749  __kmp_destroy_queuing_lock_with_checks(lck);
2750 }
2751 
2752 KMP_ATTRIBUTE_TARGET_RTM
2753 static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2754  kmp_int32 gtid) {
2755  unsigned retries = 3, status;
2756  do {
2757  status = _xbegin();
2758  if (status == _XBEGIN_STARTED) {
2759  if (__kmp_is_unlocked_queuing_lock(lck))
2760  return;
2761  _xabort(0xff);
2762  }
2763  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2764  // Wait until lock becomes free
2765  while (!__kmp_is_unlocked_queuing_lock(lck)) {
2766  KMP_YIELD(TRUE);
2767  }
2768  } else if (!(status & _XABORT_RETRY))
2769  break;
2770  } while (retries--);
2771 
2772  // Fall-back non-speculative lock (xchg)
2773  __kmp_acquire_queuing_lock(lck, gtid);
2774 }
2775 
2776 static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2777  kmp_int32 gtid) {
2778  __kmp_acquire_rtm_queuing_lock(lck, gtid);
2779 }
2780 
2781 KMP_ATTRIBUTE_TARGET_RTM
2782 static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2783  kmp_int32 gtid) {
2784  if (__kmp_is_unlocked_queuing_lock(lck)) {
2785  // Releasing from speculation
2786  _xend();
2787  } else {
2788  // Releasing from a real lock
2789  __kmp_release_queuing_lock(lck, gtid);
2790  }
2791  return KMP_LOCK_RELEASED;
2792 }
2793 
2794 static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2795  kmp_int32 gtid) {
2796  return __kmp_release_rtm_queuing_lock(lck, gtid);
2797 }
2798 
2799 KMP_ATTRIBUTE_TARGET_RTM
2800 static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2801  kmp_int32 gtid) {
2802  unsigned retries = 3, status;
2803  do {
2804  status = _xbegin();
2805  if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2806  return 1;
2807  }
2808  if (!(status & _XABORT_RETRY))
2809  break;
2810  } while (retries--);
2811 
2812  return __kmp_test_queuing_lock(lck, gtid);
2813 }
2814 
2815 static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2816  kmp_int32 gtid) {
2817  return __kmp_test_rtm_queuing_lock(lck, gtid);
2818 }
2819 
2820 // Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin lock.
2821 typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2822 
2823 static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2824  KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2825 }
2826 
2827 static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2828  __kmp_destroy_rtm_spin_lock(lck);
2829 }
2830 
2831 KMP_ATTRIBUTE_TARGET_RTM
2832 static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2833  kmp_int32 gtid) {
2834  unsigned retries = 3, status;
2835  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2836  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2837  do {
2838  status = _xbegin();
2839  if (status == _XBEGIN_STARTED) {
2840  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2841  return KMP_LOCK_ACQUIRED_FIRST;
2842  _xabort(0xff);
2843  }
2844  if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2845  // Wait until lock becomes free
2846  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2847  KMP_YIELD(TRUE);
2848  }
2849  } else if (!(status & _XABORT_RETRY))
2850  break;
2851  } while (retries--);
2852 
2853  // Fall-back spin lock
2854  KMP_FSYNC_PREPARE(lck);
2855  kmp_backoff_t backoff = __kmp_spin_backoff_params;
2856  while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2857  !__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2858  __kmp_spin_backoff(&backoff);
2859  }
2860  KMP_FSYNC_ACQUIRED(lck);
2861  return KMP_LOCK_ACQUIRED_FIRST;
2862 }
2863 
2864 static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2865  kmp_int32 gtid) {
2866  return __kmp_acquire_rtm_spin_lock(lck, gtid);
2867 }
2868 
2869 KMP_ATTRIBUTE_TARGET_RTM
2870 static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2871  kmp_int32 gtid) {
2872  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2873  // Releasing from speculation
2874  _xend();
2875  } else {
2876  // Releasing from a real lock
2877  KMP_FSYNC_RELEASING(lck);
2878  KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2879  }
2880  return KMP_LOCK_RELEASED;
2881 }
2882 
2883 static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2884  kmp_int32 gtid) {
2885  return __kmp_release_rtm_spin_lock(lck, gtid);
2886 }
2887 
2888 KMP_ATTRIBUTE_TARGET_RTM
2889 static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2890  unsigned retries = 3, status;
2891  kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2892  kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2893  do {
2894  status = _xbegin();
2895  if (status == _XBEGIN_STARTED &&
2896  KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2897  return TRUE;
2898  }
2899  if (!(status & _XABORT_RETRY))
2900  break;
2901  } while (retries--);
2902 
2903  if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2904  __kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2905  KMP_FSYNC_ACQUIRED(lck);
2906  return TRUE;
2907  }
2908  return FALSE;
2909 }
2910 
2911 static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2912  kmp_int32 gtid) {
2913  return __kmp_test_rtm_spin_lock(lck, gtid);
2914 }
2915 
2916 #endif // KMP_USE_TSX
2917 
2918 // Entry functions for indirect locks (first element of direct lock jump tables)
2919 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2920  kmp_dyna_lockseq_t tag);
2921 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2922 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2923 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2924 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2925 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2926  kmp_int32);
2927 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2928  kmp_int32);
2929 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2930  kmp_int32);
2931 
2932 // Lock function definitions for the union parameter type
2933 #define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2934 
2935 #define expand1(lk, op) \
2936  static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \
2937  __kmp_##op##_##lk##_##lock(&lock->lk); \
2938  }
2939 #define expand2(lk, op) \
2940  static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \
2941  kmp_int32 gtid) { \
2942  return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \
2943  }
2944 #define expand3(lk, op) \
2945  static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \
2946  kmp_lock_flags_t flags) { \
2947  __kmp_set_##lk##_lock_flags(&lock->lk, flags); \
2948  }
2949 #define expand4(lk, op) \
2950  static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \
2951  const ident_t *loc) { \
2952  __kmp_set_##lk##_lock_location(&lock->lk, loc); \
2953  }
2954 
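// For reference, a sketch of what one instantiation produces (assuming the
// kmp_user_lock union members are named after the lock kinds, e.g. 'ticket'):
// expand2(ticket, acquire) roughly becomes
//   static int __kmp_acquire_ticket_lock(kmp_user_lock_p lock,
//                                        kmp_int32 gtid) {
//     return __kmp_acquire_ticket_lock(&lock->ticket, gtid);
//   }
// i.e. an overload taking the union pointer that simply forwards to the
// existing __kmp_acquire_ticket_lock(kmp_ticket_lock_t *, kmp_int32).
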
2955 KMP_FOREACH_LOCK_KIND(expand1, init)
2956 KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2957 KMP_FOREACH_LOCK_KIND(expand1, destroy)
2958 KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2959 KMP_FOREACH_LOCK_KIND(expand2, acquire)
2960 KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2961 KMP_FOREACH_LOCK_KIND(expand2, release)
2962 KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2963 KMP_FOREACH_LOCK_KIND(expand2, test)
2964 KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2965 KMP_FOREACH_LOCK_KIND(expand3, )
2966 KMP_FOREACH_LOCK_KIND(expand4, )
2967 
2968 #undef expand1
2969 #undef expand2
2970 #undef expand3
2971 #undef expand4
2972 
2973 // Jump tables for the direct lock functions
2974 // Only fill in the odd entries; that avoids the need to shift out the low bit.
2975 
2976 // init functions
2977 #define expand(l, op) 0, __kmp_init_direct_lock,
2978 void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2979  __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2980 #undef expand
2981 
2982 // destroy functions
2983 #define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
2984 static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
2985  __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2986 #undef expand
2987 #define expand(l, op) \
2988  0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
2989 static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
2990  __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
2991 #undef expand
2992 
2993 // set/acquire functions
2994 #define expand(l, op) \
2995  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
2996 static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
2997  __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
2998 #undef expand
2999 #define expand(l, op) \
3000  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3001 static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3002  __kmp_set_indirect_lock_with_checks, 0,
3003  KMP_FOREACH_D_LOCK(expand, acquire)};
3004 #undef expand
3005 
3006 // unset/release and test functions
3007 #define expand(l, op) \
3008  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3009 static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3010  __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3011 static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3012  __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3013 #undef expand
3014 #define expand(l, op) \
3015  0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3016 static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3017  __kmp_unset_indirect_lock_with_checks, 0,
3018  KMP_FOREACH_D_LOCK(expand, release)};
3019 static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3020  __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3021 #undef expand
3022 
3023 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3024 void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3025 int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3026 int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3027 int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3028 
3029 // Jump tables for the indirect lock functions
3030 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3031 void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3032  KMP_FOREACH_I_LOCK(expand, init)};
3033 #undef expand
3034 
3035 #define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3036 static void (*indirect_destroy[])(kmp_user_lock_p) = {
3037  KMP_FOREACH_I_LOCK(expand, destroy)};
3038 #undef expand
3039 #define expand(l, op) \
3040  (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3041 static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3042  KMP_FOREACH_I_LOCK(expand, destroy)};
3043 #undef expand
3044 
3045 // set/acquire functions
3046 #define expand(l, op) \
3047  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3048 static int (*indirect_set[])(kmp_user_lock_p,
3049  kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3050 #undef expand
3051 #define expand(l, op) \
3052  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3053 static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3054  KMP_FOREACH_I_LOCK(expand, acquire)};
3055 #undef expand
3056 
3057 // unset/release and test functions
3058 #define expand(l, op) \
3059  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3060 static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3061  KMP_FOREACH_I_LOCK(expand, release)};
3062 static int (*indirect_test[])(kmp_user_lock_p,
3063  kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3064 #undef expand
3065 #define expand(l, op) \
3066  (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3067 static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3068  KMP_FOREACH_I_LOCK(expand, release)};
3069 static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3070  KMP_FOREACH_I_LOCK(expand, test)};
3071 #undef expand
3072 
3073 // Exposes only one set of jump tables (*lock or *lock_with_checks).
3074 void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3075 int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3076 int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3077 int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3078 
3079 // Lock index table.
3080 kmp_indirect_lock_table_t __kmp_i_lock_table;
3081 
3082 // Size of indirect locks.
3083 static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3084 
3085 // Jump tables for lock accessor/modifier.
3086 void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3087  const ident_t *) = {0};
3088 void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3089  kmp_lock_flags_t) = {0};
3090 const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3091  kmp_user_lock_p) = {0};
3092 kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3093  kmp_user_lock_p) = {0};
3094 
3095 // Use different lock pools for different lock types.
3096 static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3097 
3098 // User lock allocator for dynamically dispatched indirect locks. Every entry of
3099 // the indirect lock table holds the address and type of the allocated indirect
3100 // lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3101 // full. A destroyed indirect lock object is returned to the reusable pool of
3102 // locks, unique to each lock type.
3103 kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3104  kmp_int32 gtid,
3105  kmp_indirect_locktag_t tag) {
3106  kmp_indirect_lock_t *lck;
3107  kmp_lock_index_t idx, table_idx;
3108 
3109  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3110 
3111  if (__kmp_indirect_lock_pool[tag] != NULL) {
3112  // Reuse the allocated and destroyed lock object
3113  lck = __kmp_indirect_lock_pool[tag];
3114  if (OMP_LOCK_T_SIZE < sizeof(void *))
3115  idx = lck->lock->pool.index;
3116  __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3117  KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3118  lck));
3119  } else {
3120  kmp_uint32 row, col;
3121  kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
3122  idx = 0;
3123  // Find location in list of lock tables to put new lock
3124  while (1) {
3125  table_idx = lock_table->next; // index within this table
3126  idx += lock_table->next; // global index within list of tables
3127  if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
3128  row = table_idx / KMP_I_LOCK_CHUNK;
3129  col = table_idx % KMP_I_LOCK_CHUNK;
3130  // Allocate a new row of locks if necessary
3131  if (!lock_table->table[row]) {
3132  lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
3133  sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
3134  }
3135  break;
3136  }
3137  // Allocate a new lock table if necessary with double the capacity
3138  if (!lock_table->next_table) {
3139  kmp_indirect_lock_table_t *next_table =
3140  (kmp_indirect_lock_table_t *)__kmp_allocate(
3141  sizeof(kmp_indirect_lock_table_t));
3142  next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
3143  sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
3144  next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
3145  next_table->next = 0;
3146  next_table->next_table = nullptr;
3147  lock_table->next_table = next_table;
3148  }
3149  lock_table = lock_table->next_table;
3150  KMP_ASSERT(lock_table);
3151  }
3152  lock_table->next++;
3153 
3154  lck = &lock_table->table[row][col];
3155  // Allocate a new base lock object
3156  lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3157  KA_TRACE(20,
3158  ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3159  }
3160 
3161  __kmp_release_lock(&__kmp_global_lock, gtid);
3162 
3163  lck->type = tag;
3164 
3165  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3166  *((kmp_lock_index_t *)user_lock) = idx
3167  << 1; // indirect lock word must be even
3168  } else {
3169  *((kmp_indirect_lock_t **)user_lock) = lck;
3170  }
3171 
3172  return lck;
3173 }
3174 
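// Illustrative example (editorial note): on a platform where OMP_LOCK_T_SIZE
// is smaller than sizeof(void *), the omp_lock_t cannot hold a pointer, so
// the allocator above publishes (idx << 1) in the user's lock word -- e.g.
// the lock at global index 5 is seen by the user code as the value 10. The
// lookup path recovers it via
//   __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock))
// (see __kmp_lookup_indirect_lock below), and the even value keeps indirect
// lock words distinguishable from direct ones, whose tag is odd.
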
3175 // User lock lookup for dynamically dispatched locks.
3176 static __forceinline kmp_indirect_lock_t *
3177 __kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3178  if (__kmp_env_consistency_check) {
3179  kmp_indirect_lock_t *lck = NULL;
3180  if (user_lock == NULL) {
3181  KMP_FATAL(LockIsUninitialized, func);
3182  }
3183  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3184  kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3185  lck = __kmp_get_i_lock(idx);
3186  } else {
3187  lck = *((kmp_indirect_lock_t **)user_lock);
3188  }
3189  if (lck == NULL) {
3190  KMP_FATAL(LockIsUninitialized, func);
3191  }
3192  return lck;
3193  } else {
3194  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3195  return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
3196  } else {
3197  return *((kmp_indirect_lock_t **)user_lock);
3198  }
3199  }
3200 }
3201 
3202 static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3203  kmp_dyna_lockseq_t seq) {
3204 #if KMP_USE_ADAPTIVE_LOCKS
3205  if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) {
3206  KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3207  seq = lockseq_queuing;
3208  }
3209 #endif
3210 #if KMP_USE_TSX
3211  if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.rtm) {
3212  seq = lockseq_queuing;
3213  }
3214 #endif
3215  kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3216  kmp_indirect_lock_t *l =
3217  __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3218  KMP_I_LOCK_FUNC(l, init)(l->lock);
3219  KA_TRACE(
3220  20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3221  seq));
3222 }
3223 
3224 static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3225  kmp_uint32 gtid = __kmp_entry_gtid();
3226  kmp_indirect_lock_t *l =
3227  __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3228  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3229  kmp_indirect_locktag_t tag = l->type;
3230 
3231  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3232 
3233  // Use the base lock's space to keep the pool chain.
3234  l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3235  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3236  l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3237  }
3238  __kmp_indirect_lock_pool[tag] = l;
3239 
3240  __kmp_release_lock(&__kmp_global_lock, gtid);
3241 }
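// Note (summary, for illustration): the destroyed lock is not freed here. Its
// base object is reinterpreted through the "pool" view and pushed onto the
// per-tag free list __kmp_indirect_lock_pool[tag] while holding
// __kmp_global_lock, so a later initialization of the same kind can presumably
// reuse it instead of allocating again; whatever is still sitting in these
// pools at shutdown is released by __kmp_cleanup_indirect_user_locks() below.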
3242 
3243 static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3244  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3245  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3246 }
3247 
3248 static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3249  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3250  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3251 }
3252 
3253 static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3254  kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3255  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3256 }
3257 
3258 static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3259  kmp_int32 gtid) {
3260  kmp_indirect_lock_t *l =
3261  __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3262  return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3263 }
3264 
3265 static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3266  kmp_int32 gtid) {
3267  kmp_indirect_lock_t *l =
3268  __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3269  return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3270 }
3271 
3272 static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3273  kmp_int32 gtid) {
3274  kmp_indirect_lock_t *l =
3275  __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3276  return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3277 }
3278 
3279 kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;
3280 
3281 // This is used only in kmp_error.cpp when consistency checking is on.
3282 kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
3283  switch (seq) {
3284  case lockseq_tas:
3285  case lockseq_nested_tas:
3286  return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
3287 #if KMP_USE_FUTEX
3288  case lockseq_futex:
3289  case lockseq_nested_futex:
3290  return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
3291 #endif
3292  case lockseq_ticket:
3293  case lockseq_nested_ticket:
3294  return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
3295  case lockseq_queuing:
3296  case lockseq_nested_queuing:
3297 #if KMP_USE_ADAPTIVE_LOCKS
3298  case lockseq_adaptive:
3299 #endif
3300  return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
3301  case lockseq_drdpa:
3302  case lockseq_nested_drdpa:
3303  return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
3304  default:
3305  return 0;
3306  }
3307 }
3308 
3309 // Initializes data for dynamic user locks.
3310 void __kmp_init_dynamic_user_locks() {
3311  // Initialize jump table for the lock functions
3312  if (__kmp_env_consistency_check) {
3313  __kmp_direct_set = direct_set_check;
3314  __kmp_direct_unset = direct_unset_check;
3315  __kmp_direct_test = direct_test_check;
3316  __kmp_direct_destroy = direct_destroy_check;
3317  __kmp_indirect_set = indirect_set_check;
3318  __kmp_indirect_unset = indirect_unset_check;
3319  __kmp_indirect_test = indirect_test_check;
3320  __kmp_indirect_destroy = indirect_destroy_check;
3321  } else {
3322  __kmp_direct_set = direct_set;
3323  __kmp_direct_unset = direct_unset;
3324  __kmp_direct_test = direct_test;
3325  __kmp_direct_destroy = direct_destroy;
3326  __kmp_indirect_set = indirect_set;
3327  __kmp_indirect_unset = indirect_unset;
3328  __kmp_indirect_test = indirect_test;
3329  __kmp_indirect_destroy = indirect_destroy;
3330  }
3331  // If the user locks have already been initialized, then return. Allow the
3332  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
3333  // new lock tables if they have already been allocated.
3334  if (__kmp_init_user_locks)
3335  return;
3336 
3337  // Initialize lock index table
3338  __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
3339  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
3340  sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
3341  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
3342  KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
3343  __kmp_i_lock_table.next = 0;
3344  __kmp_i_lock_table.next_table = nullptr;
3345 
3346  // Indirect lock size
3347  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
3348  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
3349 #if KMP_USE_ADAPTIVE_LOCKS
3350  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
3351 #endif
3352  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
3353 #if KMP_USE_TSX
3354  __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
3355 #endif
3356  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
3357 #if KMP_USE_FUTEX
3358  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
3359 #endif
3360  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
3361  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
3362  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);
3363 
3364 // Initialize lock accessor/modifier
3365 #define fill_jumps(table, expand, sep) \
3366  { \
3367  table[locktag##sep##ticket] = expand(ticket); \
3368  table[locktag##sep##queuing] = expand(queuing); \
3369  table[locktag##sep##drdpa] = expand(drdpa); \
3370  }
3371 
3372 #if KMP_USE_ADAPTIVE_LOCKS
3373 #define fill_table(table, expand) \
3374  { \
3375  fill_jumps(table, expand, _); \
3376  table[locktag_adaptive] = expand(queuing); \
3377  fill_jumps(table, expand, _nested_); \
3378  }
3379 #else
3380 #define fill_table(table, expand) \
3381  { \
3382  fill_jumps(table, expand, _); \
3383  fill_jumps(table, expand, _nested_); \
3384  }
3385 #endif // KMP_USE_ADAPTIVE_LOCKS
3386 
3387 #define expand(l) \
3388  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
3389  fill_table(__kmp_indirect_set_location, expand);
3390 #undef expand
3391 #define expand(l) \
3392  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
3393  fill_table(__kmp_indirect_set_flags, expand);
3394 #undef expand
3395 #define expand(l) \
3396  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
3397  fill_table(__kmp_indirect_get_location, expand);
3398 #undef expand
3399 #define expand(l) \
3400  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
3401  fill_table(__kmp_indirect_get_flags, expand);
3402 #undef expand
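// For illustration: after preprocessing, the first fill_table call above boils
// down to plain assignments such as
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
//   __kmp_indirect_set_location[locktag_nested_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_ticket_lock_location;
// i.e. nested and non-nested indirect locks of the same family share one
// accessor, and (with KMP_USE_ADAPTIVE_LOCKS) locktag_adaptive reuses the
// queuing-lock accessor via expand(queuing).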
3403 
3404  __kmp_init_user_locks = TRUE;
3405 }
3406 
3407 // Clean up the lock table.
3408 void __kmp_cleanup_indirect_user_locks() {
3409  int k;
3410 
3411  // Clean up locks in the pools first (they were already destroyed before going
3412  // into the pools).
3413  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
3414  kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
3415  while (l != NULL) {
3416  kmp_indirect_lock_t *ll = l;
3417  l = (kmp_indirect_lock_t *)l->lock->pool.next;
3418  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
3419  ll));
3420  __kmp_free(ll->lock);
3421  ll->lock = NULL;
3422  }
3423  __kmp_indirect_lock_pool[k] = NULL;
3424  }
3425  // Clean up the remaining undestroyed locks.
3426  kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
3427  while (ptr) {
3428  for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
3429  if (!ptr->table[row])
3430  continue;
3431  for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
3432  kmp_indirect_lock_t *l = &ptr->table[row][col];
3433  if (l->lock) {
3434  // Locks not destroyed explicitly need to be destroyed here.
3435  KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3436  KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
3437  "from table\n",
3438  l));
3439  __kmp_free(l->lock);
3440  }
3441  }
3442  __kmp_free(ptr->table[row]);
3443  }
3444  kmp_indirect_lock_table_t *next_table = ptr->next_table;
3445  if (ptr != &__kmp_i_lock_table)
3446  __kmp_free(ptr);
3447  ptr = next_table;
3448  }
3449 
3450  __kmp_init_user_locks = FALSE;
3451 }
3452 
3453 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3454 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3455 
3456 #else // KMP_USE_DYNAMIC_LOCK
3457 
3458 static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3459  __kmp_init_tas_lock(lck);
3460 }
3461 
3462 static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
3463  __kmp_init_nested_tas_lock(lck);
3464 }
3465 
3466 #if KMP_USE_FUTEX
3467 static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3468  __kmp_init_futex_lock(lck);
3469 }
3470 
3471 static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
3472  __kmp_init_nested_futex_lock(lck);
3473 }
3474 #endif
3475 
3476 static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
3477  return lck == lck->lk.self;
3478 }
3479 
3480 static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3481  __kmp_init_ticket_lock(lck);
3482 }
3483 
3484 static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
3485  __kmp_init_nested_ticket_lock(lck);
3486 }
3487 
3488 static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
3489  return lck == lck->lk.initialized;
3490 }
3491 
3492 static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3493  __kmp_init_queuing_lock(lck);
3494 }
3495 
3496 static void
3497 __kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
3498  __kmp_init_nested_queuing_lock(lck);
3499 }
3500 
3501 #if KMP_USE_ADAPTIVE_LOCKS
3502 static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
3503  __kmp_init_adaptive_lock(lck);
3504 }
3505 #endif
3506 
3507 static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
3508  return lck == lck->lk.initialized;
3509 }
3510 
3511 static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3512  __kmp_init_drdpa_lock(lck);
3513 }
3514 
3515 static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
3516  __kmp_init_nested_drdpa_lock(lck);
3517 }
3518 
3519 /* user locks
3520  * They are implemented as a table of function pointers which are set to the
3521  * lock functions of the appropriate kind, once that has been determined. */
3522 
3523 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3524 
3525 size_t __kmp_base_user_lock_size = 0;
3526 size_t __kmp_user_lock_size = 0;
3527 
3528 kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
3529 int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
3530  kmp_int32 gtid) = NULL;
3531 
3532 int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
3533  kmp_int32 gtid) = NULL;
3534 int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
3535  kmp_int32 gtid) = NULL;
3536 void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3537 void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
3538 void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3539 int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3540  kmp_int32 gtid) = NULL;
3541 
3542 int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3543  kmp_int32 gtid) = NULL;
3544 int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
3545  kmp_int32 gtid) = NULL;
3546 void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3547 void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
3548 
3549 int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
3550 const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
3551 void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
3552  const ident_t *loc) = NULL;
3553 kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
3554 void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
3555  kmp_lock_flags_t flags) = NULL;
3556 
3557 void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
3558  switch (user_lock_kind) {
3559  case lk_default:
3560  default:
3561  KMP_ASSERT(0);
3562 
3563  case lk_tas: {
3564  __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
3565  __kmp_user_lock_size = sizeof(kmp_tas_lock_t);
3566 
3567  __kmp_get_user_lock_owner_ =
3568  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);
3569 
3570  if (__kmp_env_consistency_check) {
3571  KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
3572  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
3573  } else {
3574  KMP_BIND_USER_LOCK(tas);
3575  KMP_BIND_NESTED_USER_LOCK(tas);
3576  }
3577 
3578  __kmp_destroy_user_lock_ =
3579  (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);
3580 
3581  __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3582 
3583  __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3584 
3585  __kmp_set_user_lock_location_ =
3586  (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3587 
3588  __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3589 
3590  __kmp_set_user_lock_flags_ =
3591  (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3592  } break;
3593 
3594 #if KMP_USE_FUTEX
3595 
3596  case lk_futex: {
3597  __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
3598  __kmp_user_lock_size = sizeof(kmp_futex_lock_t);
3599 
3600  __kmp_get_user_lock_owner_ =
3601  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);
3602 
3603  if (__kmp_env_consistency_check) {
3604  KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
3605  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
3606  } else {
3607  KMP_BIND_USER_LOCK(futex);
3608  KMP_BIND_NESTED_USER_LOCK(futex);
3609  }
3610 
3611  __kmp_destroy_user_lock_ =
3612  (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);
3613 
3614  __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;
3615 
3616  __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;
3617 
3618  __kmp_set_user_lock_location_ =
3619  (void (*)(kmp_user_lock_p, const ident_t *))NULL;
3620 
3621  __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;
3622 
3623  __kmp_set_user_lock_flags_ =
3624  (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
3625  } break;
3626 
3627 #endif // KMP_USE_FUTEX
3628 
3629  case lk_ticket: {
3630  __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
3631  __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);
3632 
3633  __kmp_get_user_lock_owner_ =
3634  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);
3635 
3636  if (__kmp_env_consistency_check) {
3637  KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
3638  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
3639  } else {
3640  KMP_BIND_USER_LOCK(ticket);
3641  KMP_BIND_NESTED_USER_LOCK(ticket);
3642  }
3643 
3644  __kmp_destroy_user_lock_ =
3645  (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);
3646 
3647  __kmp_is_user_lock_initialized_ =
3648  (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);
3649 
3650  __kmp_get_user_lock_location_ =
3651  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);
3652 
3653  __kmp_set_user_lock_location_ = (void (*)(
3654  kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);
3655 
3656  __kmp_get_user_lock_flags_ =
3657  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);
3658 
3659  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3660  &__kmp_set_ticket_lock_flags);
3661  } break;
3662 
3663  case lk_queuing: {
3664  __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
3665  __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);
3666 
3667  __kmp_get_user_lock_owner_ =
3668  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3669 
3670  if (__kmp_env_consistency_check) {
3671  KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
3672  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
3673  } else {
3674  KMP_BIND_USER_LOCK(queuing);
3675  KMP_BIND_NESTED_USER_LOCK(queuing);
3676  }
3677 
3678  __kmp_destroy_user_lock_ =
3679  (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);
3680 
3681  __kmp_is_user_lock_initialized_ =
3682  (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3683 
3684  __kmp_get_user_lock_location_ =
3685  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3686 
3687  __kmp_set_user_lock_location_ = (void (*)(
3688  kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3689 
3690  __kmp_get_user_lock_flags_ =
3691  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3692 
3693  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3694  &__kmp_set_queuing_lock_flags);
3695  } break;
3696 
3697 #if KMP_USE_ADAPTIVE_LOCKS
3698  case lk_adaptive: {
3699  __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
3700  __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);
3701 
3702  __kmp_get_user_lock_owner_ =
3703  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);
3704 
3705  if (__kmp_env_consistency_check) {
3706  KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
3707  } else {
3708  KMP_BIND_USER_LOCK(adaptive);
3709  }
3710 
3711  __kmp_destroy_user_lock_ =
3712  (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);
3713 
3714  __kmp_is_user_lock_initialized_ =
3715  (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);
3716 
3717  __kmp_get_user_lock_location_ =
3718  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);
3719 
3720  __kmp_set_user_lock_location_ = (void (*)(
3721  kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);
3722 
3723  __kmp_get_user_lock_flags_ =
3724  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);
3725 
3726  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3727  &__kmp_set_queuing_lock_flags);
3728 
3729  } break;
3730 #endif // KMP_USE_ADAPTIVE_LOCKS
3731 
3732  case lk_drdpa: {
3733  __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
3734  __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);
3735 
3736  __kmp_get_user_lock_owner_ =
3737  (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);
3738 
3739  if (__kmp_env_consistency_check) {
3740  KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
3741  KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
3742  } else {
3743  KMP_BIND_USER_LOCK(drdpa);
3744  KMP_BIND_NESTED_USER_LOCK(drdpa);
3745  }
3746 
3747  __kmp_destroy_user_lock_ =
3748  (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);
3749 
3750  __kmp_is_user_lock_initialized_ =
3751  (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);
3752 
3753  __kmp_get_user_lock_location_ =
3754  (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);
3755 
3756  __kmp_set_user_lock_location_ = (void (*)(
3757  kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);
3758 
3759  __kmp_get_user_lock_flags_ =
3760  (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);
3761 
3762  __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
3763  &__kmp_set_drdpa_lock_flags);
3764  } break;
3765  }
3766 }
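// Illustrative sketch (the real call sites live elsewhere in the runtime): once
// __kmp_set_user_lock_vptrs(lk_ticket) has run, generic code can operate on a
// user lock without knowing its concrete kind, roughly:
//   (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
//   (*__kmp_release_user_lock_with_checks_)(lck, gtid);
// Note that for lk_tas and lk_futex the location/flags accessors and
// __kmp_is_user_lock_initialized_ are deliberately left NULL above, which is
// why __kmp_cleanup_user_locks() below tests those pointers before calling
// through them.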
3767 
3768 // ----------------------------------------------------------------------------
3769 // User lock table & lock allocation
3770 
3771 kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
3772 kmp_user_lock_p __kmp_lock_pool = NULL;
3773 
3774 // Lock block-allocation support.
3775 kmp_block_of_locks *__kmp_lock_blocks = NULL;
3776 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3777 
3778 static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
3779  // Assume that kmp_global_lock is held upon entry/exit.
3780  kmp_lock_index_t index;
3781  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
3782  kmp_lock_index_t size;
3783  kmp_user_lock_p *table;
3784  // Reallocate lock table.
3785  if (__kmp_user_lock_table.allocated == 0) {
3786  size = 1024;
3787  } else {
3788  size = __kmp_user_lock_table.allocated * 2;
3789  }
3790  table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
3791  KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
3792  sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
3793  table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3794  // We cannot free the previous table now, since it may be in use by other
3795  // threads. So save the pointer to the previous table in the first element
3796  // of the new table. All the tables are organized into a list and can be
3797  // freed when the library is shutting down.
3798  __kmp_user_lock_table.table = table;
3799  __kmp_user_lock_table.allocated = size;
3800  }
3801  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
3802  __kmp_user_lock_table.allocated);
3803  index = __kmp_user_lock_table.used;
3804  __kmp_user_lock_table.table[index] = lck;
3805  ++__kmp_user_lock_table.used;
3806  return index;
3807 }
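// Worked example (illustrative): __kmp_user_lock_table starts out with
// used == 1, allocated == 0 and no table (the {1, 0, NULL} initializer above),
// so the first insert allocates 1024 slots, stashes the old (NULL) table
// pointer in slot [0], places the lock at index 1 and bumps used to 2. The
// 1024th insert finds used == allocated == 1024, allocates a 2048-slot table,
// copies entries 1..1023, and parks the retired 1024-slot table in the new
// slot [0]. __kmp_cleanup_user_locks() below walks exactly that chain of
// slot-[0] pointers when it frees the tables.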
3808 
3809 static kmp_user_lock_p __kmp_lock_block_allocate() {
3810  // Assume that kmp_global_lock is held upon entry/exit.
3811  static int last_index = 0;
3812  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
3813  // Restart the index.
3814  last_index = 0;
3815  // Need to allocate a new block.
3816  KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3817  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3818  char *buffer =
3819  (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
3820  // Set up the new block.
3821  kmp_block_of_locks *new_block =
3822  (kmp_block_of_locks *)(&buffer[space_for_locks]);
3823  new_block->next_block = __kmp_lock_blocks;
3824  new_block->locks = (void *)buffer;
3825  // Publish the new block.
3826  KMP_MB();
3827  __kmp_lock_blocks = new_block;
3828  }
3829  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
3830  ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
3831  last_index++;
3832  return ret;
3833 }
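// Layout of one block produced above (illustrative):
//   buffer: [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks header ]
//            ^ new_block->locks                  ^ new_block (buffer + space_for_locks)
// with N == __kmp_num_locks_in_block and each slot __kmp_user_lock_size bytes
// wide. __kmp_cleanup_user_locks() releases the whole block through
// block_ptr->locks; the header needs no separate free because it lives inside
// the same allocation.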
3834 
3835 // Get memory for a lock. It may be freshly allocated memory or memory reused
3836 // from the lock pool.
3837 kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
3838  kmp_lock_flags_t flags) {
3839  kmp_user_lock_p lck;
3840  kmp_lock_index_t index;
3841  KMP_DEBUG_ASSERT(user_lock);
3842 
3843  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3844 
3845  if (__kmp_lock_pool == NULL) {
3846  // Lock pool is empty. Allocate new memory.
3847 
3848  if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
3849  lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
3850  } else {
3851  lck = __kmp_lock_block_allocate();
3852  }
3853 
3854  // Insert the lock into the table so that it can be freed in __kmp_cleanup,
3855  // and so the debugger has info on all allocated locks.
3856  index = __kmp_lock_table_insert(lck);
3857  } else {
3858  // Pick up lock from pool.
3859  lck = __kmp_lock_pool;
3860  index = __kmp_lock_pool->pool.index;
3861  __kmp_lock_pool = __kmp_lock_pool->pool.next;
3862  }
3863 
3864  // We could potentially differentiate between nested and regular locks
3865  // here, and do the lock table lookup for regular locks only.
3866  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3867  *((kmp_lock_index_t *)user_lock) = index;
3868  } else {
3869  *((kmp_user_lock_p *)user_lock) = lck;
3870  }
3871 
3872  // Mark the lock if it is a critical section lock.
3873  __kmp_set_user_lock_flags(lck, flags);
3874 
3875  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up
3876 
3877  return lck;
3878 }
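// Illustrative round trip (a sketch, not library code; assumes a 4-byte
// omp_lock_t on a 64-bit target so that OMP_LOCK_T_SIZE < sizeof(void *), and
// a valid global thread id in gtid):
//   omp_lock_t l;                                   // user-visible lock word
//   kmp_user_lock_p lck =
//       __kmp_user_lock_allocate((void **)&l, gtid, 0 /* no special flags */);
//   // *(kmp_lock_index_t *)&l now holds the table index of lck, and
//   // __kmp_lookup_user_lock((void **)&l, "omp_set_lock") returns the same lck.
// Where the pointer fits in the lock word, the pointer itself is stored and the
// lookup dereferences it directly with no table access.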
3879 
3880 // Return the lock's memory to the pool for reuse.
3881 void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
3882  kmp_user_lock_p lck) {
3883  KMP_DEBUG_ASSERT(user_lock != NULL);
3884  KMP_DEBUG_ASSERT(lck != NULL);
3885 
3886  __kmp_acquire_lock(&__kmp_global_lock, gtid);
3887 
3888  lck->pool.next = __kmp_lock_pool;
3889  __kmp_lock_pool = lck;
3890  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3891  kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3892  KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
3893  lck->pool.index = index;
3894  }
3895 
3896  __kmp_release_lock(&__kmp_global_lock, gtid);
3897 }
3898 
3899 kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
3900  kmp_user_lock_p lck = NULL;
3901 
3902  if (__kmp_env_consistency_check) {
3903  if (user_lock == NULL) {
3904  KMP_FATAL(LockIsUninitialized, func);
3905  }
3906  }
3907 
3908  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3909  kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
3910  if (__kmp_env_consistency_check) {
3911  if (!(0 < index && index < __kmp_user_lock_table.used)) {
3912  KMP_FATAL(LockIsUninitialized, func);
3913  }
3914  }
3915  KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
3916  KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
3917  lck = __kmp_user_lock_table.table[index];
3918  } else {
3919  lck = *((kmp_user_lock_p *)user_lock);
3920  }
3921 
3922  if (__kmp_env_consistency_check) {
3923  if (lck == NULL) {
3924  KMP_FATAL(LockIsUninitialized, func);
3925  }
3926  }
3927 
3928  return lck;
3929 }
3930 
3931 void __kmp_cleanup_user_locks(void) {
3932  // Reset the lock pool. Don't worry about the locks in it; we will free them
3933  // when iterating through the lock table (it includes all locks, dead or alive).
3934  __kmp_lock_pool = NULL;
3935 
3936 #define IS_CRITICAL(lck) \
3937  ((__kmp_get_user_lock_flags_ != NULL) && \
3938  ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))
3939 
3940  // Loop through the lock table and free all locks.
3941  // Do not free item [0]; it is reserved for the lock tables list.
3942  //
3943  // FIXME - we are iterating through a list of (pointers to) objects of type
3944  // union kmp_user_lock, but we have no way of knowing whether the base type is
3945  // currently "pool" or whatever the global user lock type is.
3946  //
3947  // We are relying on the fact that for all of the user lock types
3948  // (except "tas"), the first field in the lock struct is the "initialized"
3949  // field, which is set to the address of the lock object itself when
3950  // the lock is initialized. When the union is of type "pool", the
3951  // first field is a pointer to the next object in the free list, which
3952  // will not be the same address as the object itself.
3953  //
3954  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
3955  // for "pool" objects on the free list. This must happen as the "location"
3956  // field of real user locks overlaps the "index" field of "pool" objects.
3957  //
3958  // It would be better to run through the free list, and remove all "pool"
3959  // objects from the lock table before executing this loop. However,
3960  // "pool" objects do not always have their index field set (only on
3961  // lin_32e), and I don't want to search the lock table for the address
3962  // of every "pool" object on the free list.
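// Concrete instance of the comment above (illustrative): a live ticket lock
// satisfies lck == lck->lk.self, so the __kmp_is_user_lock_initialized_ check
// below succeeds; for the same memory sitting on the free list the leading
// words are the "pool" view (pool.next / pool.index), so the comparison fails
// and the entry is skipped.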
3963  while (__kmp_user_lock_table.used > 1) {
3964  const ident *loc;
3965 
3966  // Reduce __kmp_user_lock_table.used before freeing the lock,
3967  // so that the state of the locks stays consistent.
3968  kmp_user_lock_p lck =
3969  __kmp_user_lock_table.table[--__kmp_user_lock_table.used];
3970 
3971  if ((__kmp_is_user_lock_initialized_ != NULL) &&
3972  (*__kmp_is_user_lock_initialized_)(lck)) {
3973  // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
3974  // it is NOT a critical section (user is not responsible for destroying
3975  // criticals) AND we know source location to report.
3976  if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
3977  ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
3978  (loc->psource != NULL)) {
3979  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
3980  KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
3981  __kmp_str_loc_free(&str_loc);
3982  }
3983 
3984 #ifdef KMP_DEBUG
3985  if (IS_CRITICAL(lck)) {
3986  KA_TRACE(
3987  20,
3988  ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
3989  lck, *(void **)lck));
3990  } else {
3991  KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
3992  *(void **)lck));
3993  }
3994 #endif // KMP_DEBUG
3995 
3996  // Cleanup internal lock dynamic resources (for drdpa locks particularly).
3997  __kmp_destroy_user_lock(lck);
3998  }
3999 
4000  // Free the lock if block allocation of locks is not used.
4001  if (__kmp_lock_blocks == NULL) {
4002  __kmp_free(lck);
4003  }
4004  }
4005 
4006 #undef IS_CRITICAL
4007 
4008  // delete lock table(s).
4009  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
4010  __kmp_user_lock_table.table = NULL;
4011  __kmp_user_lock_table.allocated = 0;
4012 
4013  while (table_ptr != NULL) {
4014  // In the first element we saved the pointer to the previous
4015  // (smaller) lock table.
4016  kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
4017  __kmp_free(table_ptr);
4018  table_ptr = next;
4019  }
4020 
4021  // Free buffers allocated for blocks of locks.
4022  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
4023  __kmp_lock_blocks = NULL;
4024 
4025  while (block_ptr != NULL) {
4026  kmp_block_of_locks_t *next = block_ptr->next_block;
4027  __kmp_free(block_ptr->locks);
4028  // *block_ptr itself was allocated at the end of the locks vector.
4029  block_ptr = next;
4030  }
4031 
4032  TCW_4(__kmp_init_user_locks, FALSE);
4033 }
4034 
4035 #endif // KMP_USE_DYNAMIC_LOCK