LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
36 struct flag_properties {
37  unsigned int type : 16;
38  unsigned int reserved : 16;
39 };
40 
41 template <enum flag_type FlagType> struct flag_traits {};
42 
43 template <> struct flag_traits<flag32> {
44  typedef kmp_uint32 flag_t;
45  static const flag_type t = flag32;
46  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
47  static inline flag_t test_then_add4(volatile flag_t *f) {
48  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
49  }
50  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
51  return KMP_TEST_THEN_OR32(f, v);
52  }
53  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
54  return KMP_TEST_THEN_AND32(f, v);
55  }
56 };
57 
58 template <> struct flag_traits<atomic_flag64> {
59  typedef kmp_uint64 flag_t;
60  static const flag_type t = atomic_flag64;
61  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
62  static inline flag_t test_then_add4(volatile flag_t *f) {
63  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
64  }
65  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
66  return KMP_TEST_THEN_OR64(f, v);
67  }
68  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
69  return KMP_TEST_THEN_AND64(f, v);
70  }
71 };
72 
73 template <> struct flag_traits<flag64> {
74  typedef kmp_uint64 flag_t;
75  static const flag_type t = flag64;
76  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
77  static inline flag_t test_then_add4(volatile flag_t *f) {
78  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
79  }
80  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
81  return KMP_TEST_THEN_OR64(f, v);
82  }
83  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
84  return KMP_TEST_THEN_AND64(f, v);
85  }
86 };
87 
88 template <> struct flag_traits<flag_oncore> {
89  typedef kmp_uint64 flag_t;
90  static const flag_type t = flag_oncore;
91  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
92  static inline flag_t test_then_add4(volatile flag_t *f) {
93  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
94  }
95  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
96  return KMP_TEST_THEN_OR64(f, v);
97  }
98  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
99  return KMP_TEST_THEN_AND64(f, v);
100  }
101 };
102 
104 template <flag_type FlagType> class kmp_flag {
105 protected:
106  flag_properties t;
107  kmp_info_t *waiting_threads[1];
108  kmp_uint32 num_waiting_threads;
109  std::atomic<bool> *sleepLoc;
110 
111 public:
112  typedef flag_traits<FlagType> traits_type;
113  kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
114  kmp_flag(int nwaiters)
115  : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
116  kmp_flag(std::atomic<bool> *sloc)
117  : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
119  flag_type get_type() { return (flag_type)(t.type); }
120 
123  kmp_info_t *get_waiter(kmp_uint32 i) {
124  KMP_DEBUG_ASSERT(i < num_waiting_threads);
125  return waiting_threads[i];
126  }
128  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
131  void set_waiter(kmp_info_t *thr) {
132  waiting_threads[0] = thr;
134  }
135  enum barrier_type get_bt() { return bs_last_barrier; }
136 };
137 
139 template <typename PtrType, flag_type FlagType, bool Sleepable>
140 class kmp_flag_native : public kmp_flag<FlagType> {
141 protected:
142  volatile PtrType *loc;
143  PtrType checker;
144  typedef flag_traits<FlagType> traits_type;
145 
146 public:
147  typedef PtrType flag_t;
148  kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
149  kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
150  : kmp_flag<FlagType>(1), loc(p) {
151  this->waiting_threads[0] = thr;
152  }
153  kmp_flag_native(volatile PtrType *p, PtrType c)
154  : kmp_flag<FlagType>(), loc(p), checker(c) {}
155  kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
156  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
157  virtual ~kmp_flag_native() {}
158  void *operator new(size_t size) { return __kmp_allocate(size); }
159  void operator delete(void *p) { __kmp_free(p); }
160  volatile PtrType *get() { return loc; }
161  void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
162  void set(volatile PtrType *new_loc) { loc = new_loc; }
163  PtrType load() { return *loc; }
164  void store(PtrType val) { *loc = val; }
166  virtual bool done_check() {
167  if (Sleepable && !(this->sleepLoc))
168  return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
169  checker;
170  else
171  return traits_type::tcr(*(this->get())) == checker;
172  }
175  virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
181  virtual bool notdone_check() {
182  return traits_type::tcr(*(this->get())) != checker;
183  }
186  void internal_release() {
187  (void)traits_type::test_then_add4((volatile PtrType *)this->get());
188  }
192  PtrType set_sleeping() {
193  if (this->sleepLoc) {
194  this->sleepLoc->store(true);
195  return *(this->get());
196  }
197  return traits_type::test_then_or((volatile PtrType *)this->get(),
198  KMP_BARRIER_SLEEP_STATE);
199  }
203  void unset_sleeping() {
204  if (this->sleepLoc) {
205  this->sleepLoc->store(false);
206  return;
207  }
208  traits_type::test_then_and((volatile PtrType *)this->get(),
209  ~KMP_BARRIER_SLEEP_STATE);
210  }
213  bool is_sleeping_val(PtrType old_loc) {
214  if (this->sleepLoc)
215  return this->sleepLoc->load();
216  return old_loc & KMP_BARRIER_SLEEP_STATE;
217  }
219  bool is_sleeping() {
220  if (this->sleepLoc)
221  return this->sleepLoc->load();
222  return is_sleeping_val(*(this->get()));
223  }
224  bool is_any_sleeping() {
225  if (this->sleepLoc)
226  return this->sleepLoc->load();
227  return is_sleeping_val(*(this->get()));
228  }
229  kmp_uint8 *get_stolen() { return NULL; }
230 };
231 
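kmp_flag_native keeps two kinds of information in the same word: the sleep indicator that set_sleeping()/unset_sleeping() OR in and AND out (KMP_BARRIER_SLEEP_STATE), and the progress value that done_check() compares against checker after masking that indicator out and that internal_release() advances by 4. The following standalone C++ sketch illustrates the encoding; the concrete bit values (sleep bit = 1, bump = 4) are assumptions made for the example, not definitions taken from this header.

// Standalone illustration (not kmp code): a 64-bit flag word that carries a
// sleep indicator in one bit and a release count in the remaining bits.
// The bit assignments below are assumptions chosen for the example.
#include <atomic>
#include <cstdint>
#include <cstdio>

constexpr uint64_t SLEEP_STATE = 1; // assumed sleep bit, analogous to KMP_BARRIER_SLEEP_STATE
constexpr uint64_t STATE_BUMP = 4;  // release increments by 4, as in internal_release()

struct demo_flag {
  std::atomic<uint64_t> word{0};
  uint64_t checker; // value that means "released"

  bool done_check() { // ignore the sleep bit when comparing
    return (word.load(std::memory_order_acquire) & ~SLEEP_STATE) == checker;
  }
  uint64_t set_sleeping() { return word.fetch_or(SLEEP_STATE); }
  void unset_sleeping() { word.fetch_and(~SLEEP_STATE); }
  void internal_release() { word.fetch_add(STATE_BUMP, std::memory_order_release); }
};

int main() {
  demo_flag f;
  f.checker = STATE_BUMP; // "released" once the count has been bumped once
  f.set_sleeping();       // a waiter announces it is about to sleep
  f.internal_release();   // the releaser bumps the count; the sleep bit survives
  std::printf("done=%d sleep_bit=%d\n", (int)f.done_check(),
              (int)(f.word.load() & SLEEP_STATE));
  return 0;
}
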
233 template <typename PtrType, flag_type FlagType, bool Sleepable>
234 class kmp_flag_atomic : public kmp_flag<FlagType> {
235 protected:
236  std::atomic<PtrType> *loc;
237  PtrType checker;
238 public:
239  typedef flag_traits<FlagType> traits_type;
240  typedef PtrType flag_t;
241  kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
242  kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
243  : kmp_flag<FlagType>(1), loc(p) {
244  this->waiting_threads[0] = thr;
245  }
246  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
247  : kmp_flag<FlagType>(), loc(p), checker(c) {}
248  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
249  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
251  std::atomic<PtrType> *get() { return loc; }
253  void *get_void_p() { return RCAST(void *, loc); }
255  void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
257  PtrType load() { return loc->load(std::memory_order_acquire); }
259  void store(PtrType val) { loc->store(val, std::memory_order_release); }
261  bool done_check() {
262  if (Sleepable && !(this->sleepLoc))
263  return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
264  else
265  return this->load() == checker;
266  }
269  bool done_check_val(PtrType old_loc) { return old_loc == checker; }
275  bool notdone_check() { return this->load() != checker; }
278  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
282  PtrType set_sleeping() {
283  if (this->sleepLoc) {
284  this->sleepLoc->store(true);
285  return *(this->get());
286  }
287  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
288  }
292  void unset_sleeping() {
293  if (this->sleepLoc) {
294  this->sleepLoc->store(false);
295  return;
296  }
297  KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
298  }
301  bool is_sleeping_val(PtrType old_loc) {
302  if (this->sleepLoc)
303  return this->sleepLoc->load();
304  return old_loc & KMP_BARRIER_SLEEP_STATE;
305  }
307  bool is_sleeping() {
308  if (this->sleepLoc)
309  return this->sleepLoc->load();
310  return is_sleeping_val(this->load());
311  }
312  bool is_any_sleeping() {
313  if (this->sleepLoc)
314  return this->sleepLoc->load();
315  return is_sleeping_val(this->load());
316  }
317  kmp_uint8 *get_stolen() { return NULL; }
318 };
319 
320 #if OMPT_SUPPORT
321 OMPT_NOINLINE
322 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
323  ompt_state_t ompt_state,
324  ompt_data_t *tId) {
325  int ds_tid = this_thr->th.th_info.ds.ds_tid;
326  if (ompt_state == ompt_state_wait_barrier_implicit) {
327  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
328 #if OMPT_OPTIONAL
329  void *codeptr = NULL;
330  if (ompt_enabled.ompt_callback_sync_region_wait) {
331  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
332  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
333  codeptr);
334  }
335  if (ompt_enabled.ompt_callback_sync_region) {
336  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
337  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
338  codeptr);
339  }
340 #endif
341  if (!KMP_MASTER_TID(ds_tid)) {
342  if (ompt_enabled.ompt_callback_implicit_task) {
343  int flags = this_thr->th.ompt_thread_info.parallel_flags;
344  flags = (flags & ompt_parallel_league) ? ompt_task_initial
345  : ompt_task_implicit;
346  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
347  ompt_scope_end, NULL, tId, 0, ds_tid, flags);
348  }
349  // return to idle state
350  this_thr->th.ompt_thread_info.state = ompt_state_idle;
351  } else {
352  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
353  }
354  }
355 }
356 #endif
357 
358 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
359  __kmp_wait_* must make certain that another thread calls __kmp_release
360  to wake it back up to prevent deadlocks!
361 
362  NOTE: We may not belong to a team at this point. */
363 template <class C, bool final_spin, bool Cancellable = false,
364  bool Sleepable = true>
365 static inline bool
366 __kmp_wait_template(kmp_info_t *this_thr,
367  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
368 #if USE_ITT_BUILD && USE_ITT_NOTIFY
369  volatile void *spin = flag->get();
370 #endif
371  kmp_uint32 spins;
372  int th_gtid;
373  int tasks_completed = FALSE;
374 #if !KMP_USE_MONITOR
375  kmp_uint64 poll_count;
376  kmp_uint64 hibernate_goal;
377 #else
378  kmp_uint32 hibernate;
379 #endif
380 
381  KMP_FSYNC_SPIN_INIT(spin, NULL);
382  if (flag->done_check()) {
383  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
384  return false;
385  }
386  th_gtid = this_thr->th.th_info.ds.ds_gtid;
387  if (Cancellable) {
388  kmp_team_t *team = this_thr->th.th_team;
389  if (team && team->t.t_cancel_request == cancel_parallel)
390  return true;
391  }
392 #if KMP_OS_UNIX
393  if (final_spin)
394  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
395 #endif
396  KA_TRACE(20,
397  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
398 #if KMP_STATS_ENABLED
399  stats_state_e thread_state = KMP_GET_THREAD_STATE();
400 #endif
401 
402 /* OMPT Behavior:
403 THIS function is called from
404  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
405  these have join / fork behavior
406 
407  In these cases, we don't change the state or trigger events in THIS
408 function.
409  Events are triggered in the calling code (__kmp_barrier):
410 
411  state := ompt_state_overhead
412  barrier-begin
413  barrier-wait-begin
414  state := ompt_state_wait_barrier
415  call join-barrier-implementation (finally arrive here)
416  {}
417  call fork-barrier-implementation (finally arrive here)
418  {}
419  state := ompt_state_overhead
420  barrier-wait-end
421  barrier-end
422  state := ompt_state_work_parallel
423 
424 
425  __kmp_fork_barrier (after thread creation, before executing implicit task)
426  call fork-barrier-implementation (finally arrive here)
427  {} // worker arrive here with state = ompt_state_idle
428 
429 
430  __kmp_join_barrier (implicit barrier at end of parallel region)
431  state := ompt_state_barrier_implicit
432  barrier-begin
433  barrier-wait-begin
434  call join-barrier-implementation (finally arrive here
435 final_spin=FALSE)
436  {
437  }
438  __kmp_fork_barrier (implicit barrier at end of parallel region)
439  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
440 
441  Worker after task-team is finished:
442  barrier-wait-end
443  barrier-end
444  implicit-task-end
445  idle-begin
446  state := ompt_state_idle
447 
448  Before leaving, if state = ompt_state_idle
449  idle-end
450  state := ompt_state_overhead
451 */
452 #if OMPT_SUPPORT
453  ompt_state_t ompt_entry_state;
454  ompt_data_t *tId;
455  if (ompt_enabled.enabled) {
456  ompt_entry_state = this_thr->th.ompt_thread_info.state;
457  if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
458  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
459  ompt_lw_taskteam_t *team = NULL;
460  if (this_thr->th.th_team)
461  team = this_thr->th.th_team->t.ompt_serialized_team_info;
462  if (team) {
463  tId = &(team->ompt_task_info.task_data);
464  } else {
465  tId = OMPT_CUR_TASK_DATA(this_thr);
466  }
467  } else {
468  tId = &(this_thr->th.ompt_thread_info.task_data);
469  }
470  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
471  this_thr->th.th_task_team == NULL)) {
472  // implicit task is done. Either no taskqueue, or task-team finished
473  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
474  }
475  }
476 #endif
477 
478  KMP_INIT_YIELD(spins); // Setup for waiting
479 
480  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
481  __kmp_pause_status == kmp_soft_paused) {
482 #if KMP_USE_MONITOR
483 // The worker threads cannot rely on the team struct existing at this point.
484 // Use the bt values cached in the thread struct instead.
485 #ifdef KMP_ADJUST_BLOCKTIME
486  if (__kmp_pause_status == kmp_soft_paused ||
487  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
488  // Force immediate suspend if not set by user and more threads than
489  // available procs
490  hibernate = 0;
491  else
492  hibernate = this_thr->th.th_team_bt_intervals;
493 #else
494  hibernate = this_thr->th.th_team_bt_intervals;
495 #endif /* KMP_ADJUST_BLOCKTIME */
496 
497  /* If the blocktime is nonzero, we want to make sure that we spin wait for
498  the entirety of the specified #intervals, plus up to one interval more.
499  This increment makes certain that this thread doesn't go to sleep too
500  soon. */
501  if (hibernate != 0)
502  hibernate++;
503 
504  // Add in the current time value.
505  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
506  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
507  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
508  hibernate - __kmp_global.g.g_time.dt.t_value));
509 #else
510  if (__kmp_pause_status == kmp_soft_paused) {
511  // Force immediate suspend
512  hibernate_goal = KMP_NOW();
513  } else
514  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
515  poll_count = 0;
516  (void)poll_count;
517 #endif // KMP_USE_MONITOR
518  }
519 
520  KMP_MB();
521 
522  // Main wait spin loop
523  while (flag->notdone_check()) {
524  kmp_task_team_t *task_team = NULL;
525  if (__kmp_tasking_mode != tskm_immediate_exec) {
526  task_team = this_thr->th.th_task_team;
527  /* If the thread's task team pointer is NULL, it means one of 3 things:
528  1) A newly-created thread is first being released by
529  __kmp_fork_barrier(), and its task team has not been set up yet.
530  2) All tasks have been executed to completion.
531  3) Tasking is off for this region. This could be because we are in a
532  serialized region (perhaps the outer one), or else tasking was manually
533  disabled (KMP_TASKING=0). */
534  if (task_team != NULL) {
535  if (TCR_SYNC_4(task_team->tt.tt_active)) {
536  if (KMP_TASKING_ENABLED(task_team)) {
537  flag->execute_tasks(
538  this_thr, th_gtid, final_spin,
539  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
540  } else
541  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
542  } else {
543  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
544 #if OMPT_SUPPORT
545  // task-team is done now; other cases should be caught above
546  if (final_spin && ompt_enabled.enabled)
547  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
548 #endif
549  this_thr->th.th_task_team = NULL;
550  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
551  }
552  } else {
553  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
554  } // if
555  } // if
556 
557  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
558  if (TCR_4(__kmp_global.g.g_done)) {
559  if (__kmp_global.g.g_abort)
560  __kmp_abort_thread();
561  break;
562  }
563 
564  // If we are oversubscribed, or have waited a bit (and
565  // KMP_LIBRARY=throughput), then yield
566  KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
567 
568 #if KMP_STATS_ENABLED
569  // Check if thread has been signalled to idle state
570  // This indicates that the logical "join-barrier" has finished
571  if (this_thr->th.th_stats->isIdle() &&
572  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
573  KMP_SET_THREAD_STATE(IDLE);
574  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
575  }
576 #endif
577  // Check if the barrier surrounding this wait loop has been cancelled
578  if (Cancellable) {
579  kmp_team_t *team = this_thr->th.th_team;
580  if (team && team->t.t_cancel_request == cancel_parallel)
581  break;
582  }
583 
584  // For a hidden helper thread, if task_team is nullptr, the main thread has
585  // not released the barrier yet. We cannot sleep here, because once the main
586  // thread releases all children's barriers, the hidden helper threads would
587  // still be asleep and would miss the follow-up setup, such as task-team
588  // synchronization, leaving this thread without a task team. Usually that
589  // is harmless. However, there is a corner case: when the first task this
590  // thread encounters is an untied task, the check in __kmp_task_alloc will
591  // crash because it dereferences the task team pointer without checking
592  // whether it is nullptr, presumably on the assumption that it has already
593  // been set.
594  if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
595  !TCR_4(__kmp_hidden_helper_team_done)) {
596  // If there are still hidden helper tasks to be executed, the hidden helper
597  // thread will not enter a waiting status.
598  if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
599  __kmp_hidden_helper_worker_thread_wait();
600  }
601  continue;
602  }
603 
604  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
605  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
606  __kmp_pause_status != kmp_soft_paused)
607  continue;
608 
609  // Don't suspend if there is a likelihood of new tasks being spawned.
610  if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
611  continue;
612 
613 #if KMP_USE_MONITOR
614  // If we have waited a bit more, fall asleep
615  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
616  continue;
617 #else
618  if (KMP_BLOCKING(hibernate_goal, poll_count++))
619  continue;
620 #endif
621  // Don't suspend if wait loop designated non-sleepable
622  // in template parameters
623  if (!Sleepable)
624  continue;
625 
626  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
627  __kmp_pause_status != kmp_soft_paused)
628  continue;
629 
630 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
631  if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
632  KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
633  flag->mwait(th_gtid);
634  } else {
635 #endif
636  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
637 #if KMP_OS_UNIX
638  if (final_spin)
639  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
640 #endif
641  flag->suspend(th_gtid);
642 #if KMP_OS_UNIX
643  if (final_spin)
644  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
645 #endif
646 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
647  }
648 #endif
649 
650  if (TCR_4(__kmp_global.g.g_done)) {
651  if (__kmp_global.g.g_abort)
652  __kmp_abort_thread();
653  break;
654  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
655  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
656  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
657  }
658  // TODO: If thread is done with work and times out, disband/free
659  }
660 
661 #if OMPT_SUPPORT
662  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
663  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
664 #if OMPT_OPTIONAL
665  if (final_spin) {
666  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
667  ompt_exit_state = this_thr->th.ompt_thread_info.state;
668  }
669 #endif
670  if (ompt_exit_state == ompt_state_idle) {
671  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
672  }
673  }
674 #endif
675 #if KMP_STATS_ENABLED
676  // If we were put into idle state, pop that off the state stack
677  if (KMP_GET_THREAD_STATE() == IDLE) {
678  KMP_POP_PARTITIONED_TIMER();
679  KMP_SET_THREAD_STATE(thread_state);
680  this_thr->th.th_stats->resetIdleFlag();
681  }
682 #endif
683 
684 #if KMP_OS_UNIX
685  if (final_spin)
686  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
687 #endif
688  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
689  if (Cancellable) {
690  kmp_team_t *team = this_thr->th.th_team;
691  if (team && team->t.t_cancel_request == cancel_parallel) {
692  if (tasks_completed) {
693  // undo the previous decrement of unfinished_threads so that the
694  // thread can decrement at the join barrier with no problem
695  kmp_task_team_t *task_team = this_thr->th.th_task_team;
696  std::atomic<kmp_int32> *unfinished_threads =
697  &(task_team->tt.tt_unfinished_threads);
698  KMP_ATOMIC_INC(unfinished_threads);
699  }
700  return true;
701  }
702  }
703  return false;
704 }
705 
706 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
707 // Set up a monitor on the flag variable causing the calling thread to wait in
708 // a less active state until the flag variable is modified.
709 template <class C>
710 static inline void __kmp_mwait_template(int th_gtid, C *flag) {
711  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
712  kmp_info_t *th = __kmp_threads[th_gtid];
713 
714  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
715  flag->get()));
716 
717  // User-level mwait is available
718  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);
719 
720  __kmp_suspend_initialize_thread(th);
721  __kmp_lock_suspend_mx(th);
722 
723  volatile void *spin = flag->get();
724  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));
725 
726  if (!flag->done_check()) {
727  // Mark thread as no longer active
728  th->th.th_active = FALSE;
729  if (th->th.th_active_in_pool) {
730  th->th.th_active_in_pool = FALSE;
731  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
732  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
733  }
734  flag->set_sleeping();
735  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
736 #if KMP_HAVE_UMWAIT
737  if (__kmp_umwait_enabled) {
738  __kmp_umonitor(cacheline);
739  }
740 #elif KMP_HAVE_MWAIT
741  if (__kmp_mwait_enabled) {
742  __kmp_mm_monitor(cacheline, 0, 0);
743  }
744 #endif
745  // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
746  // the address could happen after the last time we checked and before
747  // monitoring started, in which case monitor can't detect the change.
748  if (flag->done_check())
749  flag->unset_sleeping();
750  else {
751  // if flag changes here, wake-up happens immediately
752  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
753  th->th.th_sleep_loc_type = flag->get_type();
754  __kmp_unlock_suspend_mx(th);
755  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
756 #if KMP_HAVE_UMWAIT
757  if (__kmp_umwait_enabled) {
758  __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
759  }
760 #elif KMP_HAVE_MWAIT
761  if (__kmp_mwait_enabled) {
762  __kmp_mm_mwait(0, __kmp_mwait_hints);
763  }
764 #endif
765  KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
766  __kmp_lock_suspend_mx(th);
767  // Clean up sleep info; doesn't matter how/why this thread stopped waiting
768  if (flag->is_sleeping())
769  flag->unset_sleeping();
770  TCW_PTR(th->th.th_sleep_loc, NULL);
771  th->th.th_sleep_loc_type = flag_unset;
772  }
773  // Mark thread as active again
774  th->th.th_active = TRUE;
775  if (TCR_4(th->th.th_in_pool)) {
776  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
777  th->th.th_active_in_pool = TRUE;
778  }
779  } // Drop out to main wait loop to check flag, handle tasks, etc.
780  __kmp_unlock_suspend_mx(th);
781  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
782 }
783 #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
784 
785 /* Release any threads specified as waiting on the flag by releasing the flag
786  and resuming the waiting thread if indicated by the sleep bit(s). A thread that
787  calls __kmp_wait_template must call this function to wake up the potentially
788  sleeping thread and prevent deadlocks! */
789 template <class C> static inline void __kmp_release_template(C *flag) {
790 #ifdef KMP_DEBUG
791  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
792 #endif
793  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
794  KMP_DEBUG_ASSERT(flag->get());
795  KMP_FSYNC_RELEASING(flag->get_void_p());
796 
797  flag->internal_release();
798 
799  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
800  flag->load()));
801 
802  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
803  // Only need to check sleep stuff if infinite block time not set.
804  // Are *any* threads waiting on flag sleeping?
805  if (flag->is_any_sleeping()) {
806  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
807  // if sleeping waiter exists at i, sets current_waiter to i inside flag
808  kmp_info_t *waiter = flag->get_waiter(i);
809  if (waiter) {
810  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
811  // Wake up thread if needed
812  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
813  "flag(%p) set\n",
814  gtid, wait_gtid, flag->get()));
815  flag->resume(wait_gtid); // unsets flag's current_waiter when done
816  }
817  }
818  }
819  }
820 }
821 
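The comments above __kmp_wait_template and __kmp_release_template describe a strict pairing: a waiter spins, then marks itself as sleeping and blocks, and a releaser must update the flag and wake any sleeper or the waiter deadlocks. The sketch below shows that handshake in self-contained C++ using std::atomic and a condition variable; it is an illustration of the protocol only, not the runtime's actual suspend/resume machinery (__kmp_suspend_64, __kmp_resume_64), and all names and constants in it are invented for the example.

// Standalone sketch of the wait/release contract (not kmp code): the waiter
// spins briefly, then sets a "sleeping" bit, re-checks the flag, and only then
// blocks; the releaser updates the flag first and wakes the waiter only if the
// sleeping bit was set.
#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <cstdio>
#include <mutex>
#include <thread>

constexpr uint64_t SLEEP_STATE = 1; // assumed sleep bit
constexpr uint64_t STATE_BUMP = 4;  // release bumps the count by 4

std::atomic<uint64_t> flag_word{0};
std::mutex mx; // plays the role of the suspend mutex
std::condition_variable cv;

bool done() { // analogous to done_check()
  return (flag_word.load(std::memory_order_acquire) & ~SLEEP_STATE) >= STATE_BUMP;
}

void waiter() {
  for (int spins = 0; spins < 1000; ++spins) { // spin/yield phase
    if (done()) return;
    std::this_thread::yield();
  }
  std::unique_lock<std::mutex> lk(mx);
  flag_word.fetch_or(SLEEP_STATE); // announce intent to sleep (set_sleeping)
  if (done()) {                    // re-check: a release may have just landed
    flag_word.fetch_and(~SLEEP_STATE);
    return;
  }
  cv.wait(lk, [] { return done(); }); // block until released (suspend)
}

void releaser() {
  flag_word.fetch_add(STATE_BUMP, std::memory_order_release); // internal_release()
  if (flag_word.load() & SLEEP_STATE) { // only wake if someone may be asleep
    std::lock_guard<std::mutex> lk(mx); // pair with the waiter's lock to avoid a lost wakeup
    cv.notify_all();                    // resume()
  }
}

int main() {
  std::thread w(waiter), r(releaser);
  w.join();
  r.join();
  std::printf("released, flag=%llu\n", (unsigned long long)flag_word.load());
  return 0;
}
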
822 template <bool Cancellable, bool Sleepable>
823 class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
824 public:
825  kmp_flag_32(std::atomic<kmp_uint32> *p)
826  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
827  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
828  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
829  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
830  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
831  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
832 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
833  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
834 #endif
835  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
836  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
837  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
838  kmp_int32 is_constrained) {
839  return __kmp_execute_tasks_32(
840  this_thr, gtid, this, final_spin,
841  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
842  }
843  bool wait(kmp_info_t *this_thr,
844  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
845  if (final_spin)
846  return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
847  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
848  else
849  return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
850  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
851  }
852  void release() { __kmp_release_template(this); }
853  flag_type get_ptr_type() { return flag32; }
854 };
855 
856 template <bool Cancellable, bool Sleepable>
857 class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
858 public:
859  kmp_flag_64(volatile kmp_uint64 *p)
860  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
861  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
862  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
863  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
864  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
865  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
866  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
867  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
868 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
869  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
870 #endif
871  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
872  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
873  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
874  kmp_int32 is_constrained) {
875  return __kmp_execute_tasks_64(
876  this_thr, gtid, this, final_spin,
877  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
878  }
879  bool wait(kmp_info_t *this_thr,
880  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
881  if (final_spin)
882  return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
883  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
884  else
885  return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
886  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
887  }
888  void release() { __kmp_release_template(this); }
889  flag_type get_ptr_type() { return flag64; }
890 };
891 
892 template <bool Cancellable, bool Sleepable>
893 class kmp_atomic_flag_64
894  : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
895 public:
896  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
897  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
898  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
899  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
900  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
901  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
902  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
903  std::atomic<bool> *loc)
904  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
905  void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
906  void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
907  void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
908  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
909  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
910  kmp_int32 is_constrained) {
911  return __kmp_atomic_execute_tasks_64(
912  this_thr, gtid, this, final_spin,
913  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
914  }
915  bool wait(kmp_info_t *this_thr,
916  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
917  if (final_spin)
918  return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
919  Sleepable>(
920  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
921  else
922  return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
923  Sleepable>(
924  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
925  }
926  void release() { __kmp_release_template(this); }
927  flag_type get_ptr_type() { return atomic_flag64; }
928 };
929 
930 // Hierarchical 64-bit on-core barrier instantiation
931 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
932  kmp_uint32 offset;
933  bool flag_switch;
934  enum barrier_type bt;
935  kmp_info_t *this_thr;
936 #if USE_ITT_BUILD
937  void *itt_sync_obj;
938 #endif
939  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
940  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
941  }
942 
943 public:
944  kmp_flag_oncore(volatile kmp_uint64 *p)
945  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
946  }
947  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
948  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
949  flag_switch(false),
950  bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
951  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
952  enum barrier_type bar_t,
953  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
954  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
955  flag_switch(false), bt(bar_t),
956  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
957  virtual ~kmp_flag_oncore() override {}
958  void *operator new(size_t size) { return __kmp_allocate(size); }
959  void operator delete(void *p) { __kmp_free(p); }
960  bool done_check_val(kmp_uint64 old_loc) override {
961  return byteref(&old_loc, offset) == checker;
962  }
963  bool done_check() override { return done_check_val(*get()); }
964  bool notdone_check() override {
965  // Calculate flag_switch
966  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
967  flag_switch = true;
968  if (byteref(get(), offset) != 1 && !flag_switch)
969  return true;
970  else if (flag_switch) {
971  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
972  kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
973  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
974  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
975  }
976  return false;
977  }
978  void internal_release() {
979  // Other threads can write their own bytes simultaneously.
980  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
981  byteref(get(), offset) = 1;
982  } else {
983  kmp_uint64 mask = 0;
984  byteref(&mask, offset) = 1;
985  KMP_TEST_THEN_OR64(get(), mask);
986  }
987  }
988  void wait(kmp_info_t *this_thr, int final_spin) {
989  if (final_spin)
990  __kmp_wait_template<kmp_flag_oncore, TRUE>(
991  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
992  else
993  __kmp_wait_template<kmp_flag_oncore, FALSE>(
994  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
995  }
996  void release() { __kmp_release_template(this); }
997  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
998 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
999  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
1000 #endif
1001  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
1002  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
1003  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
1004  kmp_int32 is_constrained) {
1005 #if OMPD_SUPPORT
1006  int ret = __kmp_execute_tasks_oncore(
1007  this_thr, gtid, this, final_spin,
1008  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1009  if (ompd_state & OMPD_ENABLE_BP)
1010  ompd_bp_task_end();
1011  return ret;
1012 #else
1013  return __kmp_execute_tasks_oncore(
1014  this_thr, gtid, this, final_spin,
1015  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1016 #endif
1017  }
1018  enum barrier_type get_bt() { return bt; }
1019  flag_type get_ptr_type() { return flag_oncore; }
1020 };
1021 
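kmp_flag_oncore packs eight one-byte sub-flags into a single 64-bit word: byteref() picks out one participant's byte, and internal_release() sets that byte either with a direct byte store (infinite blocktime) or with a 64-bit atomic OR of a mask, so the other bytes, and any other bits in the word, are left intact. A standalone sketch of that byte-packing idea, with illustrative names, follows.

// Standalone sketch (not kmp code) of the byte-per-participant packing used by
// the on-core flag: eight one-byte sub-flags share a single 64-bit word, and a
// releaser sets one byte with a 64-bit atomic OR so neighbouring bytes are
// left untouched.
#include <atomic>
#include <cstdint>
#include <cstdio>

std::atomic<uint64_t> core_word{0};

// Build a mask with byte `offset` set to `value` (offset 0..7).
constexpr uint64_t byte_mask(unsigned offset, uint8_t value) {
  return (uint64_t)value << (8 * offset);
}

// Releaser side: mark participant `offset` as done, analogous to
// internal_release() writing byteref(get(), offset) = 1 via an OR.
void release_byte(unsigned offset) {
  core_word.fetch_or(byte_mask(offset, 1), std::memory_order_release);
}

// Waiter side: check whether participant `offset`'s byte equals the expected
// "checker" value, analogous to done_check_val().
bool byte_done(unsigned offset, uint8_t checker) {
  uint64_t w = core_word.load(std::memory_order_acquire);
  return (uint8_t)(w >> (8 * offset)) == checker;
}

int main() {
  release_byte(3); // participant 3 signals
  std::printf("byte3 done=%d byte4 done=%d word=%#llx\n",
              (int)byte_done(3, 1), (int)byte_done(4, 1),
              (unsigned long long)core_word.load());
  return 0;
}
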
1022 static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
1023  int gtid = __kmp_gtid_from_thread(thr);
1024  void *flag = CCAST(void *, thr->th.th_sleep_loc);
1025  flag_type type = thr->th.th_sleep_loc_type;
1026  if (!flag)
1027  return;
1028  // Attempt to wake up a thread: examine its type and call appropriate template
1029  switch (type) {
1030  case flag32:
1031  __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
1032  break;
1033  case flag64:
1034  __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
1035  break;
1036  case atomic_flag64:
1037  __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
1038  break;
1039  case flag_oncore:
1040  __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
1041  break;
1042 #ifdef KMP_DEBUG
1043  case flag_unset:
1044  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
1045  break;
1046  default:
1047  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "
1048  "known flag type\n",
1049  type));
1050 #endif
1051  }
1052 }
1053 
1058 #endif // KMP_WAIT_RELEASE_H