#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#include "ompt-specific.h"

#if ENABLE_LIBOMPTARGET
// Declaration of the libomptarget entry point used to query target-nowait
// tasks; declared weak so libomp still links when libomptarget is absent.
extern "C" void __tgt_target_nowait_query(void **) KMP_WEAK_ATTRIBUTE_INTERNAL;
#endif // ENABLE_LIBOMPTARGET
static void __kmp_enable_tasking(kmp_task_team_t *task_team,
                                 kmp_info_t *this_thr);
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data);
static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
                                           kmp_task_team_t *task_team);
static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask);
#ifdef BUILD_TIED_TASK_STACK

// __kmp_trace_task_stack: print the tied tasks from the task stack, from the
// top entry down to the first block, at the given trace threshold.
static void __kmp_trace_task_stack(kmp_int32 gtid,
                                   kmp_thread_data_t *thread_data,
                                   int threshold, char *location) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t **stack_top = task_stack->ts_top;
  kmp_int32 entries = task_stack->ts_entries;
  kmp_taskdata_t *tied_task;

  KA_TRACE(
      threshold,
      ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
       "first_block = %p, stack_top = %p \n",
       location, gtid, entries, task_stack->ts_first_block, stack_top));

  KMP_DEBUG_ASSERT(stack_top != NULL);
  KMP_DEBUG_ASSERT(entries > 0);

  while (entries != 0) {
    KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]);
    // Fix up stack_top if we need to pop from the previous block.
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
      kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top);

      stack_block = stack_block->sb_prev;
      stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
    }

    // Finish bookkeeping for this entry.
    stack_top--;
    entries--;

    tied_task = *stack_top;

    KMP_DEBUG_ASSERT(tied_task != NULL);
    KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);

    KA_TRACE(threshold,
             ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
              "stack_top=%p, tied_task=%p\n",
              location, gtid, entries, stack_top, tied_task));
  }
  KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]);

  KA_TRACE(threshold,
           ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
            location, gtid));
}
// __kmp_init_task_stack: initialize the task stack when the thread_data
// structure is first created.
static void __kmp_init_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *first_block;

  // The first block is embedded in the task_stack structure itself.
  first_block = &task_stack->ts_first_block;
  task_stack->ts_top = (kmp_taskdata_t **)first_block;
  memset((void *)first_block, '\0',
         TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));

  // Initialize the stack to be empty.
  task_stack->ts_entries = TASK_STACK_EMPTY;
  first_block->sb_next = NULL;
  first_block->sb_prev = NULL;
}
// __kmp_free_task_stack: free the task stack when thread_data is destroyed.
static void __kmp_free_task_stack(kmp_int32 gtid,
                                  kmp_thread_data_t *thread_data) {
  kmp_info_t *thread = __kmp_threads[gtid]; // owner of the freed blocks
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;

  KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY);
  // Free from the second block onward; the first block is embedded in the
  // task_stack structure and must not be freed separately.
  while (stack_block != NULL) {
    kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL;

    stack_block->sb_next = NULL;
    stack_block->sb_prev = NULL;
    if (stack_block != &task_stack->ts_first_block) {
      __kmp_thread_free(thread, stack_block);
    }
    stack_block = next_block;
  }
  // Reset the stack to be empty.
  task_stack->ts_entries = 0;
  task_stack->ts_top = NULL;
}
// __kmp_push_task_stack: push a tied task onto the thread's task stack.
static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                  kmp_taskdata_t *tied_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;

  if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) {
    return; // nothing is pushed when the team or tasking is serialized
  }

  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);

  KA_TRACE(20,
           ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
            gtid, thread, tied_task));
  // Store entry.
  *(task_stack->ts_top) = tied_task;

  // Do bookkeeping for the next push.
  task_stack->ts_top++;
  task_stack->ts_entries++;

  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    // Find the beginning of this task block.
    kmp_stack_block_t *stack_block =
        (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE);

    // Check whether a next block already exists.
    if (stack_block->sb_next != NULL) {
      // Reset ts_top to the beginning of the next block.
      task_stack->ts_top = &stack_block->sb_next->sb_block[0];
    } else {
      // Allocate a new block and link it in.
      kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc(
          thread, sizeof(kmp_stack_block_t));

      task_stack->ts_top = &new_block->sb_block[0];
      stack_block->sb_next = new_block;
      new_block->sb_prev = stack_block;
      new_block->sb_next = NULL;

      KA_TRACE(
          30,
          ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
           gtid, tied_task, new_block));
    }
  }
  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
// __kmp_pop_task_stack: pop the tied task from the task stack; it must match
// the task that is ending.
static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread,
                                 kmp_taskdata_t *ending_task) {
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_taskdata_t *tied_task;

  if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) {
    return; // nothing was pushed for serialized teams or serialized tasking
  }

  KMP_DEBUG_ASSERT(task_stack->ts_top != NULL);
  KMP_DEBUG_ASSERT(task_stack->ts_entries > 0);

  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid,
                thread));

  // Fix up ts_top if we need to pop from the previous block.
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
    kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top);

    stack_block = stack_block->sb_prev;
    task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE];
  }

  // Finish bookkeeping.
  task_stack->ts_top--;
  task_stack->ts_entries--;

  tied_task = *(task_stack->ts_top);

  KMP_DEBUG_ASSERT(tied_task != NULL);
  KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED);
  KMP_DEBUG_ASSERT(tied_task == ending_task); // the popped task must match

  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid,
                tied_task));
}
#endif // BUILD_TIED_TASK_STACK
static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained,
                                  const kmp_taskdata_t *tasknew,
                                  const kmp_taskdata_t *taskcurr) {
  if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) {
    // Check if the candidate obeys the Task Scheduling Constraint (TSC):
    // only a descendant of all deferred tied tasks can be scheduled, and
    // checking the last one is enough.
    kmp_taskdata_t *current = taskcurr->td_last_tied;
    KMP_DEBUG_ASSERT(current != NULL);
    // Check that the last tied task is not suspended on a barrier.
    if (current->td_flags.tasktype == TASK_EXPLICIT ||
        current->td_taskwait_thread > 0) { // <= 0 when on a barrier
      kmp_int32 level = current->td_level;
      kmp_taskdata_t *parent = tasknew->td_parent;
      while (parent != current && parent->td_level > level) {
        // Check ancestry up to the level of the current task.
        parent = parent->td_parent;
        KMP_DEBUG_ASSERT(parent != NULL);
      }
      if (parent != current)
        return false;
    }
  }
  // Check mutexinoutset dependencies and acquire the locks.
  kmp_depnode_t *node = tasknew->td_depnode;
  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
    for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
      KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
      if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
        continue;
      // Could not get lock i; release the previously acquired locks.
      for (int j = i - 1; j >= 0; --j)
        __kmp_release_lock(node->dn.mtx_locks[j], gtid);
      return false;
    }
    // A negative num_locks means all locks were acquired successfully.
    node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
  }
  return true;
}
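
// __kmp_realloc_task_deque: double the size of a full task deque, copying the
// queued tasks to the new storage. Called with the deque lock held.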
static void __kmp_realloc_task_deque(kmp_info_t *thread,
                                     kmp_thread_data_t *thread_data) {
  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == size);
  kmp_int32 new_size = 2 * size;

  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
                "%d] for thread_data %p\n",
                __kmp_gtid_from_thread(thread), size, new_size, thread_data));

  kmp_taskdata_t **new_deque =
      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));

  int i, j;
  for (i = thread_data->td.td_deque_head, j = 0; j < size;
       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
    new_deque[j] = thread_data->td.td_deque[i];

  __kmp_free(thread_data->td.td_deque);

  thread_data->td.td_deque_head = 0;
  thread_data->td.td_deque_tail = size;
  thread_data->td.td_deque = new_deque;
  thread_data->td.td_deque_size = new_size;
}
// __kmp_alloc_task_pri_list: allocate a list node holding a deque for tasks
// of one priority level.
static kmp_task_pri_t *__kmp_alloc_task_pri_list() {
  kmp_task_pri_t *l = (kmp_task_pri_t *)__kmp_allocate(sizeof(kmp_task_pri_t));
  kmp_thread_data_t *thread_data = &l->td;
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  thread_data->td.td_deque_last_stolen = -1;
  KE_TRACE(20, ("__kmp_alloc_task_pri_list: T#%d allocating deque[%d] "
                "for thread_data %p\n",
                __kmp_get_gtid(), INITIAL_TASK_DEQUE_SIZE, thread_data));
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
  return l;
}
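
// __kmp_get_priority_deque_data: find the deque for tasks with the given
// priority, or allocate a new one and insert it into the list of deques,
// which is kept sorted from high to low priority. These deques are shared by
// all threads in the team; the caller is expected to hold
// task_team->tt.tt_task_pri_lock.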
static kmp_thread_data_t *
__kmp_get_priority_deque_data(kmp_task_team_t *task_team, kmp_int32 pri) {
  kmp_thread_data_t *thread_data;
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (lst->priority == pri) {
    // Found a deque of tasks with the given priority.
    thread_data = &lst->td;
  } else if (lst->priority < pri) {
    // All current deques hold tasks with lower priority; prepend a new one.
    kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
    thread_data = &list->td;
    list->priority = pri;
    list->next = lst;
    task_team->tt.tt_task_pri_list = list;
  } else { // lst->priority > pri
    kmp_task_pri_t *next_queue = lst->next;
    while (next_queue && next_queue->priority > pri) {
      lst = next_queue;
      next_queue = lst->next;
    }
    // Here lst->priority > pri and (next_queue == NULL || pri >= next_queue->priority).
    if (next_queue == NULL) {
      // No deque with this priority; append a new one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      lst->next = list;
    } else if (next_queue->priority == pri) {
      // Found a deque of tasks with the given priority.
      thread_data = &next_queue->td;
    } else { // lst->priority > pri > next_queue->priority
      // Insert a newly allocated deque between the existing ones.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = next_queue;
      lst->next = list;
    }
  }
  return thread_data;
}
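
// __kmp_push_priority_task: push a task with non-default priority onto the
// shared per-priority deque, growing the deque (or refusing the push when
// throttling applies) if it is full. Returns TASK_SUCCESSFULLY_PUSHED or
// TASK_NOT_PUSHED.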
static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
                                          kmp_taskdata_t *taskdata,
                                          kmp_task_team_t *task_team,
                                          kmp_int32 pri) {
  kmp_thread_data_t *thread_data = NULL;
  KA_TRACE(20,
           ("__kmp_push_priority_task: T#%d trying to push task %p, pri %d.\n",
            gtid, taskdata, pri));

  // Find the task queue specific to the priority value.
  kmp_task_pri_t *lst = task_team->tt.tt_task_pri_list;
  if (UNLIKELY(lst == NULL)) {
    __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    if (task_team->tt.tt_task_pri_list == NULL) {
      // The list of queues is still empty; allocate one.
      kmp_task_pri_t *list = __kmp_alloc_task_pri_list();
      thread_data = &list->td;
      list->priority = pri;
      list->next = NULL;
      task_team->tt.tt_task_pri_list = list;
    } else {
      // Another thread initialized the list; look up the matching deque.
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
    }
    __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  } else {
    if (lst->priority == pri) {
      // Found a queue of tasks with the given priority.
      thread_data = &lst->td;
    } else {
      __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
      thread_data = __kmp_get_priority_deque_data(task_team, pri);
      __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
    }
  }
  KMP_DEBUG_ASSERT(thread_data);

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
  // Check if the deque is full.
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      // Expand the deque to push the task which is not allowed to execute.
      __kmp_realloc_task_deque(thread, thread_data);
    }
  }
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));
  // Push taskdata.
  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_priority_task: T#%d returning "
                "TASK_SUCCESSFULLY_PUSHED: task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  task_team->tt.tt_num_task_pri++; // atomic not needed; done under the lock
  return TASK_SUCCESSFULLY_PUSHED;
}
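
// __kmp_push_task: schedule a task for execution by pushing it onto the
// current thread's deque. Hidden-helper tasks are forwarded to a hidden
// helper thread, untied tasks get their dispatch counter bumped, and
// serialized or full-deque cases may return TASK_NOT_PUSHED so the caller
// executes the task immediately instead.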
static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);

  // If we encounter a hidden helper task and the current thread is not a
  // hidden helper thread, give the task to a hidden helper thread, starting
  // from this thread's shadow thread.
  if (UNLIKELY(taskdata->td_flags.hidden_helper &&
               !KMP_HIDDEN_HELPER_THREAD(gtid))) {
    kmp_int32 shadow_gtid = KMP_GTID_TO_SHADOW_GTID(gtid);
    __kmpc_give_task(task, __kmp_tid_from_gtid(shadow_gtid));
    // Signal the hidden helper threads.
    __kmp_hidden_helper_worker_thread_signal();
    return TASK_SUCCESSFULLY_PUSHED;
  }

  kmp_task_team_t *task_team = thread->th.th_task_team;
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_thread_data_t *thread_data;

  KA_TRACE(20,
           ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // An untied task needs to increment its counter so that the task
    // structure is not freed prematurely.
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(
        20,
        ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n",
         gtid, counter, taskdata));
  }

  // The first check avoids building task_team thread data if serialized.
  if (UNLIKELY(taskdata->td_flags.task_serial)) {
    KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                  "TASK_NOT_PUSHED for task %p\n",
                  gtid, taskdata));
    return TASK_NOT_PUSHED;
  }

  // Now that serialized tasks have returned, we can assume we are not in
  // immediate exec mode.
  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
    __kmp_enable_tasking(task_team, thread);
  }
  KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
  KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL);

  if (taskdata->td_flags.priority_specified && task->data2.priority > 0 &&
      __kmp_max_task_priority > 0) {
    int pri = KMP_MIN(task->data2.priority, __kmp_max_task_priority);
    return __kmp_push_priority_task(gtid, thread, taskdata, task_team, pri);
  }

  // Find the tasking deque specific to the encountering thread.
  thread_data = &task_team->tt.tt_threads_data[tid];

  // No lock needed since only the owner can allocate its deque.
  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
    __kmp_alloc_task_deque(thread, thread_data);
  }

  int locked = 0;
  // Check if the deque is full.
  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    if (__kmp_enable_task_throttling &&
        __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                              thread->th.th_current_task)) {
      KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
                    "TASK_NOT_PUSHED for task %p\n",
                    gtid, taskdata));
      return TASK_NOT_PUSHED;
    } else {
      __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
      locked = 1;
      if (TCR_4(thread_data->td.td_deque_ntasks) >=
          TASK_DEQUE_SIZE(thread_data->td)) {
        // Expand the deque to push the task which is not allowed to execute.
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Lock the deque for the push operation.
  if (!locked) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    // Recheck: a proxy task may have been pushed from outside of OpenMP.
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      if (__kmp_enable_task_throttling &&
          __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
                                thread->th.th_current_task)) {
        __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
        KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
                      "returning TASK_NOT_PUSHED for task %p\n",
                      gtid, taskdata));
        return TASK_NOT_PUSHED;
      } else {
        // Expand the deque to push the task which is not allowed to execute.
        __kmp_realloc_task_deque(thread, thread_data);
      }
    }
  }
  // Must have room since no other thread can add tasks to this deque.
  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
                   TASK_DEQUE_SIZE(thread_data->td));

  thread_data->td.td_deque[thread_data->td.td_deque_tail] =
      taskdata; // push taskdata
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1); // adjust task count
  KMP_FSYNC_RELEASING(thread->th.th_current_task); // releasing self
  KMP_FSYNC_RELEASING(taskdata); // releasing child
  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
                "task=%p ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return TASK_SUCCESSFULLY_PUSHED;
}
void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) {
  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));

  this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent;

  KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d "
                "this_thread=%p, curtask=%p, "
                "curtask_parent=%p\n",
                0, this_thr, this_thr->th.th_current_task,
                this_thr->th.th_current_task->td_parent));
}
// __kmp_push_current_task_to_thread: make the implicit task for the given tid
// the current task of the thread, fixing up parent pointers as needed.
void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team,
                                       int tid) {
  KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));

  KMP_DEBUG_ASSERT(this_thr != NULL);

  if (tid == 0) {
    if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) {
      team->t.t_implicit_task_taskdata[0].td_parent =
          this_thr->th.th_current_task;
      this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0];
    }
  } else {
    team->t.t_implicit_task_taskdata[tid].td_parent =
        team->t.t_implicit_task_taskdata[0].td_parent;
    this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid];
  }

  KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p "
                "curtask=%p parent_task=%p\n",
                tid, this_thr, this_thr->th.th_current_task,
                team->t.t_implicit_task_taskdata[tid].td_parent));
}
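
// __kmp_task_start: bookkeeping done when a thread begins executing an
// explicit task: suspend the encountering task, make the new task current,
// and mark it started and executing.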
static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
                             kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];

  KA_TRACE(10,
           ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
            gtid, taskdata, current_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  // Mark the currently executing task as suspended.
  current_task->td_flags.executing = 0;

#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_push_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  // Mark the starting task as executing and as the current task.
  thread->th.th_current_task = taskdata;

  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 ||
                   taskdata->td_flags.tiedness == TASK_UNTIED);
  taskdata->td_flags.started = 1;
  taskdata->td_flags.executing = 1;
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata));
}
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
  task->ompt_task_info.task_data.value = 0;
  task->ompt_task_info.frame.exit_frame = ompt_data_none;
  task->ompt_task_info.frame.enter_frame = ompt_data_none;
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.dispatch_chunk.start = 0;
  task->ompt_task_info.dispatch_chunk.iterations = 0;
}
// __ompt_task_start: report a task-schedule event when a task begins.
static inline void __ompt_task_start(kmp_task_t *task,
                                     kmp_taskdata_t *current_task,
                                     kmp_int32 gtid) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  ompt_task_status_t status = ompt_task_switch;
  if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) {
    status = ompt_task_yield;
    __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0;
  }
  // Begin/resume event for the new task.
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(current_task->ompt_task_info.task_data), status,
        &(taskdata->ompt_task_info.task_data));
  }
  taskdata->ompt_task_info.scheduling_parent = current_task;
}
static inline void __ompt_task_finish(kmp_task_t *task,
                                      kmp_taskdata_t *resumed_task,
                                      ompt_task_status_t status) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
    if (__kmp_omp_cancellation && taskdata->td_taskgroup &&
        taskdata->td_taskgroup->cancel_request == cancel_taskgroup) {
      status = ompt_task_cancel;
    }

    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        &(taskdata->ompt_task_info.task_data), status,
        (resumed_task ? &(resumed_task->ompt_task_info.task_data) : NULL));
  }
}
template <bool ompt>
static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref,
                                               kmp_int32 gtid, kmp_task_t *task,
                                               void *frame_address,
                                               void *return_address) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p "
                "current_task=%p\n",
                gtid, loc_ref, taskdata, current_task));

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // An untied task needs to increment its counter so that the task
    // structure is not freed prematurely.
    kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
    KMP_DEBUG_USE_VAR(counter);
    KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) "
                  "incremented for task %p\n",
                  gtid, counter, taskdata));
  }

  taskdata->td_flags.task_serial =
      1; // Execute this task immediately, not deferred.
  __kmp_task_start(gtid, task, current_task);

  if (ompt) {
    if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
      current_task->ompt_task_info.frame.enter_frame.ptr =
          taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
      current_task->ompt_task_info.frame.enter_frame_flags =
          taskdata->ompt_task_info.frame.exit_frame_flags =
              ompt_frame_application | ompt_frame_framepointer;
    }
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent_info->task_data), &(parent_info->frame),
          &(taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0,
          return_address);
    }
    __ompt_task_start(task, current_task, gtid);
  }

  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, taskdata));
}
static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task,
                                           void *frame_address,
                                           void *return_address) {
  __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address,
                                           return_address);
}
void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                               kmp_task_t *task) {
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task,
                                   OMPT_GET_FRAME_ADDRESS(1),
                                   OMPT_LOAD_RETURN_ADDRESS(gtid));
    return;
  }
  __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL);
}
void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) {
  kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;

  KA_TRACE(
      10,
      ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
       gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task));

  __kmp_task_start(gtid, task, current_task);

  KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
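
// __kmp_free_task: free the task's data structures, including the combined
// shareds storage allocated with it. The task must be complete, no longer
// executing, and have no outstanding children (asserted below).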
static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
                            kmp_info_t *thread) {
  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid,
                taskdata));

  // Check that all flags and counters have the correct values.
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
  KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
  KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 ||
                   taskdata->td_flags.task_serial == 1);
  KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0);
  kmp_task_t *task = KMP_TASKDATA_TO_TASK(taskdata);
  // Clear data so it is not reused later by mistake.
  task->data1.destructors = NULL;
  task->data2.priority = 0;

  taskdata->td_flags.freed = 1;

#if USE_FAST_MEMORY
  __kmp_fast_free(thread, taskdata);
#else /* ! USE_FAST_MEMORY */
  __kmp_thread_free(thread, taskdata);
#endif
  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata));
}
static void __kmp_free_task_and_ancestors(kmp_int32 gtid,
                                          kmp_taskdata_t *taskdata,
                                          kmp_info_t *thread) {
  // Proxy tasks must always be allowed to free their parents because they can
  // be run in the background even in serial mode.
  kmp_int32 team_serial =
      (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) &&
      !taskdata->td_flags.proxy;
  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

  kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
  KMP_DEBUG_ASSERT(children >= 0);

  // Go up the ancestor tree to see if any ancestors can now be freed.
  while (children == 0) {
    kmp_taskdata_t *parent_taskdata = taskdata->td_parent;

    KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
                  "and freeing itself\n",
                  gtid, taskdata));

    // --- Deallocate my ancestor task ---
    __kmp_free_task(gtid, taskdata, thread);

    taskdata = parent_taskdata;

    if (team_serial)
      return;
    // Stop checking ancestors at the implicit task to avoid prematurely
    // deallocating it.
    if (taskdata->td_flags.tasktype == TASK_IMPLICIT) {
      if (taskdata->td_dephash) { // do we need to clean up the dephash?
        int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks);
        kmp_tasking_flags_t flags_old = taskdata->td_flags;
        if (children == 0 && flags_old.complete == 1) {
          kmp_tasking_flags_t flags_new = flags_old;
          flags_new.complete = 0;
          if (KMP_COMPARE_AND_STORE_ACQ32(
                  RCAST(kmp_int32 *, &taskdata->td_flags),
                  *RCAST(kmp_int32 *, &flags_old),
                  *RCAST(kmp_int32 *, &flags_new))) {
            KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans "
                           "dephash of implicit task %p\n",
                           gtid, taskdata));
            // Clean up the dephash of the finished implicit task.
            __kmp_dephash_free_entries(thread, taskdata->td_dephash);
          }
        }
      }
      return;
    }
    // Predecrement simulated by the "- 1" calculation.
    children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1;
    KMP_DEBUG_ASSERT(children >= 0);
  }

  KA_TRACE(
      20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
           "not freeing it yet\n",
           gtid, taskdata, children));
}
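
// __kmp_track_children_task: return true if the runtime must track incomplete
// children of this task, i.e. when tasking is not serialized or when proxy,
// detachable, or hidden-helper tasks may be involved.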
static bool __kmp_track_children_task(kmp_taskdata_t *taskdata) {
  kmp_tasking_flags_t flags = taskdata->td_flags;
  bool ret = !(flags.team_serial || flags.tasking_ser);
  ret = ret || flags.proxy == TASK_PROXY ||
        flags.detachable == TASK_DETACHABLE || flags.hidden_helper;
  ret = ret ||
        KMP_ATOMIC_LD_ACQ(&taskdata->td_parent->td_incomplete_child_tasks) > 0;
  return ret;
}
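
// __kmp_task_finish: bookkeeping done when a task finishes execution: pop it
// from the tied-task stack, run destructors, handle detachable/proxy
// completion, release dependences, decrement the parent's child counters,
// and resume the previous task. The ompt template parameter controls OMPT
// event reporting.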
template <bool ompt>
static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *resumed_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_task_team_t *task_team =
      thread->th.th_task_team; // might be NULL for serial teams
  kmp_int32 children = 0;

  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
                "task %p\n",
                gtid, taskdata, resumed_task));

  KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);

// Pop the task from the stack if tied.
#ifdef BUILD_TIED_TASK_STACK
  if (taskdata->td_flags.tiedness == TASK_TIED) {
    __kmp_pop_task_stack(gtid, thread, taskdata);
  }
#endif /* BUILD_TIED_TASK_STACK */

  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
    // An untied task needs to check the counter so that the task structure is
    // not freed prematurely.
    kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
    KA_TRACE(
        20,
        ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n",
         gtid, counter, taskdata));
    if (counter > 0) {
      // The untied task is not done yet; it may be continued by another
      // thread, so do not free it now.
      if (resumed_task == NULL) {
        KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial);
        resumed_task = taskdata->td_parent; // in a serialized task, the
        // resumed task is the parent
      }
      thread->th.th_current_task = resumed_task; // restore current_task
      resumed_task->td_flags.executing = 1; // resume previous task
      KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, "
                    "resuming task %p\n",
                    gtid, taskdata, resumed_task));
      return;
    }
  }
  // Bookkeeping for the resuming task (note tasking_ser implies task_serial).
  KMP_DEBUG_ASSERT(
      (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
      taskdata->td_flags.task_serial);
  if (taskdata->td_flags.task_serial) {
    if (resumed_task == NULL) {
      resumed_task = taskdata->td_parent; // in a serialized task, the resumed
      // task is the parent
    }
  } else {
    KMP_DEBUG_ASSERT(resumed_task !=
                     NULL); // verify that the resumed task was passed in
  }

  /* If the task's destructor thunk flag has been set, invoke the destructor
     thunk generated by the compiler. */
  if (UNLIKELY(taskdata->td_flags.destructors_thunk)) {
    kmp_routine_entry_t destr_thunk = task->data1.destructors;
    KMP_ASSERT(destr_thunk);
    destr_thunk(gtid, task);
  }

  KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
  KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1);
  KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);

  bool completed = true;
  if (UNLIKELY(taskdata->td_flags.detachable == TASK_DETACHABLE)) {
    if (taskdata->td_allow_completion_event.type ==
        KMP_EVENT_ALLOW_COMPLETION) {
      // The event hasn't been fulfilled yet; try to detach the task.
      __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
      if (taskdata->td_allow_completion_event.type ==
          KMP_EVENT_ALLOW_COMPLETION) {
        // The task finished execution.
        KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
        taskdata->td_flags.executing = 0; // suspend the finishing task

        if (ompt)
          __ompt_task_finish(task, resumed_task, ompt_task_detach);

        // No access to taskdata after this point:
        // __kmp_fulfill_event might free taskdata at any time from now.
        taskdata->td_flags.proxy = TASK_PROXY; // proxify!
        completed = false;
      }
      __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid);
    }
  }

  // Tasks with valid target async handles must be re-enqueued.
  if (taskdata->td_target_data.async_handle != NULL) {
    // The current thread is either a hidden helper thread (gtid already
    // correct) or hidden helpers are disabled and gtid is an OpenMP thread.
    __kmpc_give_task(task, __kmp_tid_from_gtid(gtid));
    if (KMP_HIDDEN_HELPER_THREAD(gtid))
      __kmp_hidden_helper_worker_thread_signal();
    completed = false;
  }

  if (completed) {
    taskdata->td_flags.complete = 1; // mark the task as completed

    if (ompt)
      __ompt_task_finish(task, resumed_task, ompt_task_complete);

    if (__kmp_track_children_task(taskdata)) {
      __kmp_release_deps(gtid, taskdata);
      // Predecrement simulated by the "- 1" calculation.
      children =
          KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1;
      KMP_DEBUG_ASSERT(children >= 0);
      if (taskdata->td_taskgroup)
        KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
    } else if (task_team && (task_team->tt.tt_found_proxy_tasks ||
                             task_team->tt.tt_hidden_helper_task_encountered)) {
      // If proxy or hidden helper tasks were found, a dependency chain with a
      // proxy task as origin may exist.
      __kmp_release_deps(gtid, taskdata);
    }
    // td_flags.executing must be cleared only after __kmp_release_deps has
    // been called; otherwise a task executed directly from release_deps could
    // set the flag back to 1.
    KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1);
    taskdata->td_flags.executing = 0; // suspend the finishing task

    // Decrement the counter of hidden helper tasks to be executed.
    if (taskdata->td_flags.hidden_helper) {
      // Hidden helper tasks can only be executed by hidden helper threads.
      KMP_ASSERT(KMP_HIDDEN_HELPER_THREAD(gtid));
      KMP_ATOMIC_DEC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

  KA_TRACE(
      20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
           gtid, taskdata, children));

  // Restore th_current_task before freeing, so asynchronous inquiries never
  // see a freed task as the current task.
  thread->th.th_current_task = resumed_task;
  if (completed)
    __kmp_free_task_and_ancestors(gtid, taskdata, thread);

  resumed_task->td_flags.executing = 1; // resume previous task

  KA_TRACE(
      10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
           gtid, taskdata, resumed_task));
}
template <bool ompt>
static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
                                                  kmp_int32 gtid,
                                                  kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));
  KMP_DEBUG_ASSERT(gtid >= 0);
  // This routine will provide the task to resume.
  __kmp_task_finish<ompt>(gtid, task, NULL);

  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
                gtid, loc_ref, KMP_TASK_TO_TASKDATA(task)));

  if (ompt) {
    ompt_frame_t *ompt_frame;
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    ompt_frame->enter_frame = ompt_data_none;
    ompt_frame->enter_frame_flags =
        ompt_frame_runtime | ompt_frame_framepointer;
  }
}
void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                       kmp_task_t *task) {
  __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task);
}
void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *task) {
  if (UNLIKELY(ompt_enabled.enabled)) {
    __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task);
    return;
  }
  __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task);
}
void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,
                              kmp_task_t *task) {
  KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));

  __kmp_task_finish<false>(gtid, task,
                           NULL); // no task to resume is passed here

  KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid,
                loc_ref, KMP_TASK_TO_TASKDATA(task)));
}
void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,
                              kmp_team_t *team, int tid, int set_curr_task) {
  kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid];

  KF_TRACE(
      10,
      ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
       tid, team, task, set_curr_task ? "TRUE" : "FALSE"));

  task->td_task_id = KMP_GEN_TASK_ID();
  task->td_team = team;
  task->td_ident = loc_ref;
  task->td_taskwait_ident = NULL;
  task->td_taskwait_counter = 0;
  task->td_taskwait_thread = 0;

  task->td_flags.tiedness = TASK_TIED;
  task->td_flags.tasktype = TASK_IMPLICIT;
  task->td_flags.proxy = TASK_FULL;

  // All implicit tasks are executed immediately, not deferred.
  task->td_flags.task_serial = 1;
  task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);
  task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  task->td_flags.started = 1;
  task->td_flags.executing = 1;
  task->td_flags.complete = 0;
  task->td_flags.freed = 0;

  task->td_depnode = NULL;
  task->td_last_tied = task;
  task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (set_curr_task) { // only do this initialization the first time
    KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0);
    // Not used: implicit tasks are never deallocated.
    KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0);
    task->td_taskgroup = NULL; // an implicit task does not have a taskgroup
    task->td_dephash = NULL;
    __kmp_push_current_task_to_thread(this_thr, team, tid);
  } else {
    KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
    KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
  }

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(task, tid);

  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid,
                team, task));
}
void __kmp_finish_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task->td_dephash) {
    int children;
    task->td_flags.complete = 1;
    children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks);
    kmp_tasking_flags_t flags_old = task->td_flags;
    if (children == 0 && flags_old.complete == 1) {
      kmp_tasking_flags_t flags_new = flags_old;
      flags_new.complete = 0;
      if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags),
                                      *RCAST(kmp_int32 *, &flags_old),
                                      *RCAST(kmp_int32 *, &flags_new))) {
        KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans "
                       "dephash of implicit task %p\n",
                       thread->th.th_info.ds.ds_gtid, task));
        __kmp_dephash_free_entries(thread, task->td_dephash);
      }
    }
  }
}
void __kmp_free_implicit_task(kmp_info_t *thread) {
  kmp_taskdata_t *task = thread->th.th_current_task;
  if (task && task->td_dephash) {
    __kmp_dephash_free(thread, task->td_dephash);
    task->td_dephash = NULL;
  }
}
// __kmp_round_up_to_val: round size up to a multiple of val, guarding
// against overflow.
static size_t __kmp_round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1)) {
    size &= ~(val - 1);
    if (size <= KMP_SIZE_T_MAX - val) {
      size += val; // Round up if there is no overflow.
    }
  }
  return size;
}
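
// __kmp_task_alloc: allocate the taskdata and task structures for a new
// explicit task. The kmp_taskdata_t header, the kmp_task_t block, and the
// shareds storage are carved out of one allocation; flags and child/ancestor
// counters are initialized from the encountering (parent) task.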
kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                             kmp_tasking_flags_t *flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_taskdata_t *parent_task = thread->th.th_current_task;
  size_t shareds_offset;

  if (UNLIKELY(!TCR_4(__kmp_init_middle)))
    __kmp_middle_initialize();

  if (flags->hidden_helper) {
    if (__kmp_enable_hidden_helper) {
      if (!TCR_4(__kmp_init_hidden_helper))
        __kmp_hidden_helper_initialize();
    } else {
      // Hidden helper tasks are not enabled; reset the flag to FALSE.
      flags->hidden_helper = FALSE;
    }
  }

  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
                sizeof_shareds, task_entry));
  KMP_DEBUG_ASSERT(parent_task);
  if (parent_task->td_flags.final) {
    if (flags->merged_if0) {
      // Nothing extra to do for merged if(0) tasks of a final parent.
    }
    flags->final = 1;
  }

  if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) {
    // An untied task was encountered, so the TSC algorithm must check the
    // entire deque of the victim thread rather than just its head.
    KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1);
  }

  // Detachable tasks are not proxy tasks yet but could become ones, so do the
  // tasking setup here as well.
  if (UNLIKELY(flags->proxy == TASK_PROXY ||
               flags->detachable == TASK_DETACHABLE || flags->hidden_helper)) {
    if (flags->proxy == TASK_PROXY) {
      flags->tiedness = TASK_UNTIED;
      flags->merged_if0 = 1;
    }
    /* Running in a sequential parallel region or in tskm_immediate_exec mode
       still requires tasking support to be enabled. */
    if ((thread->th.th_task_team) == NULL) {
      /* This should only happen if the team is serialized: set up a task team
         and propagate it to the thread. */
      KMP_DEBUG_ASSERT(team->t.t_serialized);
      KA_TRACE(30,
               ("T#%d creating task team in __kmp_task_alloc for proxy task\n",
                gtid));
      __kmp_task_team_setup(thread, team, 1);
      thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state];
    }
    kmp_task_team_t *task_team = thread->th.th_task_team;

    /* Tasking must be enabled now because the task might not be pushed. */
    if (!KMP_TASKING_ENABLED(task_team)) {
      KA_TRACE(
          30,
          ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid));
      __kmp_enable_tasking(task_team, thread);
      kmp_int32 tid = thread->th.th_info.ds.ds_tid;
      kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];
      // No lock needed since only the owner can allocate its deque.
      if (thread_data->td.td_deque == NULL) {
        __kmp_alloc_task_deque(thread, thread_data);
      }
    }

    if ((flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) &&
        task_team->tt.tt_found_proxy_tasks == FALSE)
      TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE);
    if (flags->hidden_helper &&
        task_team->tt.tt_hidden_helper_task_encountered == FALSE)
      TCW_4(task_team->tt.tt_hidden_helper_task_encountered, TRUE);
  }
  // Calculate the shareds offset, padding after the kmp_task_t struct so that
  // pointers in the shareds struct are aligned.
  shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t;
  shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *));

  // Allocate a kmp_taskdata_t block and a kmp_task_t block in one allocation.
  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid,
                shareds_offset));
  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid,
                sizeof_shareds));

#if USE_FAST_MEMORY
  taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset +
                                                               sizeof_shareds);
#else /* ! USE_FAST_MEMORY */
  taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
                                                               sizeof_shareds);
#endif /* USE_FAST_MEMORY */

  task = KMP_TASKDATA_TO_TASK(taskdata);

// Make sure task & taskdata are aligned appropriately.
#if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0);
#else
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0);
  KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0);
#endif
  if (sizeof_shareds > 0) {
    // Avoid a second allocation by placing shareds right after taskdata.
    task->shareds = &((char *)taskdata)[shareds_offset];
    // Make sure the shareds struct is aligned to pointer size.
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  } else {
    task->shareds = NULL;
  }
  task->routine = task_entry;
  task->part_id = 0; // always start with part id 0
  taskdata->td_task_id = KMP_GEN_TASK_ID();
  taskdata->td_team = thread->th.th_team;
  taskdata->td_alloc_thread = thread;
  taskdata->td_parent = parent_task;
  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
  KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0);
  taskdata->td_ident = loc_ref;
  taskdata->td_taskwait_ident = NULL;
  taskdata->td_taskwait_counter = 0;
  taskdata->td_taskwait_thread = 0;
  KMP_DEBUG_ASSERT(taskdata->td_parent != NULL);
  // Avoid copying ICVs for proxy tasks.
  if (flags->proxy == TASK_FULL)
    copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs);

  taskdata->td_flags = *flags;
  taskdata->td_task_team = thread->th.th_task_team;
  taskdata->td_size_alloc = shareds_offset + sizeof_shareds;
  taskdata->td_flags.tasktype = TASK_EXPLICIT;
  // Hidden helper tasks need the team and task team of the shadow thread.
  if (flags->hidden_helper) {
    kmp_info_t *shadow_thread = __kmp_threads[KMP_GTID_TO_SHADOW_GTID(gtid)];
    taskdata->td_team = shadow_thread->th.th_team;
    taskdata->td_task_team = shadow_thread->th.th_task_team;
  }

  taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec);

  taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0;

  // Serialize the task if the team is serialized, the parent is final, or the
  // task was merged under if(0), so it executes immediately.
  taskdata->td_flags.task_serial =
      (parent_task->td_flags.final || taskdata->td_flags.team_serial ||
       taskdata->td_flags.tasking_ser || flags->merged_if0);

  taskdata->td_flags.started = 0;
  taskdata->td_flags.executing = 0;
  taskdata->td_flags.complete = 0;
  taskdata->td_flags.freed = 0;

  KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
  // Start at one because this counts the current task and its children.
  KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1);
  taskdata->td_taskgroup =
      parent_task->td_taskgroup; // task inherits the taskgroup from the parent
  taskdata->td_dephash = NULL;
  taskdata->td_depnode = NULL;
  taskdata->td_target_data.async_handle = NULL;
  if (flags->tiedness == TASK_UNTIED)
    taskdata->td_last_tied = NULL; // will be set when the task is scheduled
  else
    taskdata->td_last_tied = taskdata;
  taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED;

  if (UNLIKELY(ompt_enabled.enabled))
    __ompt_task_init(taskdata, gtid);

  if (__kmp_track_children_task(taskdata)) {
    KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
    if (parent_task->td_taskgroup)
      KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
    // Only track allocated child tasks for explicit parents, since implicit
    // tasks are never deallocated.
    if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) {
      KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
    }
    if (flags->hidden_helper) {
      taskdata->td_flags.task_serial = FALSE;
      // Increment the number of hidden helper tasks to be executed.
      KMP_ATOMIC_INC(&__kmp_unexecuted_hidden_helper_tasks);
    }
  }

  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
                gtid, taskdata, taskdata->td_parent));

  return task;
}
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry) {
  kmp_task_t *retval;
  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;
  __kmp_assert_valid_gtid(gtid);
  input_flags->native = FALSE;
  // __kmp_task_alloc() sets up all other runtime flags.
  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) "
                "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
                gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
                input_flags->proxy ? "proxy" : "",
                input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t,
                sizeof_shareds, task_entry));

  retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t,
                            sizeof_shareds, task_entry);

  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval));

  return retval;
}
kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                         kmp_int32 flags,
                                         size_t sizeof_kmp_task_t,
                                         size_t sizeof_shareds,
                                         kmp_routine_entry_t task_entry,
                                         kmp_int64 device_id) {
  auto &input_flags = reinterpret_cast<kmp_tasking_flags_t &>(flags);
  // A target task is untied, as defined in the specification.
  input_flags.tiedness = TASK_UNTIED;

  if (__kmp_enable_hidden_helper)
    input_flags.hidden_helper = TRUE;

  return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t,
                               sizeof_shareds, task_entry);
}
// Registration of task affinity hints; the hints are currently ignored.
kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
                                            kmp_task_t *new_task,
                                            kmp_int32 naffins,
                                            kmp_task_affinity_info_t *affin_list) {
  return 0;
}
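
// __kmp_invoke_task: execute the task routine for a (non-proxy) task with the
// surrounding bookkeeping: start/finish, cancellation checks, OMPT/OMPD and
// stats events, and ITT timing. For an already-completed proxy task only the
// bottom half of the finish is run.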
static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
                              kmp_taskdata_t *current_task) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_info_t *thread;
  int discard = 0 /* false */;
  KA_TRACE(
      30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
           gtid, taskdata, current_task));
  KMP_DEBUG_ASSERT(task);
  if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY &&
               taskdata->td_flags.complete == 1)) {
    // This is a proxy task that was already completed but still needs to run
    // its bottom-half finish.
    KA_TRACE(
        30,
        ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n",
         gtid, taskdata));

    __kmp_bottom_half_finish_proxy(gtid, task);

    KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for "
                  "proxy task %p, resuming task %p\n",
                  gtid, taskdata, current_task));
    return;
  }

  ompt_thread_info_t oldInfo;
  if (UNLIKELY(ompt_enabled.enabled)) {
    // Store the thread's state and restore it after the task executes.
    thread = __kmp_threads[gtid];
    oldInfo = thread->th.ompt_thread_info;
    thread->th.ompt_thread_info.wait_id = 0;
    thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
                                            ? ompt_state_work_serial
                                            : ompt_state_work_parallel;
    taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }

  // Proxy tasks are not handled by the runtime here.
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    __kmp_task_start(gtid, task, current_task);
  }
  // If cancellation has been enabled for this run, check for a pending
  // taskgroup or parallel-region cancellation and discard the task body.
  if (UNLIKELY(__kmp_omp_cancellation)) {
    thread = __kmp_threads[gtid];
    kmp_team_t *this_team = thread->th.th_team;
    kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
    if ((taskgroup && taskgroup->cancel_request) ||
        (this_team->t.t_cancel_request == cancel_parallel)) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
      ompt_data_t *task_data;
      if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) {
        __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
        ompt_callbacks.ompt_callback(ompt_callback_cancel)(
            task_data,
            ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup
                                                      : ompt_cancel_parallel) |
                ompt_cancel_discarded_task,
            NULL);
      }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
      discard = 1 /* true */;
    }
  }

  // Invoke the task routine and pass in relevant data.
  if (!discard) {
    if (taskdata->td_flags.tiedness == TASK_UNTIED) {
      taskdata->td_last_tied = current_task->td_last_tied;
      KMP_DEBUG_ASSERT(taskdata->td_last_tied);
    }
#if KMP_STATS_ENABLED
    switch (KMP_GET_THREAD_STATE()) {
    case FORK_JOIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar);
      break;
    case PLAIN_BARRIER:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar);
      break;
    case TASKYIELD:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield);
      break;
    case TASKWAIT:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait);
      break;
    case TASKGROUP:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup);
      break;
    default:
      KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate);
      break;
    }
#endif // KMP_STATS_ENABLED
    // OMPT task begin
    if (UNLIKELY(ompt_enabled.enabled))
      __ompt_task_start(task, current_task, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&
                 taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
      ompt_data_t instance = ompt_data_none;
      instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
          ompt_dispatch_taskloop_chunk, instance);
      taskdata->ompt_task_info.dispatch_chunk = {0, 0};
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

#if OMPD_SUPPORT
    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_task_begin();
#endif

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    kmp_uint64 cur_time;
    kmp_int32 kmp_itt_count_task =
        __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial &&
        current_task->td_flags.tasktype == TASK_IMPLICIT;
    if (kmp_itt_count_task) {
      thread = __kmp_threads[gtid];
      // Time outer-level explicit tasks on a barrier to adjust imbalance time.
      if (thread->th.th_bar_arrive_time)
        cur_time = __itt_get_timestamp();
      else
        kmp_itt_count_task = 0; // thread is not on a barrier - skip timing
    }
    KMP_FSYNC_ACQUIRED(taskdata); // acquired self (new task)
#endif

#if ENABLE_LIBOMPTARGET
    if (taskdata->td_target_data.async_handle != NULL) {
      // There is a valid async handle, so the task routine already ran once;
      // query for completion instead of re-executing the routine.
      __tgt_target_nowait_query(&taskdata->td_target_data.async_handle);
    } else
#endif
    if (task->routine != NULL) {
#ifdef KMP_GOMP_COMPAT
      if (taskdata->td_flags.native) {
        ((void (*)(void *))(*(task->routine)))(task->shareds);
      } else
#endif /* KMP_GOMP_COMPAT */
      {
        (*(task->routine))(gtid, task);
      }
    }
    KMP_POP_PARTITIONED_TIMER();

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if (kmp_itt_count_task) {
      // Barrier imbalance: adjust arrive time with the task duration.
      thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time);
    }
    KMP_FSYNC_CANCEL(taskdata); // destroy self (just executed)
    KMP_FSYNC_RELEASING(taskdata->td_parent); // releasing parent
#endif
  }

#if OMPD_SUPPORT
  if (ompd_state & OMPD_ENABLE_BP)
    ompd_bp_task_end();
#endif

  // Proxy tasks are not handled by the runtime here.
  if (taskdata->td_flags.proxy != TASK_PROXY) {
    if (UNLIKELY(ompt_enabled.enabled)) {
      thread->th.ompt_thread_info = oldInfo;
      if (taskdata->td_flags.tiedness == TASK_TIED) {
        taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
      }
      __kmp_task_finish<true>(gtid, task, current_task);
    } else {
      __kmp_task_finish<false>(gtid, task, current_task);
    }
  }

  KA_TRACE(
      30,
      ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
       gtid, taskdata, current_task));
}
kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
                                kmp_task_t *new_task) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));

  kmp_taskdata_t *parent;
  if (UNLIKELY(ompt_enabled.enabled)) {
    parent = new_taskdata->td_parent;
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0,
          OMPT_GET_RETURN_ADDRESS(0));
    }
  }

  /* Always try to queue the task; if the queue is full, execute it. */
  if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
    // Execute this task immediately.
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  }

  KA_TRACE(10,
           ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
            "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
            gtid, loc_ref, new_taskdata));

  if (UNLIKELY(ompt_enabled.enabled)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return TASK_CURRENT_NOT_QUEUED;
}
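
// __kmp_omp_task: common code for task dispatch. Tries to push the task onto
// the current thread's deque; a proxy task, or a task that cannot be pushed,
// is invoked immediately (optionally forcing serialization). With a passive
// wait policy, sleeping team members are woken so they can pick the task up.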
kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
                         bool serialize_immediate) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);

  /* Always try to queue the task; if the queue is full, execute it. */
  if (new_taskdata->td_flags.proxy == TASK_PROXY ||
      __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) {
    // Execute this task immediately.
    kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task;
    if (serialize_immediate)
      new_taskdata->td_flags.task_serial = 1;
    __kmp_invoke_task(gtid, new_task, current_task);
  } else if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
             __kmp_wpolicy_passive) {
    kmp_info_t *this_thr = __kmp_threads[gtid];
    kmp_team_t *team = this_thr->th.th_team;
    kmp_int32 nthreads = this_thr->th.th_team_nproc;
    for (int i = 0; i < nthreads; ++i) {
      kmp_info_t *thread = team->t.t_threads[i];
      if (thread == this_thr)
        continue;
      if (thread->th.th_sleep_loc != NULL) {
        __kmp_null_resume_wrapper(thread);
        break; // wake one thread at a time
      }
    }
  }
  return TASK_CURRENT_NOT_QUEUED;
}
kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
                          kmp_task_t *new_task) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));
  __kmp_assert_valid_gtid(gtid);

  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled)) {
    if (!new_taskdata->td_flags.started) {
      OMPT_STORE_RETURN_ADDRESS(gtid);
      parent = new_taskdata->td_parent;
      if (!parent->ompt_task_info.frame.enter_frame.ptr) {
        parent->ompt_task_info.frame.enter_frame.ptr =
            OMPT_GET_FRAME_ADDRESS(0);
      }
      if (ompt_enabled.ompt_callback_task_create) {
        ompt_callbacks.ompt_callback(ompt_callback_task_create)(
            &(parent->ompt_task_info.task_data),
            &(parent->ompt_task_info.frame),
            &(new_taskdata->ompt_task_info.task_data),
            ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
            OMPT_LOAD_RETURN_ADDRESS(gtid));
      }
    } else {
      // We are scheduling the continuation of an untied task; schedule back
      // to its scheduling parent.
      __ompt_task_finish(new_task,
                         new_taskdata->ompt_task_info.scheduling_parent,
                         ompt_task_switch);
      new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
    }
  }

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return res;
}
// __kmp_omp_taskloop_task: wrapper around __kmp_omp_task used by the taskloop
// implementation so the OMPT task-create callback gets the correct return
// address (codeptr_ra).
kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_task_t *new_task, void *codeptr_ra) {
  kmp_int32 res;
  KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK);

#if KMP_DEBUG || OMPT_SUPPORT
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
#endif
  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref,
                new_taskdata));

  kmp_taskdata_t *parent = NULL;
  if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
    parent = new_taskdata->td_parent;
    if (!parent->ompt_task_info.frame.enter_frame.ptr)
      parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(parent->ompt_task_info.task_data), &(parent->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0,
          codeptr_ra);
    }
  }

  res = __kmp_omp_task(gtid, new_task, true);

  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
    parent->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
  return res;
}
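
// __kmpc_omp_taskwait_template: implementation of the taskwait construct.
// The calling thread executes queued tasks until all of its incomplete child
// tasks (including proxy and hidden-helper children when present) have
// finished. The ompt template parameter controls sync-region event reporting.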
template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
                                              void *frame_address,
                                              void *return_address) {
  kmp_taskdata_t *taskdata = nullptr;
  kmp_info_t *thread;
  int thread_finished = FALSE;
  KMP_SET_THREAD_STATE_BLOCK(TASKWAIT);

  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref));
  KMP_DEBUG_ASSERT(gtid >= 0);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    ompt_data_t *my_task_data;
    ompt_data_t *my_parallel_data;

    if (ompt) {
      my_task_data = &(taskdata->ompt_task_info.task_data);
      my_parallel_data = OMPT_CUR_TEAM_DATA(thread);

      taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;

      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }

      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
            my_task_data, return_address);
      }
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

    // Debugger: the taskwait is active; store its location and thread.
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;
    void *itt_sync_obj = NULL;
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);

    bool must_wait =
        !taskdata->td_flags.team_serial && !taskdata->td_flags.final;

    must_wait = must_wait || (thread->th.th_task_team != NULL &&
                              thread->th.th_task_team->tt.tt_found_proxy_tasks);
    // If a hidden helper task was encountered, we must wait here as well.
    must_wait =
        must_wait ||
        (__kmp_enable_hidden_helper && thread->th.th_task_team != NULL &&
         thread->th.th_task_team->tt.tt_hidden_helper_task_encountered);

    if (must_wait) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *,
                &(taskdata->td_incomplete_child_tasks)),
          0U);
      while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
    KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children

    // Debugger: the taskwait is completed; the thread field is negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt) {
      if (ompt_enabled.ompt_callback_sync_region_wait) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      if (ompt_enabled.ompt_callback_sync_region) {
        ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
            ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
            my_task_data, return_address);
      }
      taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
    }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  }

  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
#if OMPT_SUPPORT && OMPT_OPTIONAL
static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid,
                                          void *frame_address,
                                          void *return_address) {
  return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address,
                                            return_address);
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
// __kmpc_omp_taskwait: wait until all tasks generated by the current task
// have completed.
kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.enabled)) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
                                    OMPT_LOAD_RETURN_ADDRESS(gtid));
  }
#endif
  return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
}
// __kmpc_omp_taskyield: switch to a different task.
kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) {
  kmp_taskdata_t *taskdata = NULL;
  kmp_info_t *thread;
  int thread_finished = FALSE;

  KMP_SET_THREAD_STATE_BLOCK(TASKYIELD);

  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
                gtid, loc_ref, end_part));
  __kmp_assert_valid_gtid(gtid);

  if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) {
    thread = __kmp_threads[gtid];
    taskdata = thread->th.th_current_task;

    // Debugger: the taskwait is active; store its location and thread.
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc_ref;
    taskdata->td_taskwait_thread = gtid + 1;

    void *itt_sync_obj = NULL;
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);

    if (!taskdata->td_flags.team_serial) {
      kmp_task_team_t *task_team = thread->th.th_task_team;
      if (task_team != NULL) {
        if (KMP_TASKING_ENABLED(task_team)) {
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 1;
          __kmp_execute_tasks_32(
              thread, gtid, (kmp_flag_32<> *)NULL, FALSE,
              &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
              __kmp_task_stealing_constraint);
          if (UNLIKELY(ompt_enabled.enabled))
            thread->th.ompt_thread_info.ompt_task_yielded = 0;
        }
      }
    }
    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);

    // Debugger: the taskwait is completed; the thread field is negated.
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread;
  }

  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
                "returning TASK_CURRENT_NOT_QUEUED\n",
                gtid, taskdata));

  return TASK_CURRENT_NOT_QUEUED;
}
// Flags for special info per task reduction item.
typedef struct kmp_taskred_flags {
  unsigned lazy_priv : 1; // use lazy allocation/initialization of private data
  unsigned reserved31 : 31;
} kmp_taskred_flags_t;
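
// __kmp_task_reduction_init: initialize task reduction data for a taskgroup
// from an array of reduction items. Eagerly initialized items get per-thread
// private copies allocated and initialized up front; lazily initialized items
// get an array of pointers that is filled on first use.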
template <typename T>
void *__kmp_task_reduction_init(int gtid, int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;
  kmp_uint32 nth = thread->th.th_team_nproc;
  kmp_taskred_data_t *arr;

  // Check the input data just in case.
  KMP_ASSERT(tg != NULL);
  KMP_ASSERT(data != NULL);
  KMP_ASSERT(num > 0);
  if (nth == 1) {
    KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n",
                  gtid, tg));
    return (void *)tg;
  }
  KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n",
                gtid, tg, num));
  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thread, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    size_t size = data[i].reduce_size - 1;
    // Round the size up to a cache line per thread-specific item.
    size += CACHE_LINE - size % CACHE_LINE;
    KMP_ASSERT(data[i].reduce_comb != NULL); // combiner is mandatory
    arr[i].reduce_shar = data[i].reduce_shar;
    arr[i].reduce_size = size;
    arr[i].flags = data[i].flags;
    arr[i].reduce_comb = data[i].reduce_comb;
    arr[i].reduce_init = data[i].reduce_init;
    arr[i].reduce_fini = data[i].reduce_fini;
    __kmp_assign_orig<T>(arr[i], data[i]);
    if (!arr[i].flags.lazy_priv) {
      // Allocate the block of private copies and initialize them eagerly.
      arr[i].reduce_priv = __kmp_allocate(nth * size);
      arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size;
      if (arr[i].reduce_init != NULL) {
        // Initialize all thread-specific items.
        for (size_t j = 0; j < nth; ++j) {
          __kmp_call_init<T>(arr[i], j * size);
        }
      }
    } else {
      // Only allocate space for pointers now; objects will be lazily
      // allocated and initialized if and when they are requested.
      arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *));
    }
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
  return (void *)tg;
}
template <typename T>
void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data,
                                    kmp_taskgroup_t *tg, void *reduce_data) {
  kmp_taskred_data_t *arr;
  KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p,"
                " from data %p\n",
                thr, tg, reduce_data));
  arr = (kmp_taskred_data_t *)__kmp_thread_malloc(
      thr, num * sizeof(kmp_taskred_data_t));
  KMP_MEMCPY(arr, reduce_data, num * sizeof(kmp_taskred_data_t));
  for (int i = 0; i < num; ++i) {
    arr[i].reduce_shar = data[i].reduce_shar; // regain shape of shared copies
  }
  tg->reduce_data = (void *)arr;
  tg->reduce_num_data = num;
}
void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 nth = thread->th.th_team_nproc;
  if (nth == 1)
    return data; // nothing to do
  kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp;
  if (tg == NULL)
    tg = thread->th.th_current_task->td_taskgroup;
  KMP_ASSERT(tg != NULL);
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)(tg->reduce_data);
  kmp_int32 num = tg->reduce_num_data;
  kmp_int32 tid = thread->th.th_info.ds.ds_tid;

  KMP_ASSERT(data != NULL);
  while (tg != NULL) {
    for (int i = 0; i < num; ++i) {
      if (!arr[i].flags.lazy_priv) {
        if (data == arr[i].reduce_shar ||
            (data >= arr[i].reduce_priv && data < arr[i].reduce_pend))
          return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size;
      } else {
        // check the shared location first
        void **p_priv = (void **)(arr[i].reduce_priv);
        if (data == arr[i].reduce_shar)
          goto found;
        // check if a thread-specific location was passed as parameter
        for (int j = 0; j < nth; ++j)
          if (data == p_priv[j])
            goto found;
        continue; // not found, continue search
      found:
        if (p_priv[tid] == NULL) {
          // allocate the thread-specific object lazily
          p_priv[tid] = __kmp_allocate(arr[i].reduce_size);
          if (arr[i].reduce_init != NULL) {
            if (arr[i].reduce_orig != NULL) { // new interface
              ((void (*)(void *, void *))arr[i].reduce_init)(
                  p_priv[tid], arr[i].reduce_orig);
            } else { // old interface (single parameter)
              ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]);
            }
          }
        }
        return p_priv[tid];
      }
    }
    tg = tg->parent;
    arr = (kmp_taskred_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
  }
  KMP_ASSERT2(0, "Unknown task reduction item");
  return NULL; // unreachable; error path
}
static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) {
  kmp_int32 nth = th->th.th_team_nproc;
  KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1
  kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data;
  kmp_int32 num = tg->reduce_num_data;
  for (int i = 0; i < num; ++i) {
    void *sh_data = arr[i].reduce_shar;
    void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini);
    void (*f_comb)(void *, void *) =
        (void (*)(void *, void *))(arr[i].reduce_comb);
    if (!arr[i].flags.lazy_priv) {
      void *pr_data = arr[i].reduce_priv;
      size_t size = arr[i].reduce_size;
      for (int j = 0; j < nth; ++j) {
        void *priv_data = (char *)pr_data + j * size;
        f_comb(sh_data, priv_data); // combine results
        if (f_fini)
          f_fini(priv_data); // finalize if needed
      }
    } else {
      void **pr_data = (void **)(arr[i].reduce_priv);
      for (int j = 0; j < nth; ++j) {
        if (pr_data[j] != NULL) {
          f_comb(sh_data, pr_data[j]); // combine results
          if (f_fini)
            f_fini(pr_data[j]); // finalize if needed
          __kmp_free(pr_data[j]);
        }
      }
    }
    __kmp_free(arr[i].reduce_priv);
  }
  __kmp_thread_free(th, arr);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) {
  __kmp_thread_free(th, tg->reduce_data);
  tg->reduce_data = NULL;
  tg->reduce_num_data = 0;
}
template <typename T>
void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                         int num, T *data) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thr = __kmp_threads[gtid];
  kmp_int32 nth = thr->th.th_team_nproc;
  __kmpc_taskgroup(loc, gtid); // form new taskgroup first
  if (nth == 1) {
    KA_TRACE(10,
             ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n",
              gtid, thr->th.th_current_task->td_taskgroup));
    return (void *)thr->th.th_current_task->td_taskgroup;
  }
  kmp_team_t *team = thr->th.th_team;
  void *reduce_data;
  kmp_taskgroup_t *tg;
  reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);
  if (reduce_data == NULL &&
      __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,
                                 (void *)1)) {
    // single thread enters this block to initialize common reduction data
    KMP_DEBUG_ASSERT(reduce_data == NULL);
    // first initialize own data, then make a copy other threads can use
    tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data);
    reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t));
    KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t));
    // fini counters should be 0 at this point
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0);
    KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0);
    KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data);
  } else {
    while (
        (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) ==
        (void *)1) { // wait for task reduction initialization
      KMP_CPU_PAUSE();
    }
    KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be a valid pointer here
    tg = thr->th.th_current_task->td_taskgroup;
    __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data);
  }
  return tg;
}
void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws,
                                          int num, void *data) {
  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
                                            (kmp_task_red_input_t *)data);
}

void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num,
                                   void *data) {
  return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num,
                                            (kmp_taskred_input_t *)data);
}

void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) {
  __kmpc_end_taskgroup(loc, gtid);
}
void __kmpc_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *tg_new =
      (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t));
  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new));
  KMP_ATOMIC_ST_RLX(&tg_new->count, 0);
  KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq);
  tg_new->parent = taskdata->td_taskgroup;
  tg_new->reduce_data = NULL;
  tg_new->reduce_num_data = 0;
  tg_new->gomp_data = NULL;
  taskdata->td_taskgroup = tg_new;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    kmp_team_t *team = thread->th.th_team;
    ompt_data_t my_task_data = taskdata->ompt_task_info.task_data;
    ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data;

    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
void __kmpc_end_taskgroup(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup;
  int thread_finished = FALSE;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_team_t *team;
  ompt_data_t my_task_data;
  ompt_data_t my_parallel_data;
  void *codeptr = nullptr;
  if (UNLIKELY(ompt_enabled.enabled)) {
    team = thread->th.th_team;
    my_task_data = taskdata->ompt_task_info.task_data;
    my_parallel_data = team->t.ompt_team_info.parallel_data;
    codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
  }
#endif
  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc));
  KMP_DEBUG_ASSERT(taskgroup != NULL);
  KMP_SET_THREAD_STATE_BLOCK(TASKGROUP);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // mark the task as waiting (not on a barrier)
    taskdata->td_taskwait_counter += 1;
    taskdata->td_taskwait_ident = loc;
    taskdata->td_taskwait_thread = gtid + 1;

    void *itt_sync_obj = NULL;
    KMP_ITT_TASKWAIT_STARTING(itt_sync_obj);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

    if (!taskdata->td_flags.team_serial ||
        (thread->th.th_task_team != NULL &&
         (thread->th.th_task_team->tt.tt_found_proxy_tasks ||
          thread->th.th_task_team->tt.tt_hidden_helper_task_encountered))) {
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), 0U);
      while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) {
        flag.execute_tasks(thread, gtid, FALSE,
                           &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj),
                           __kmp_task_stealing_constraint);
      }
    }
    taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
          &(my_task_data), codeptr);
    }
#endif

    KMP_ITT_TASKWAIT_FINISHED(itt_sync_obj);
    KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync after descendants
  }
  KMP_DEBUG_ASSERT(taskgroup->count == 0);
  if (taskgroup->reduce_data != NULL &&
      !taskgroup->gomp_data) { // need to reduce?
    int cnt;
    void *reduce_data;
    kmp_team_t *t = thread->th.th_team;
    kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data;
    // check if <priv> data of the first reduction variable is shared for team
    void *priv0 = arr[0].reduce_priv;
    if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL &&
        ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
      // finishing task reduction on parallel
      cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]);
      if (cnt == thread->th.th_team_nproc - 1) {
        // last thread through: finalize reduction and clean team fields
        __kmp_task_reduction_fini(thread, taskgroup);
        __kmp_thread_free(thread, reduce_data);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0);
      } else {
        // not the last thread: just clean own copy of the data
        __kmp_task_reduction_clean(thread, taskgroup);
      }
    } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) !=
                   NULL &&
               ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) {
      // finishing task reduction on worksharing
      cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]);
      if (cnt == thread->th.th_team_nproc - 1) {
        __kmp_task_reduction_fini(thread, taskgroup);
        __kmp_thread_free(thread, reduce_data);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL);
        KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0);
      } else {
        __kmp_task_reduction_clean(thread, taskgroup);
      }
    } else {
      // finishing task reduction on taskgroup
      __kmp_task_reduction_fini(thread, taskgroup);
    }
  }
  // Restore parent taskgroup for the current task
  taskdata->td_taskgroup = taskgroup->parent;
  __kmp_thread_free(thread, taskgroup);

  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n",
                gtid, taskdata));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) {
    ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
        ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data),
        &(my_task_data), codeptr);
  }
#endif
}
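// __kmp_get_priority_task: walk the task team's priority-task list and pop one
// task that the task-scheduling constraint allows this thread to run.
// Returns NULL if no eligible task is found.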
static kmp_task_t *__kmp_get_priority_task(kmp_int32 gtid,
                                           kmp_task_team_t *task_team,
                                           kmp_int32 is_constrained) {
  kmp_task_t *task = NULL;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *current;
  kmp_thread_data_t *thread_data;
  int ntasks = task_team->tt.tt_num_task_pri;
  if (ntasks == 0) {
    KA_TRACE(
        20, ("__kmp_get_priority_task(exit #1): T#%d No tasks to get\n", gtid));
    return NULL;
  }
  do {
    // decrement num_tasks to "reserve" one task to get for execution
    if (__kmp_atomic_compare_store(&task_team->tt.tt_num_task_pri, ntasks,
                                   ntasks - 1))
      break;
    ntasks = task_team->tt.tt_num_task_pri;
  } while (ntasks > 0);
  if (ntasks == 0) {
    KA_TRACE(20, ("__kmp_get_priority_task(exit #2): T#%d No tasks to get\n",
                  __kmp_get_gtid()));
    return NULL;
  }
  // We got a "ticket" to get a "reserved" priority task
  int deque_ntasks;
  kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
  do {
    KMP_ASSERT(list != NULL);
    thread_data = &list->td;
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    deque_ntasks = thread_data->td.td_deque_ntasks;
    if (deque_ntasks == 0) {
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_get_priority_task: T#%d No tasks to get from %p\n",
                    __kmp_get_gtid(), thread_data));
      list = list->next;
    }
  } while (deque_ntasks == 0);
  KMP_DEBUG_ASSERT(deque_ntasks);
  int target = thread_data->td.td_deque_head;
  current = __kmp_threads[gtid]->th.th_current_task;
  taskdata = thread_data->td.td_deque[target];
  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
    // Bump head pointer and Wrap.
    thread_data->td.td_deque_head =
        (target + 1) & TASK_DEQUE_MASK(thread_data->td);
  } else {
    if (!task_team->tt.tt_untied_task_encountered) {
      // The TSC does not allow to steal victim task
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(20, ("__kmp_get_priority_task(exit #3): T#%d could not get task "
                    "from %p: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, thread_data, task_team, deque_ntasks, target,
                    thread_data->td.td_deque_tail));
      task_team->tt.tt_num_task_pri++; // restore the "reserved" count
      return NULL;
    }
    int i;
    // walk through the deque trying to steal any allowed task
    taskdata = NULL;
    for (i = 1; i < deque_ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
      taskdata = thread_data->td.td_deque[target];
      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
        break; // found task to execute
      } else {
        taskdata = NULL;
      }
    }
    if (taskdata == NULL) {
      // No appropriate candidate found to execute
      __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
      KA_TRACE(
          10, ("__kmp_get_priority_task(exit #4): T#%d could not get task from "
               "%p: task_team=%p ntasks=%d head=%u tail=%u\n",
               gtid, thread_data, task_team, deque_ntasks,
               thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
      task_team->tt.tt_num_task_pri++; // restore the "reserved" count
      return NULL;
    }
    int prev = target;
    for (i = i + 1; i < deque_ntasks; ++i) {
      // shift remaining tasks in the deque left by 1
      target = (target + 1) & TASK_DEQUE_MASK(thread_data->td);
      thread_data->td.td_deque[prev] = thread_data->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(
        thread_data->td.td_deque_tail ==
        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(thread_data->td)));
    thread_data->td.td_deque_tail = target; // tail -= 1 (wrapped)
  }
  thread_data->td.td_deque_ntasks = deque_ntasks - 1;
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
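// __kmp_remove_my_task: pop a task from the calling thread's own deque (tail
// end), honoring the task-scheduling constraint when is_constrained is set.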
static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid,
                                        kmp_task_team_t *task_team,
                                        kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_thread_data_t *thread_data;
  kmp_uint32 tail;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
  KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data !=
                   NULL); // caller should check this condition

  thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];

  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
                gtid, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

  if (TCR_4(thread_data->td.td_deque_ntasks) == 0) {
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  tail = (thread_data->td.td_deque_tail - 1) &
         TASK_DEQUE_MASK(thread_data->td); // Wrap index.
  taskdata = thread_data->td.td_deque[tail];

  if (!__kmp_task_is_allowed(gtid, is_constrained, taskdata,
                             thread->th.th_current_task)) {
    // The TSC does not allow to take the tail task
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
    KA_TRACE(10,
             ("__kmp_remove_my_task(exit #3): T#%d TSC blocks tail task: "
              "ntasks=%d head=%u tail=%u\n",
              gtid, thread_data->td.td_deque_ntasks,
              thread_data->td.td_deque_head, thread_data->td.td_deque_tail));
    return NULL;
  }

  thread_data->td.td_deque_tail = tail;
  TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1);

  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: "
                "ntasks=%d head=%u tail=%u\n",
                gtid, taskdata, thread_data->td.td_deque_ntasks,
                thread_data->td.td_deque_head, thread_data->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
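// __kmp_steal_task: steal a task from the head of the victim thread's deque.
// If the calling thread had previously reported itself finished, it
// re-increments unfinished_threads before releasing the victim's deque lock.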
static kmp_task_t *__kmp_steal_task(kmp_info_t *victim_thr, kmp_int32 gtid,
                                    kmp_task_team_t *task_team,
                                    std::atomic<kmp_int32> *unfinished_threads,
                                    int *thread_finished,
                                    kmp_int32 is_constrained) {
  kmp_task_t *task;
  kmp_taskdata_t *taskdata;
  kmp_taskdata_t *current;
  kmp_thread_data_t *victim_td, *threads_data;
  kmp_int32 target;
  kmp_int32 victim_tid;

  KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);

  threads_data = task_team->tt.tt_threads_data;
  KMP_DEBUG_ASSERT(threads_data != NULL); // caller should check this condition

  victim_tid = victim_thr->th.th_info.ds.ds_tid;
  victim_td = &threads_data[victim_tid];

  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: "
                "task_team=%p ntasks=%d head=%u tail=%u\n",
                gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                victim_td->td.td_deque_tail));

  if (TCR_4(victim_td->td.td_deque_ntasks) == 0) {
    KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: "
                  "task_team=%p ntasks=%d head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread(victim_thr), task_team,
                  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
                  victim_td->td.td_deque_tail));
    return NULL;
  }

  __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock);

  int ntasks = TCR_4(victim_td->td.td_deque_ntasks);
  // Check again after we acquire the lock
  if (ntasks == 0) {
    __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
    KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: "
                  "task_team=%p ntasks=%d head=%u tail=%u\n",
                  gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                  victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
    return NULL;
  }

  KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL);
  current = __kmp_threads[gtid]->th.th_current_task;
  taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head];
  if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
    // Bump head pointer and Wrap.
    victim_td->td.td_deque_head =
        (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td);
  } else {
    if (!task_team->tt.tt_untied_task_encountered) {
      // The TSC does not allow to steal the victim task
      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
      KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    int i;
    // walk through the victim's deque trying to steal any allowed task
    target = victim_td->td.td_deque_head;
    taskdata = NULL;
    for (i = 1; i < ntasks; ++i) {
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      taskdata = victim_td->td.td_deque[target];
      if (__kmp_task_is_allowed(gtid, is_constrained, taskdata, current)) {
        break; // found victim task
      } else {
        taskdata = NULL;
      }
    }
    if (taskdata == NULL) {
      // No appropriate candidate to steal found
      __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);
      KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d could not steal from "
                    "T#%d: task_team=%p ntasks=%d head=%u tail=%u\n",
                    gtid, __kmp_gtid_from_thread(victim_thr), task_team, ntasks,
                    victim_td->td.td_deque_head, victim_td->td.td_deque_tail));
      return NULL;
    }
    int prev = target;
    for (i = i + 1; i < ntasks; ++i) {
      // shift remaining tasks in the deque left by 1
      target = (target + 1) & TASK_DEQUE_MASK(victim_td->td);
      victim_td->td.td_deque[prev] = victim_td->td.td_deque[target];
      prev = target;
    }
    KMP_DEBUG_ASSERT(
        victim_td->td.td_deque_tail ==
        (kmp_uint32)((target + 1) & TASK_DEQUE_MASK(victim_td->td)));
    victim_td->td.td_deque_tail = target; // tail -= 1 (wrapped)
  }
  if (*thread_finished) {
    // Un-mark this victim thread as finished before releasing the lock,
    // or other threads might be released from the barrier prematurely.
    kmp_int32 count = KMP_ATOMIC_INC(unfinished_threads);
    KA_TRACE(
        20,
        ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
         gtid, count + 1, task_team));
    *thread_finished = FALSE;
  }
  TCW_4(victim_td->td.td_deque_ntasks, ntasks - 1);

  __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock);

  KA_TRACE(10,
           ("__kmp_steal_task(exit #5): T#%d stole task %p from T#%d: "
            "task_team=%p ntasks=%d head=%u tail=%u\n",
            gtid, taskdata, __kmp_gtid_from_thread(victim_thr), task_team,
            ntasks, victim_td->td.td_deque_head, victim_td->td.td_deque_tail));

  task = KMP_TASKDATA_TO_TASK(taskdata);
  return task;
}
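// __kmp_execute_tasks_template: common task-scheduling loop used by barriers,
// taskwait and taskgroup. Drains the thread's own deque and the priority-task
// list first, then tries to steal from other threads until flag->done_check()
// is satisfied (or, for final_spin, until no work remains).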
template <class C>
static inline int __kmp_execute_tasks_template(
    kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
3184 kmp_task_team_t *task_team = thread->th.th_task_team;
3185 kmp_thread_data_t *threads_data;
3187 kmp_info_t *other_thread;
3188 kmp_taskdata_t *current_task = thread->th.th_current_task;
3189 std::atomic<kmp_int32> *unfinished_threads;
3190 kmp_int32 nthreads, victim_tid = -2, use_own_tasks = 1, new_victim = 0,
3191 tid = thread->th.th_info.ds.ds_tid;
3193 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3194 KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]);
3196 if (task_team == NULL || current_task == NULL)
  KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d "
                "*thread_finished=%d\n",
                gtid, final_spin, *thread_finished));
3203 thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
3204 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3206 KMP_DEBUG_ASSERT(threads_data != NULL);
3208 nthreads = task_team->tt.tt_nproc;
3209 unfinished_threads = &(task_team->tt.tt_unfinished_threads);
3210 KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks ||
3211 task_team->tt.tt_hidden_helper_task_encountered);
3212 KMP_DEBUG_ASSERT(*unfinished_threads >= 0);
3218 if (task_team->tt.tt_num_task_pri) {
3219 task = __kmp_get_priority_task(gtid, task_team, is_constrained);
3221 if (task == NULL && use_own_tasks) {
3222 task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained);
3224 if ((task == NULL) && (nthreads > 1)) {
3228 if (victim_tid == -2) {
3229 victim_tid = threads_data[tid].td.td_deque_last_stolen;
3232 other_thread = threads_data[victim_tid].td.td_thr;
3234 if (victim_tid != -1) {
3236 }
else if (!new_victim) {
3242 victim_tid = __kmp_get_random(thread) % (nthreads - 1);
3243 if (victim_tid >= tid) {
3247 other_thread = threads_data[victim_tid].td.td_thr;
          if ((__kmp_tasking_mode == tskm_task_teams) &&
              (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
              (TCR_PTR(CCAST(void *, other_thread->th.th_sleep_loc)) !=
               NULL)) {
            __kmp_null_resume_wrapper(other_thread);
3275 task = __kmp_steal_task(other_thread, gtid, task_team,
3276 unfinished_threads, thread_finished,
3280 if (threads_data[tid].td.td_deque_last_stolen != victim_tid) {
3281 threads_data[tid].td.td_deque_last_stolen = victim_tid;
3288 KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1);
3297#if USE_ITT_BUILD && USE_ITT_NOTIFY
3298 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
3299 if (itt_sync_obj == NULL) {
3301 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
3303 __kmp_itt_task_starting(itt_sync_obj);
3306 __kmp_invoke_task(gtid, task, current_task);
3308 if (itt_sync_obj != NULL)
3309 __kmp_itt_task_finished(itt_sync_obj);
3316 if (flag == NULL || (!final_spin && flag->done_check())) {
3319 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3323 if (thread->th.th_task_team == NULL) {
3326 KMP_YIELD(__kmp_library == library_throughput);
3329 if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) {
3330 KA_TRACE(20, (
"__kmp_execute_tasks_template: T#%d stolen task spawned "
3331 "other tasks, restart\n",
3342 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks) == 0) {
3346 if (!*thread_finished) {
        kmp_int32 count = -1 + KMP_ATOMIC_DEC(unfinished_threads);
        KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec "
                      "unfinished_threads to %d task_team=%p\n",
                      gtid, count, task_team));
3354 *thread_finished = TRUE;
3362 if (flag != NULL && flag->done_check()) {
3365 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3373 if (thread->th.th_task_team == NULL) {
3375 (
"__kmp_execute_tasks_template: T#%d no more tasks\n", gtid));
3384 if (flag == NULL || (!final_spin && flag->done_check())) {
3386 (
"__kmp_execute_tasks_template: T#%d spin condition satisfied\n",
3393 if (nthreads == 1 &&
3394 KMP_ATOMIC_LD_ACQ(¤t_task->td_incomplete_child_tasks))
3398 (
"__kmp_execute_tasks_template: T#%d can't find work\n", gtid));
template <bool C, bool S>
int __kmp_execute_tasks_32(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32<C, S> *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

template <bool C, bool S>
int __kmp_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64<C, S> *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

template <bool C, bool S>
int __kmp_atomic_execute_tasks_64(
    kmp_info_t *thread, kmp_int32 gtid, kmp_atomic_flag_64<C, S> *flag,
    int final_spin, int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

int __kmp_execute_tasks_oncore(
    kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin,
    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
    kmp_int32 is_constrained) {
  return __kmp_execute_tasks_template(
      thread, gtid, flag, final_spin,
      thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
}

template int
__kmp_execute_tasks_32<false, false>(kmp_info_t *, kmp_int32,
                                     kmp_flag_32<false, false> *, int,
                                     int *USE_ITT_BUILD_ARG(void *), kmp_int32);

template int __kmp_execute_tasks_64<false, true>(kmp_info_t *, kmp_int32,
                                                 kmp_flag_64<false, true> *,
                                                 int,
                                                 int *USE_ITT_BUILD_ARG(void *),
                                                 kmp_int32);

template int __kmp_execute_tasks_64<true, false>(kmp_info_t *, kmp_int32,
                                                 kmp_flag_64<true, false> *,
                                                 int,
                                                 int *USE_ITT_BUILD_ARG(void *),
                                                 kmp_int32);

template int __kmp_atomic_execute_tasks_64<false, true>(
    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<false, true> *, int,
    int *USE_ITT_BUILD_ARG(void *), kmp_int32);

template int __kmp_atomic_execute_tasks_64<true, false>(
    kmp_info_t *, kmp_int32, kmp_atomic_flag_64<true, false> *, int,
    int *USE_ITT_BUILD_ARG(void *), kmp_int32);
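// __kmp_enable_tasking: set up the threads_data array for the task team and,
// when blocktime is finite, wake sleeping worker threads so they can start
// executing the newly visible tasks.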
3471static void __kmp_enable_tasking(kmp_task_team_t *task_team,
3472 kmp_info_t *this_thr) {
3473 kmp_thread_data_t *threads_data;
3474 int nthreads, i, is_init_thread;
3476 KA_TRACE(10, (
"__kmp_enable_tasking(enter): T#%d\n",
3477 __kmp_gtid_from_thread(this_thr)));
3479 KMP_DEBUG_ASSERT(task_team != NULL);
3480 KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL);
3482 nthreads = task_team->tt.tt_nproc;
3483 KMP_DEBUG_ASSERT(nthreads > 0);
3484 KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc);
3487 is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team);
3489 if (!is_init_thread) {
3493 (
"__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
3494 __kmp_gtid_from_thread(this_thr)));
3497 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data);
3498 KMP_DEBUG_ASSERT(threads_data != NULL);
3500 if (__kmp_tasking_mode == tskm_task_teams &&
3501 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) {
    for (i = 0; i < nthreads; i++) {
      void *sleep_loc;
      kmp_info_t *thread = threads_data[i].td.td_thr;

      if (i == this_thr->th.th_info.ds.ds_tid) {
        continue; // do not wake up ourselves
      }

      if ((sleep_loc = TCR_PTR(CCAST(void *, thread->th.th_sleep_loc))) !=
          NULL) {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
        __kmp_null_resume_wrapper(thread);
      } else {
        KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
                      __kmp_gtid_from_thread(this_thr),
                      __kmp_gtid_from_thread(thread)));
      }
    }
  }

  KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n",
                __kmp_gtid_from_thread(this_thr)));
}
static kmp_task_team_t *__kmp_free_task_teams =
    NULL; // Free list of task_team data structures
kmp_bootstrap_lock_t __kmp_task_team_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock);
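// __kmp_alloc_task_deque: allocate and initialize the task deque used by one
// thread of the task team; the deque starts with INITIAL_TASK_DEQUE_SIZE slots.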
static void __kmp_alloc_task_deque(kmp_info_t *thread,
                                   kmp_thread_data_t *thread_data) {
  __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL);

  // Initialize last stolen task field to "none"
  thread_data->td.td_deque_last_stolen = -1;

  KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0);
  KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0);

  KE_TRACE(
      10,
      ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
       __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data));
  thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate(
      INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
  thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE;
}
static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
  if (thread_data->td.td_deque != NULL) {
    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    TCW_4(thread_data->td.td_deque_ntasks, 0);
    __kmp_free(thread_data->td.td_deque);
    thread_data->td.td_deque = NULL;
    __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
  }

#ifdef BUILD_TIED_TASK_STACK
  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
  }
#endif // BUILD_TIED_TASK_STACK
}
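// __kmp_realloc_task_threads_data: grow (or first allocate) the
// task_team->tt.tt_threads_data array to match the team size. Returns TRUE for
// the thread that performed the allocation.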
3628static int __kmp_realloc_task_threads_data(kmp_info_t *thread,
3629 kmp_task_team_t *task_team) {
3630 kmp_thread_data_t **threads_data_p;
3631 kmp_int32 nthreads, maxthreads;
3632 int is_init_thread = FALSE;
3634 if (TCR_4(task_team->tt.tt_found_tasks)) {
3639 threads_data_p = &task_team->tt.tt_threads_data;
3640 nthreads = task_team->tt.tt_nproc;
3641 maxthreads = task_team->tt.tt_max_threads;
3646 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3648 if (!TCR_4(task_team->tt.tt_found_tasks)) {
3650 kmp_team_t *team = thread->th.th_team;
3653 is_init_thread = TRUE;
3654 if (maxthreads < nthreads) {
3656 if (*threads_data_p != NULL) {
3657 kmp_thread_data_t *old_data = *threads_data_p;
3658 kmp_thread_data_t *new_data = NULL;
3662 (
"__kmp_realloc_task_threads_data: T#%d reallocating "
3663 "threads data for task_team %p, new_size = %d, old_size = %d\n",
3664 __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads));
3669 new_data = (kmp_thread_data_t *)__kmp_allocate(
3670 nthreads *
sizeof(kmp_thread_data_t));
        KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t),
                     (void *)old_data, maxthreads * sizeof(kmp_thread_data_t));
3675#ifdef BUILD_TIED_TASK_STACK
3677 for (i = maxthreads; i < nthreads; i++) {
3678 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3679 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3683 (*threads_data_p) = new_data;
3684 __kmp_free(old_data);
3686 KE_TRACE(10, (
"__kmp_realloc_task_threads_data: T#%d allocating "
3687 "threads data for task_team %p, size = %d\n",
3688 __kmp_gtid_from_thread(thread), task_team, nthreads));
3692 *threads_data_p = (kmp_thread_data_t *)__kmp_allocate(
3693 nthreads *
sizeof(kmp_thread_data_t));
3694#ifdef BUILD_TIED_TASK_STACK
3696 for (i = 0; i < nthreads; i++) {
3697 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3698 __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data);
3702 task_team->tt.tt_max_threads = nthreads;
3705 KMP_DEBUG_ASSERT(*threads_data_p != NULL);
3709 for (i = 0; i < nthreads; i++) {
3710 kmp_thread_data_t *thread_data = &(*threads_data_p)[i];
3711 thread_data->td.td_thr = team->t.t_threads[i];
3713 if (thread_data->td.td_deque_last_stolen >= nthreads) {
3717 thread_data->td.td_deque_last_stolen = -1;
3722 TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE);
3725 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
3726 return is_init_thread;
3732static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) {
3733 __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock);
3734 if (task_team->tt.tt_threads_data != NULL) {
3736 for (i = 0; i < task_team->tt.tt_max_threads; i++) {
3737 __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]);
3739 __kmp_free(task_team->tt.tt_threads_data);
3740 task_team->tt.tt_threads_data = NULL;
3742 __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock);
static void __kmp_free_task_pri_list(kmp_task_team_t *task_team) {
  __kmp_acquire_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
  if (task_team->tt.tt_task_pri_list != NULL) {
    kmp_task_pri_t *list = task_team->tt.tt_task_pri_list;
    while (list != NULL) {
      kmp_task_pri_t *next = list->next;
      __kmp_free_task_deque(&list->td);
      __kmp_free(list);
      list = next;
    }
    task_team->tt.tt_task_pri_list = NULL;
  }
  __kmp_release_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
}
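// __kmp_allocate_task_team: take a task team from the global free list if
// possible, otherwise allocate a new one, and (re)initialize it for the team.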
3767static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread,
3769 kmp_task_team_t *task_team = NULL;
  KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n",
                (thread ? __kmp_gtid_from_thread(thread) : -1), team));
3775 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
3777 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3778 if (__kmp_free_task_teams != NULL) {
3779 task_team = __kmp_free_task_teams;
3780 TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next);
3781 task_team->tt.tt_next = NULL;
3783 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
3786 if (task_team == NULL) {
3787 KE_TRACE(10, (
"__kmp_allocate_task_team: T#%d allocating "
3788 "task team for team %p\n",
3789 __kmp_gtid_from_thread(thread), team));
    task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t));
3793 __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock);
3794 __kmp_init_bootstrap_lock(&task_team->tt.tt_task_pri_lock);
3795#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
3798 __itt_suppress_mark_range(
3799 __itt_suppress_range, __itt_suppress_threading_errors,
3800 &task_team->tt.tt_found_tasks,
sizeof(task_team->tt.tt_found_tasks));
3801 __itt_suppress_mark_range(__itt_suppress_range,
3802 __itt_suppress_threading_errors,
3803 CCAST(kmp_uint32 *, &task_team->tt.tt_active),
3804 sizeof(task_team->tt.tt_active));
3812 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3813 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3814 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3815 task_team->tt.tt_nproc = nthreads = team->t.t_nproc;
3817 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads, nthreads);
3818 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3819 TCW_4(task_team->tt.tt_active, TRUE);
3821 KA_TRACE(20, (
"__kmp_allocate_task_team: T#%d exiting; task_team = %p "
3822 "unfinished_threads init'd to %d\n",
3823 (thread ? __kmp_gtid_from_thread(thread) : -1), task_team,
3824 KMP_ATOMIC_LD_RLX(&task_team->tt.tt_unfinished_threads)));
3831void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) {
3832 KA_TRACE(20, (
"__kmp_free_task_team: T#%d task_team = %p\n",
3833 thread ? __kmp_gtid_from_thread(thread) : -1, task_team));
3836 __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
3838 KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL);
3839 task_team->tt.tt_next = __kmp_free_task_teams;
3840 TCW_PTR(__kmp_free_task_teams, task_team);
3842 __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
void __kmp_reap_task_teams(void) {
  kmp_task_team_t *task_team;

  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
    // Free all task_teams on the free list
    __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock);
    while ((task_team = __kmp_free_task_teams) != NULL) {
      __kmp_free_task_teams = task_team->tt.tt_next;
      task_team->tt.tt_next = NULL;

      // Free threads_data and the priority-task list if necessary
      if (task_team->tt.tt_threads_data != NULL) {
        __kmp_free_task_threads_data(task_team);
      }
      if (task_team->tt.tt_task_pri_list != NULL) {
        __kmp_free_task_pri_list(task_team);
      }
      __kmp_free(task_team);
    }
    __kmp_release_bootstrap_lock(&__kmp_task_team_lock);
  }
}
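// __kmp_wait_to_unref_task_teams: spin until all threads in the thread pool
// have dropped their references to outstanding task teams.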
3876void __kmp_wait_to_unref_task_teams(
void) {
3882 KMP_INIT_YIELD(spins);
3883 KMP_INIT_BACKOFF(time);
3891 for (thread = CCAST(kmp_info_t *, __kmp_thread_pool); thread != NULL;
3892 thread = thread->th.th_next_pool) {
3896 if (TCR_PTR(thread->th.th_task_team) == NULL) {
3897 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
3898 __kmp_gtid_from_thread(thread)));
3903 if (!__kmp_is_thread_alive(thread, &exit_val)) {
3904 thread->th.th_task_team = NULL;
3911 KA_TRACE(10, (
"__kmp_wait_to_unref_task_team: Waiting for T#%d to "
3912 "unreference task_team\n",
3913 __kmp_gtid_from_thread(thread)));
3915 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
3918 if ((sleep_loc = TCR_PTR(CCAST(
void *, thread->th.th_sleep_loc))) !=
3922 (
"__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
3923 __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread)));
3924 __kmp_null_resume_wrapper(thread);
3933 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
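// __kmp_task_team_setup: called by the primary thread to create task teams for
// both parity slots of the team so workers always find a valid, initialized
// task team when they switch parity at a barrier.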
3939void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team,
int always) {
3940 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
3946 if (team->t.t_task_team[this_thr->th.th_task_state] == NULL &&
3947 (always || team->t.t_nproc > 1)) {
3948 team->t.t_task_team[this_thr->th.th_task_state] =
3949 __kmp_allocate_task_team(this_thr, team);
3950 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d created new task_team %p"
3951 " for team %d at parity=%d\n",
3952 __kmp_gtid_from_thread(this_thr),
3953 team->t.t_task_team[this_thr->th.th_task_state], team->t.t_id,
3954 this_thr->th.th_task_state));
3964 if (team->t.t_nproc > 1) {
3965 int other_team = 1 - this_thr->th.th_task_state;
3966 KMP_DEBUG_ASSERT(other_team >= 0 && other_team < 2);
3967 if (team->t.t_task_team[other_team] == NULL) {
3968 team->t.t_task_team[other_team] =
3969 __kmp_allocate_task_team(this_thr, team);
3970 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d created second new "
3971 "task_team %p for team %d at parity=%d\n",
3972 __kmp_gtid_from_thread(this_thr),
3973 team->t.t_task_team[other_team], team->t.t_id, other_team));
3976 kmp_task_team_t *task_team = team->t.t_task_team[other_team];
3977 if (!task_team->tt.tt_active ||
3978 team->t.t_nproc != task_team->tt.tt_nproc) {
3979 TCW_4(task_team->tt.tt_nproc, team->t.t_nproc);
3980 TCW_4(task_team->tt.tt_found_tasks, FALSE);
3981 TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE);
3982 TCW_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
3983 KMP_ATOMIC_ST_REL(&task_team->tt.tt_unfinished_threads,
3985 TCW_4(task_team->tt.tt_active, TRUE);
3989 KA_TRACE(20, (
"__kmp_task_team_setup: Primary T#%d reset next task_team "
3990 "%p for team %d at parity=%d\n",
3991 __kmp_gtid_from_thread(this_thr),
3992 team->t.t_task_team[other_team], team->t.t_id, other_team));
4000 if (this_thr == __kmp_hidden_helper_main_thread) {
4001 for (
int i = 0; i < 2; ++i) {
4002 kmp_task_team_t *task_team = team->t.t_task_team[i];
4003 if (KMP_TASKING_ENABLED(task_team)) {
4006 __kmp_enable_tasking(task_team, this_thr);
4007 for (
int j = 0; j < task_team->tt.tt_nproc; ++j) {
4008 kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[j];
4009 if (thread_data->td.td_deque == NULL) {
4010 __kmp_alloc_task_deque(__kmp_hidden_helper_threads[j], thread_data);
4020void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) {
4021 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
4025 this_thr->th.th_task_state = (kmp_uint8)(1 - this_thr->th.th_task_state);
4029 TCW_PTR(this_thr->th.th_task_team,
4030 team->t.t_task_team[this_thr->th.th_task_state]);
  KA_TRACE(20,
           ("__kmp_task_team_sync: Thread T#%d task team switched to task_team "
            "%p from Team #%d (parity=%d)\n",
            __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team,
            team->t.t_id, this_thr->th.th_task_state));
}
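// __kmp_task_team_wait: called by the primary thread at a barrier to wait for
// all tasks of the current task team to finish, then deactivate the task team.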
void __kmp_task_team_wait(
    kmp_info_t *this_thr,
    kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) {
4048 kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state];
4050 KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
4051 KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team);
4053 if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) {
4055 KA_TRACE(20, (
"__kmp_task_team_wait: Primary T#%d waiting for all tasks "
4056 "(for unfinished_threads to reach 0) on task_team = %p\n",
4057 __kmp_gtid_from_thread(this_thr), task_team));
      kmp_flag_32<false, false> flag(
          RCAST(std::atomic<kmp_uint32> *,
                &task_team->tt.tt_unfinished_threads),
          0U);
      flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
4071 (
"__kmp_task_team_wait: Primary T#%d deactivating task_team %p: "
4072 "setting active to false, setting local and team's pointer to NULL\n",
4073 __kmp_gtid_from_thread(this_thr), task_team));
4074 KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 ||
4075 task_team->tt.tt_found_proxy_tasks == TRUE ||
4076 task_team->tt.tt_hidden_helper_task_encountered == TRUE);
4077 TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE);
4078 TCW_SYNC_4(task_team->tt.tt_hidden_helper_task_encountered, FALSE);
4079 KMP_CHECK_UPDATE(task_team->tt.tt_untied_task_encountered, 0);
4080 TCW_SYNC_4(task_team->tt.tt_active, FALSE);
4083 TCW_PTR(this_thr->th.th_task_team, NULL);
void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) {
  std::atomic<kmp_uint32> *spin = RCAST(
      std::atomic<kmp_uint32> *,
      &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads);
  int flag = FALSE;
  KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier);

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  kmp_flag_32<false, false> spin_flag(spin, 0U);
  while (!spin_flag.execute_tasks(thread, gtid, TRUE,
                                  &flag USE_ITT_BUILD_ARG(NULL), 0)) {
    KMP_FSYNC_SPIN_PREPARE(RCAST(void *, spin));

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    KMP_YIELD(TRUE);
  }
  KMP_FSYNC_SPIN_ACQUIRED(RCAST(void *, spin));
}
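// __kmp_give_task: try to enqueue a proxy task into the deque of the given
// thread. The pass argument throttles deque expansion; returns true on success.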
static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task,
                            kmp_int32 pass) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  kmp_task_team_t *task_team = taskdata->td_task_team;

  KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n",
                taskdata, tid));

  // If task_team is NULL something went sour, return false
  KMP_DEBUG_ASSERT(task_team != NULL);

  bool result = false;
  kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid];

  if (thread_data->td.td_deque == NULL) {
    // There is no queue in this thread, go find another one
    KA_TRACE(30,
             ("__kmp_give_task: thread %d has no queue while giving task %p.\n",
              tid, taskdata));
    return result;
  }

  if (TCR_4(thread_data->td.td_deque_ntasks) >=
      TASK_DEQUE_SIZE(thread_data->td)) {
    KA_TRACE(
        30,
        ("__kmp_give_task: queue is full while giving task %p to thread %d.\n",
         taskdata, tid));

    // if this deque is bigger than the pass ratio, give another thread a chance
    if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
      return result;

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      // expand deque to push the task which is not allowed to execute
      __kmp_realloc_task_deque(thread, thread_data);
    }

  } else {

    __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);

    if (TCR_4(thread_data->td.td_deque_ntasks) >=
        TASK_DEQUE_SIZE(thread_data->td)) {
      KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to "
                    "thread %d.\n",
                    taskdata, tid));

      // if this deque is bigger than the pass ratio, give another thread a
      // chance
      if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass)
        goto release_and_exit;

      __kmp_realloc_task_deque(thread, thread_data);
    }
  }

  // lock is held here, and there is space in the deque

  thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata;
  // Wrap index.
  thread_data->td.td_deque_tail =
      (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td);
  TCW_4(thread_data->td.td_deque_ntasks,
        TCR_4(thread_data->td.td_deque_ntasks) + 1);

  result = true;
  KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n",
                taskdata, tid));

release_and_exit:
  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);

  return result;
}
4207#define PROXY_TASK_FLAG 0x40000000
4224static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
4225 KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT);
4226 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4227 KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0);
4228 KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0);
4230 taskdata->td_flags.complete = 1;
4232 if (taskdata->td_taskgroup)
4233 KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count);
4237 KMP_ATOMIC_OR(&taskdata->td_incomplete_child_tasks, PROXY_TASK_FLAG);
4240static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) {
4242 kmp_int32 children = 0;
4246 KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks);
4247 KMP_DEBUG_ASSERT(children >= 0);
4250 KMP_ATOMIC_AND(&taskdata->td_incomplete_child_tasks, ~PROXY_TASK_FLAG);
4253static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) {
4254 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4255 kmp_info_t *thread = __kmp_threads[gtid];
4257 KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);
4258 KMP_DEBUG_ASSERT(taskdata->td_flags.complete ==
4263 while ((KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) &
4264 PROXY_TASK_FLAG) > 0)
4267 __kmp_release_deps(gtid, taskdata);
4268 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
  KA_TRACE(
      10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n",
           gtid, taskdata));
  __kmp_assert_valid_gtid(gtid);
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);
  __kmp_second_top_half_finish_proxy(taskdata);
  __kmp_bottom_half_finish_proxy(gtid, ptask);

  KA_TRACE(10,
           ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n",
            gtid, taskdata));
}
void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  // Enqueue task to complete bottom half completion from a thread within the
  // corresponding team
  kmp_team_t *team = taskdata->td_team;
  kmp_int32 nthreads = team->t.t_nproc;
  kmp_info_t *thread;

  kmp_int32 start_k = start % nthreads;
  kmp_int32 pass = 1;
  kmp_int32 k = start_k;

  do {
    // For now we're just linearly trying to find a thread
    thread = team->t.t_threads[k];
    k = (k + 1) % nthreads;

    // we did a full pass over all the threads
    if (k == start_k)
      pass = pass << 1;

  } while (!__kmp_give_task(thread, k, ptask, pass));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && __kmp_wpolicy_passive) {
    // awake at least one thread to execute the given task
    for (int i = 0; i < nthreads; ++i) {
      thread = team->t.t_threads[i];
      if (thread->th.th_sleep_loc != NULL) {
        __kmp_null_resume_wrapper(thread);
        break;
      }
    }
  }
}
void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) {
  KMP_DEBUG_ASSERT(ptask != NULL);
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n",
       taskdata));
  KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY);

  __kmp_first_top_half_finish_proxy(taskdata);
  __kmpc_give_task(ptask);
  __kmp_second_top_half_finish_proxy(taskdata);

  KA_TRACE(
      10,
      ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n",
       taskdata));
}
kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, int gtid,
                                                kmp_task_t *task) {
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  if (td->td_allow_completion_event.type == KMP_EVENT_UNINITIALIZED) {
    td->td_allow_completion_event.type = KMP_EVENT_ALLOW_COMPLETION;
    td->td_allow_completion_event.ed.task = task;
    __kmp_init_tas_lock(&td->td_allow_completion_event.lock);
  }
  return &td->td_allow_completion_event;
}
4377void __kmp_fulfill_event(kmp_event_t *event) {
4378 if (event->type == KMP_EVENT_ALLOW_COMPLETION) {
4379 kmp_task_t *ptask = event->ed.task;
4380 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask);
4381 bool detached =
false;
4382 int gtid = __kmp_get_gtid();
4387 __kmp_acquire_tas_lock(&event->lock, gtid);
4388 if (taskdata->td_flags.proxy == TASK_PROXY) {
4394 if (UNLIKELY(ompt_enabled.enabled))
4395 __ompt_task_finish(ptask, NULL, ompt_task_early_fulfill);
4398 event->type = KMP_EVENT_UNINITIALIZED;
4399 __kmp_release_tas_lock(&event->lock, gtid);
4405 if (UNLIKELY(ompt_enabled.enabled))
4406 __ompt_task_finish(ptask, NULL, ompt_task_late_fulfill);
4410 kmp_team_t *team = taskdata->td_team;
4411 kmp_info_t *thread = __kmp_get_thread();
4412 if (thread->th.th_team == team) {
4430kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) {
4432 kmp_taskdata_t *taskdata;
4433 kmp_taskdata_t *taskdata_src = KMP_TASK_TO_TASKDATA(task_src);
4434 kmp_taskdata_t *parent_task = taskdata_src->td_parent;
4435 size_t shareds_offset;
4438 KA_TRACE(10, (
"__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread,
4440 KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy ==
4442 KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT);
4443 task_size = taskdata_src->td_size_alloc;
4446 KA_TRACE(30, (
"__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread,
4449 taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size);
4451 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size);
4453 KMP_MEMCPY(taskdata, taskdata_src, task_size);
4455 task = KMP_TASKDATA_TO_TASK(taskdata);
4458 taskdata->td_task_id = KMP_GEN_TASK_ID();
  if (task->shareds != NULL) { // need setup shareds pointer
    shareds_offset = (char *)task_src->shareds - (char *)taskdata_src;
    task->shareds = &((char *)taskdata)[shareds_offset];
    KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) ==
                     0);
  }
4465 taskdata->td_alloc_thread = thread;
4466 taskdata->td_parent = parent_task;
4468 taskdata->td_taskgroup = parent_task->td_taskgroup;
4471 if (taskdata->td_flags.tiedness == TASK_TIED)
4472 taskdata->td_last_tied = taskdata;
4476 if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
4477 KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks);
4478 if (parent_task->td_taskgroup)
4479 KMP_ATOMIC_INC(&parent_task->td_taskgroup->count);
4482 if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT)
4483 KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks);
4487 (
"__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n",
4488 thread, taskdata, taskdata->td_parent));
4490 if (UNLIKELY(ompt_enabled.enabled))
4491 __ompt_task_init(taskdata, thread->th.th_info.ds.ds_gtid);
typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);

KMP_BUILD_ASSERT(sizeof(long) == 4 || sizeof(long) == 8);
class kmp_taskloop_bounds_t {
  kmp_task_t *task;
  const kmp_taskdata_t *taskdata;
  size_t lower_offset;
  size_t upper_offset;

public:
  kmp_taskloop_bounds_t(kmp_task_t *_task, kmp_uint64 *lb, kmp_uint64 *ub)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(task)),
        lower_offset((char *)lb - (char *)task),
        upper_offset((char *)ub - (char *)task) {
    KMP_DEBUG_ASSERT((char *)lb > (char *)_task);
    KMP_DEBUG_ASSERT((char *)ub > (char *)_task);
  }
  kmp_taskloop_bounds_t(kmp_task_t *_task, const kmp_taskloop_bounds_t &bounds)
      : task(_task), taskdata(KMP_TASK_TO_TASKDATA(_task)),
        lower_offset(bounds.lower_offset), upper_offset(bounds.upper_offset) {}
  size_t get_lower_offset() const { return lower_offset; }
  size_t get_upper_offset() const { return upper_offset; }
  kmp_uint64 get_lb() const {
    kmp_int64 retval;
4528#if defined(KMP_GOMP_COMPAT)
4530 if (!taskdata->td_flags.native) {
4531 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4534 if (taskdata->td_size_loop_bounds == 4) {
4535 kmp_int32 *lb = RCAST(kmp_int32 *, task->shareds);
4536 retval = (kmp_int64)*lb;
4538 kmp_int64 *lb = RCAST(kmp_int64 *, task->shareds);
4539 retval = (kmp_int64)*lb;
4544 retval = *(kmp_int64 *)((
char *)task + lower_offset);
4548 kmp_uint64 get_ub()
const {
4550#if defined(KMP_GOMP_COMPAT)
4552 if (!taskdata->td_flags.native) {
4553 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4556 if (taskdata->td_size_loop_bounds == 4) {
4557 kmp_int32 *ub = RCAST(kmp_int32 *, task->shareds) + 1;
4558 retval = (kmp_int64)*ub;
4560 kmp_int64 *ub = RCAST(kmp_int64 *, task->shareds) + 1;
4561 retval = (kmp_int64)*ub;
4565 retval = *(kmp_int64 *)((
char *)task + upper_offset);
4569 void set_lb(kmp_uint64 lb) {
4570#if defined(KMP_GOMP_COMPAT)
4572 if (!taskdata->td_flags.native) {
4573 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4576 if (taskdata->td_size_loop_bounds == 4) {
4577 kmp_uint32 *lower = RCAST(kmp_uint32 *, task->shareds);
4578 *lower = (kmp_uint32)lb;
4580 kmp_uint64 *lower = RCAST(kmp_uint64 *, task->shareds);
4581 *lower = (kmp_uint64)lb;
4585 *(kmp_uint64 *)((
char *)task + lower_offset) = lb;
4588 void set_ub(kmp_uint64 ub) {
4589#if defined(KMP_GOMP_COMPAT)
4591 if (!taskdata->td_flags.native) {
4592 *(kmp_uint64 *)((
char *)task + upper_offset) = ub;
4595 if (taskdata->td_size_loop_bounds == 4) {
4596 kmp_uint32 *upper = RCAST(kmp_uint32 *, task->shareds) + 1;
4597 *upper = (kmp_uint32)ub;
4599 kmp_uint64 *upper = RCAST(kmp_uint64 *, task->shareds) + 1;
4600 *upper = (kmp_uint64)ub;
    *(kmp_uint64 *)((char *)task + upper_offset) = ub;
  }
};
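// __kmp_taskloop_linear: split the taskloop iteration space into num_tasks
// chunks of roughly grainsize iterations each, duplicating the outlined task
// for every chunk and scheduling the copies one by one.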
void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                           kmp_uint64 grainsize, kmp_uint64 extras,
                           kmp_int64 last_chunk, kmp_uint64 tc,
#if OMPT_SUPPORT
                           void *codeptr_ra,
#endif
                           void *task_dup) {
  KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling);
4636 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4638 kmp_taskloop_bounds_t task_bounds(task, lb, ub);
4639 kmp_uint64 lower = task_bounds.get_lb();
4640 kmp_uint64 upper = task_bounds.get_ub();
4642 kmp_info_t *thread = __kmp_threads[gtid];
4643 kmp_taskdata_t *current_task = thread->th.th_current_task;
4644 kmp_task_t *next_task;
4645 kmp_int32 lastpriv = 0;
4647 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4648 (last_chunk < 0 ? last_chunk : extras));
4649 KMP_DEBUG_ASSERT(num_tasks > extras);
4650 KMP_DEBUG_ASSERT(num_tasks > 0);
  KA_TRACE(20, ("__kmp_taskloop_linear: T#%d: %lld tasks, grainsize %lld, "
                "extras %lld, last_chunk %lld, i=%lld,%lld(%d)%lld, dup %p\n",
                gtid, num_tasks, grainsize, extras, last_chunk, lower, upper,
                ub_glob, st, task_dup));
4657 for (i = 0; i < num_tasks; ++i) {
4658 kmp_uint64 chunk_minus_1;
4660 chunk_minus_1 = grainsize - 1;
4662 chunk_minus_1 = grainsize;
4665 upper = lower + st * chunk_minus_1;
4669 if (i == num_tasks - 1) {
4672 KMP_DEBUG_ASSERT(upper == *ub);
4673 if (upper == ub_glob)
4675 }
else if (st > 0) {
4676 KMP_DEBUG_ASSERT((kmp_uint64)st > *ub - upper);
4677 if ((kmp_uint64)st > ub_glob - upper)
4680 KMP_DEBUG_ASSERT(upper + st < *ub);
4681 if (upper - ub_glob < (kmp_uint64)(-st))
4685 next_task = __kmp_task_dup_alloc(thread, task);
4686 kmp_taskdata_t *next_taskdata = KMP_TASK_TO_TASKDATA(next_task);
4687 kmp_taskloop_bounds_t next_task_bounds =
4688 kmp_taskloop_bounds_t(next_task, task_bounds);
4691 next_task_bounds.set_lb(lower);
4692 if (next_taskdata->td_flags.native) {
4693 next_task_bounds.set_ub(upper + (st > 0 ? 1 : -1));
4695 next_task_bounds.set_ub(upper);
4697 if (ptask_dup != NULL)
4699 ptask_dup(next_task, task, lastpriv);
4701 (
"__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, "
4702 "upper %lld stride %lld, (offsets %p %p)\n",
4703 gtid, i, next_task, lower, upper, st,
4704 next_task_bounds.get_lower_offset(),
4705 next_task_bounds.get_upper_offset()));
4707 __kmp_omp_taskloop_task(NULL, gtid, next_task,
4710 if (ompt_enabled.ompt_callback_dispatch) {
4711 OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
4716 __kmp_omp_task(gtid, next_task,
true);
4721 __kmp_task_start(gtid, task, current_task);
4723 __kmp_task_finish<false>(gtid, task, current_task);
4728typedef struct __taskloop_params {
4735 kmp_uint64 num_tasks;
4736 kmp_uint64 grainsize;
4738 kmp_int64 last_chunk;
4740 kmp_uint64 num_t_min;
4744} __taskloop_params_t;
void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *,
                          kmp_uint64 *, kmp_int64, kmp_uint64, kmp_uint64,
                          kmp_uint64, kmp_uint64, kmp_int64, kmp_uint64,
#if OMPT_SUPPORT
                          void *,
#endif
                          void *);

int __kmp_taskloop_task(int gtid, void *ptask) {
4757 __taskloop_params_t *p =
4758 (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds;
4759 kmp_task_t *task = p->task;
4760 kmp_uint64 *lb = p->lb;
4761 kmp_uint64 *ub = p->ub;
4762 void *task_dup = p->task_dup;
4764 kmp_int64 st = p->st;
4765 kmp_uint64 ub_glob = p->ub_glob;
4766 kmp_uint64 num_tasks = p->num_tasks;
4767 kmp_uint64 grainsize = p->grainsize;
4768 kmp_uint64 extras = p->extras;
4769 kmp_int64 last_chunk = p->last_chunk;
4770 kmp_uint64 tc = p->tc;
4771 kmp_uint64 num_t_min = p->num_t_min;
4773 void *codeptr_ra = p->codeptr_ra;
4776 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4777 KMP_DEBUG_ASSERT(task != NULL);
4779 (
"__kmp_taskloop_task: T#%d, task %p: %lld tasks, grainsize"
4780 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4781 gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4784 KMP_DEBUG_ASSERT(num_tasks * 2 + 1 > num_t_min);
4785 if (num_tasks > num_t_min)
4786 __kmp_taskloop_recur(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4787 grainsize, extras, last_chunk, tc, num_t_min,
4793 __kmp_taskloop_linear(NULL, gtid, task, lb, ub, st, ub_glob, num_tasks,
4794 grainsize, extras, last_chunk, tc,
  KA_TRACE(40, ("__kmp_taskloop_task(exit): T#%d\n", gtid));
  return 0;
}
void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task,
                          kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                          kmp_uint64 ub_glob, kmp_uint64 num_tasks,
                          kmp_uint64 grainsize, kmp_uint64 extras,
                          kmp_int64 last_chunk, kmp_uint64 tc,
                          kmp_uint64 num_t_min,
#if OMPT_SUPPORT
                          void *codeptr_ra,
#endif
                          void *task_dup) {
4832 kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
4833 KMP_DEBUG_ASSERT(task != NULL);
4834 KMP_DEBUG_ASSERT(num_tasks > num_t_min);
4836 (
"__kmp_taskloop_recur: T#%d, task %p: %lld tasks, grainsize"
4837 " %lld, extras %lld, last_chunk %lld, i=%lld,%lld(%d), dup %p\n",
4838 gtid, taskdata, num_tasks, grainsize, extras, last_chunk, *lb, *ub,
4840 p_task_dup_t ptask_dup = (p_task_dup_t)task_dup;
4841 kmp_uint64 lower = *lb;
4842 kmp_info_t *thread = __kmp_threads[gtid];
4844 kmp_task_t *next_task;
  size_t lower_offset = (char *)lb - (char *)task;
  size_t upper_offset = (char *)ub - (char *)task;
4850 KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
4851 (last_chunk < 0 ? last_chunk : extras));
4852 KMP_DEBUG_ASSERT(num_tasks > extras);
4853 KMP_DEBUG_ASSERT(num_tasks > 0);
4856 kmp_uint64 lb1, ub0, tc0, tc1, ext0, ext1;
4857 kmp_int64 last_chunk0 = 0, last_chunk1 = 0;
4858 kmp_uint64 gr_size0 = grainsize;
4859 kmp_uint64 n_tsk0 = num_tasks >> 1;
4860 kmp_uint64 n_tsk1 = num_tasks - n_tsk0;
4861 if (last_chunk < 0) {
4863 last_chunk1 = last_chunk;
4864 tc0 = grainsize * n_tsk0;
4866 }
else if (n_tsk0 <= extras) {
4869 ext1 = extras - n_tsk0;
4870 tc0 = gr_size0 * n_tsk0;
4875 tc1 = grainsize * n_tsk1;
4878 ub0 = lower + st * (tc0 - 1);
4882 next_task = __kmp_task_dup_alloc(thread, task);
4884 *(kmp_uint64 *)((
char *)next_task + lower_offset) = lb1;
4885 if (ptask_dup != NULL)
4886 ptask_dup(next_task, task, 0);
4891 kmp_taskdata_t *current_task = thread->th.th_current_task;
4892 thread->th.th_current_task = taskdata->td_parent;
4893 kmp_task_t *new_task =
4894 __kmpc_omp_task_alloc(loc, gtid, 1, 3 *
sizeof(
void *),
4895 sizeof(__taskloop_params_t), &__kmp_taskloop_task);
4897 thread->th.th_current_task = current_task;
4898 __taskloop_params_t *p = (__taskloop_params_t *)new_task->shareds;
4899 p->task = next_task;
4900 p->lb = (kmp_uint64 *)((
char *)next_task + lower_offset);
4901 p->ub = (kmp_uint64 *)((
char *)next_task + upper_offset);
4902 p->task_dup = task_dup;
4904 p->ub_glob = ub_glob;
4905 p->num_tasks = n_tsk1;
4906 p->grainsize = grainsize;
4908 p->last_chunk = last_chunk1;
4910 p->num_t_min = num_t_min;
4912 p->codeptr_ra = codeptr_ra;
4917 __kmp_omp_taskloop_task(NULL, gtid, new_task, codeptr_ra);
4919 __kmp_omp_task(gtid, new_task,
true);
4923 if (n_tsk0 > num_t_min)
4924 __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0, gr_size0,
4925 ext0, last_chunk0, tc0, num_t_min,
4931 __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, n_tsk0,
4932 gr_size0, ext0, last_chunk0, tc0,
4938 KA_TRACE(40, (
"__kmp_taskloop_recur(exit): T#%d\n", gtid));

// Execute the taskloop construct: compute the chunking parameters, then
// dispatch to the linear or recursive task-spawning scheme.
static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                           kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                           int nogroup, int sched, kmp_uint64 grainsize,
                           int modifier, void *task_dup) {
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
  KMP_DEBUG_ASSERT(task != NULL);
  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_taskgroup(loc, gtid);
  }

  // calculate loop parameters
  kmp_taskloop_bounds_t task_bounds(task, lb, ub);
  kmp_uint64 tc;
  // compiler provides global bounds here
  kmp_uint64 lower = task_bounds.get_lb();
  kmp_uint64 upper = task_bounds.get_ub();
  kmp_uint64 ub_glob = upper; // global upper used to calculate lastprivate flag
  kmp_uint64 num_tasks = 0, extras = 0;
  kmp_int64 last_chunk =
      0; // reduce grainsize of the last task by last_chunk in strict mode
  kmp_uint64 num_tasks_min = __kmp_taskloop_min_tasks;
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

  KA_TRACE(20, ("__kmp_taskloop: T#%d, task %p, lb %lld, ub %lld, st %lld, "
                "grain %llu(%d, %d), dup %p\n",
                gtid, taskdata, lower, upper, st, grainsize, sched, modifier,
                task_dup));

  // compute trip count
  if (st == 1) { // most common case
    tc = upper - lower + 1;
  } else if (st < 0) {
    tc = (lower - upper) / (-st) + 1;
  } else { // st > 0
    tc = (upper - lower) / st + 1;
  }
  if (tc == 0) {
    KA_TRACE(20, ("__kmp_taskloop(exit): T#%d zero-trip loop\n", gtid));
    // free the pattern task and exit
    __kmp_task_start(gtid, task, current_task);
    // do not execute anything for a zero-trip loop
    __kmp_task_finish<false>(gtid, task, current_task);
    return;
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (num_tasks_min == 0)
    // TODO: can we choose a better default heuristic?
    num_tasks_min =
        KMP_MIN(thread->th.th_team_nproc * 10, INITIAL_TASK_DEQUE_SIZE);

  // compute num_tasks/grainsize based on the input provided
  switch (sched) {
  case 0: // no schedule clause specified, we can choose the default
    // let's try to schedule (team_size * 10) tasks
    grainsize = thread->th.th_team_nproc * 10;
    KMP_FALLTHROUGH();
  case 2: // num_tasks provided
    if (grainsize > tc) {
      num_tasks = tc; // too big num_tasks requested, adjust values
      grainsize = 1;
      extras = 0;
    } else {
      num_tasks = grainsize;
      grainsize = tc / num_tasks;
      extras = tc % num_tasks;
    }
    break;
  case 1: // grainsize provided
    if (grainsize > tc) {
      num_tasks = 1;
      grainsize = tc; // too big grainsize requested, adjust values
      extras = 0;
    } else {
      if (modifier) { // strict grainsize: keep chunk size, shorten last chunk
        num_tasks = (tc + grainsize - 1) / grainsize;
        last_chunk = tc - (num_tasks * grainsize);
        extras = 0;
      } else {
        num_tasks = tc / grainsize;
        // adjust grainsize for balanced distribution of iterations
        grainsize = tc / num_tasks;
        extras = tc % num_tasks;
      }
    }
    break;
  default:
    KMP_ASSERT2(0, "unknown scheduling of taskloop");
  }

  KMP_DEBUG_ASSERT(tc == num_tasks * grainsize +
                             (last_chunk < 0 ? last_chunk : extras));
  KMP_DEBUG_ASSERT(num_tasks > extras);
  KMP_DEBUG_ASSERT(num_tasks > 0);

  // check if clause value first
  if (if_val == 0) { // if(0) specified, mark the task as serial
    taskdata->td_flags.task_serial = 1;
    taskdata->td_flags.tiedness = TASK_TIED; // a serial task cannot be untied
    // always start serial tasks linearly
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
    // !taskdata->td_flags.native => currently force linear spawning of tasks
    // for GOMP_taskloop
  } else if (num_tasks > num_tasks_min && !taskdata->td_flags.native) {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go recursive: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_recur(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                         grainsize, extras, last_chunk, tc, num_tasks_min,
#if OMPT_SUPPORT
                         OMPT_GET_RETURN_ADDRESS(0),
#endif
                         task_dup);
  } else {
    KA_TRACE(20, ("__kmp_taskloop: T#%d, go linear: tc %llu, #tasks %llu"
                  "(%lld), grain %llu, extras %llu, last_chunk %lld\n",
                  gtid, tc, num_tasks, num_tasks_min, grainsize, extras,
                  last_chunk));
    __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, ub_glob, num_tasks,
                          grainsize, extras, last_chunk, tc,
#if OMPT_SUPPORT
                          OMPT_GET_RETURN_ADDRESS(0),
#endif
                          task_dup);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_taskloop, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), tc, OMPT_GET_RETURN_ADDRESS(0));
  }
#endif

  if (nogroup == 0) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
    __kmpc_end_taskgroup(loc, gtid);
  }
  KA_TRACE(20, ("__kmp_taskloop(exit): T#%d\n", gtid));
}

// __kmpc_taskloop: compiler entry point for the taskloop construct
// (no grainsize modifier).
void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                     kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup,
                     int sched, kmp_uint64 grainsize, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 0, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d\n", gtid));
}

// __kmpc_taskloop_5: compiler entry point for the taskloop construct with the
// strict grainsize/num_tasks modifier.
void __kmpc_taskloop_5(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
                       kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,
                       int nogroup, int sched, kmp_uint64 grainsize,
                       int modifier, void *task_dup) {
  __kmp_assert_valid_gtid(gtid);
  KA_TRACE(20, ("__kmpc_taskloop_5(enter): T#%d\n", gtid));
  __kmp_taskloop(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
                 modifier, task_dup);
  KA_TRACE(20, ("__kmpc_taskloop_5(exit): T#%d\n", gtid));
}

// Returns a pointer to the current task's target async handle slot,
// or NULL if there is no current task.
void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid) {
  if (gtid == KMP_GTID_DNE)
    return NULL;

  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;

  if (!taskdata)
    return NULL;

  return &taskdata->td_target_data.async_handle;
}

// Returns TRUE if the current task of the given thread has a task team
// allocated to it, FALSE otherwise.
bool __kmpc_omp_has_task_team(kmp_int32 gtid) {
  if (gtid == KMP_GTID_DNE)
    return FALSE;

  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;

  if (!taskdata)
    return FALSE;

  return taskdata->td_task_team != NULL;
}
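
// A hypothetical caller-side sketch (not part of this runtime) of how an
// offloading layer might combine the two entry points above: stash an
// asynchronous handle in the current task only when a handle slot exists and
// the thread actually has a task team; otherwise complete synchronously.
// try_defer_target_completion is an invented name used for illustration.
#if 0
static bool try_defer_target_completion(kmp_int32 gtid, void *async_handle) {
  void **slot = __kmpc_omp_get_target_async_handle_ptr(gtid);
  if (slot == NULL || !__kmpc_omp_has_task_team(gtid))
    return false; // no current task or no task team: do not defer
  *slot = async_handle; // let the tasking runtime query the handle later
  return true;
}
#endif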